├── allms
    ├── __init__.py
    ├── utils
    │   ├── __init__.py
    │   ├── logger_utils.py
    │   ├── io_utils.py
    │   ├── response_parsing_utils.py
    │   └── long_text_processing_utils.py
    ├── chains
    │   ├── __init__.py
    │   └── long_text_processing_chain.py
    ├── constants
    │   ├── __init__.py
    │   ├── prompt.py
    │   ├── vertex_ai.py
    │   ├── azure.py
    │   └── input_data.py
    ├── defaults
    │   ├── __init__.py
    │   ├── general_defaults.py
    │   ├── long_text_chain.py
    │   ├── azure_defaults.py
    │   └── vertex_ai.py
    ├── domain
    │   ├── __init__.py
    │   ├── input_data.py
    │   ├── prompt_dto.py
    │   ├── response.py
    │   ├── enumerables.py
    │   └── configuration.py
    ├── exceptions
    │   ├── __init__.py
    │   └── validation_input_data_exceptions.py
    └── models
    │   ├── azure_base.py
    │   ├── __init__.py
    │   ├── azure_openai.py
    │   ├── vertexai_palm.py
    │   ├── vertexai_gemma.py
    │   ├── azure_mistral.py
    │   ├── azure_llama2.py
    │   ├── vertexai_base.py
    │   ├── vertexai_gemini.py
    │   └── abstract.py
├── .gitignore
├── docs
    ├── assets
    │   └── images
    │   │   └── logo.png
    ├── faq.md
    ├── index.md
    ├── usage
    │   ├── basic.md
    │   ├── deploy_open_source_models.md
    │   ├── error_handling.md
    │   ├── forcing_response_format.md
    │   └── advanced.md
    ├── api
    │   ├── input_output_dataclasses.md
    │   └── models
    │   │   ├── azure_mistral_model.md
    │   │   ├── azure_llama2_model.md
    │   │   ├── vertexai_palm_model.md
    │   │   ├── vertexai_gemma.md
    │   │   ├── azure_openai_model.md
    │   │   └── vertexai_gemini_model.md
    └── installation_and_quick_start.md
├── resources
    └── images
    │   └── map_reduce_like_summary.png
├── .github
    ├── pull_request_template.md
    ├── dependabot.yaml
    └── workflows
    │   ├── release.yml
    │   ├── docs.yml
    │   └── build.yaml
├── tests
    ├── test_available_models_added_to_all.py
    ├── test_utf_characters_data.py
    ├── conftest.py
    ├── resources
    │   ├── test_input_data.csv
    │   └── test_end_to_end_expected_output.csv
    ├── test_output_parser.py
    ├── test_model_behavior_for_different_input_data.py
    └── test_end_to_end.py
├── Makefile
├── mkdocs.yml
├── pyproject.toml
├── README.md
├── LICENSE
└── examples
    └── introduction.ipynb


/allms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/allms/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/allms/chains/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/allms/constants/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/allms/defaults/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/allms/domain/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/allms/exceptions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | __pycache__
3 | credentials
4 | .DS_Store


--------------------------------------------------------------------------------
/docs/assets/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allegro/allms/HEAD/docs/assets/images/logo.png


--------------------------------------------------------------------------------
/resources/images/map_reduce_like_summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/allegro/allms/HEAD/resources/images/map_reduce_like_summary.png


--------------------------------------------------------------------------------
/allms/defaults/general_defaults.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | 
3 | 
4 | @dataclass
5 | class GeneralDefaults:
6 |     MAX_RETRIES = 8
7 |     MAX_CONCURRENCY = 1000
8 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ### Feature Description
 2 | 
 3 | Few words on what the task is about...
 4 | 
 5 | #### Added
 6 | 
 7 | #### Changed
 8 | 
 9 | #### Fixed
10 | 
11 | #### Removed
12 | 


--------------------------------------------------------------------------------
/tests/test_available_models_added_to_all.py:
--------------------------------------------------------------------------------
1 | from allms import models
2 | 
3 | 
4 | class TestAvailableModelsAddedToAll:
5 | 
6 |     def test_available_models_added_to_all(self):
7 |         for model in models.get_available_models().values():
8 |             assert model.__name__ in models.__all__
9 | 


--------------------------------------------------------------------------------
/allms/constants/prompt.py:
--------------------------------------------------------------------------------
1 | class PromptConstants:
2 |     OUTPUT_DATA_MODEL = "output_data_model"
3 |     TEMPLATE_STR = "template"
4 |     INPUT_VARIABLES_STR = "input_variables"
5 |     PARTIAL_VARIABLES_STR = "partial_variables"
6 |     TEXT_STR = "text"
7 |     OUTPUT_DATA_MODEL_CLASS_SEPARATOR = "\n\n"
8 | 


--------------------------------------------------------------------------------
/allms/domain/input_data.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | 
 6 | class InputData(BaseModel):
 7 |     input_mappings: typing.Dict[str, str]
 8 |     id: str
 9 | 
10 |     def get_input_keys(self) -> typing.List[str]:
11 |         return list(self.input_mappings.keys())
12 | 


--------------------------------------------------------------------------------
/allms/constants/vertex_ai.py:
--------------------------------------------------------------------------------
1 | class VertexModelConstants:
2 |     RESPONSE_SEPARATOR = "<LLM_RESPONSE_SEPARATOR>"
3 |     RESPONSE_BLOCKED_STR = "<RESPONSE_BLOCKED>"
4 | 
5 |     GCP_PROJECT_ID_STR_NAME = "GCP_PROJECT_ID"
6 |     GCP_LLM_REGION_STR_NAME = "GCP_LLM_REGION"
7 |     GCP_MODEL_NAME_STR_NAME = "GCP_MODEL_NAME"
8 | 


--------------------------------------------------------------------------------
/allms/utils/logger_utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import sys
3 | 
4 | 
5 | def setup_logger() -> None:
6 |     log_format = '[%(levelname)s] %(asctime)s %(filename)s (%(lineno)d)\t- %(message)s'
7 |     log_dateformat = '%Y-%m-%d %H:%M:%S'
8 |     logging.basicConfig(format=log_format, datefmt=log_dateformat, stream=sys.stdout, level=logging.INFO)
9 | 


--------------------------------------------------------------------------------
/.github/dependabot.yaml:
--------------------------------------------------------------------------------
 1 | # https://docs.github.com/en/free-pro-team@latest/github/administering-a-repository/keeping-your-dependencies-updated-automatically
 2 | version: 2
 3 | registries:
 4 |   updates:
 5 |     - package-ecosystem: "github-actions"
 6 |       directory: "/"
 7 |       schedule:
 8 |         interval: "weekly"
 9 |     - package-ecosystem: "pip"
10 |       directory: "/"
11 |       schedule:
12 |         interval: "weekly"


--------------------------------------------------------------------------------
/allms/domain/prompt_dto.py:
--------------------------------------------------------------------------------
 1 | from pydantic import BaseModel, Field
 2 | from typing import List, Union
 3 | 
 4 | 
 5 | class SummaryOutputClass(BaseModel):
 6 |     summary: str = Field(description="Summary of a product description")
 7 | 
 8 | 
 9 | class KeywordsOutputClass(BaseModel):
10 |     keywords: List[str] = Field(description="List of keywords")
11 | 
12 | 
13 | class AggregateOutputClass(BaseModel):
14 |     summaries: List[Union[SummaryOutputClass, KeywordsOutputClass]] = Field(description="List of aggregated outputs")
15 | 


--------------------------------------------------------------------------------
/allms/defaults/long_text_chain.py:
--------------------------------------------------------------------------------
 1 | class LongTextChainDefaults:
 2 |     OVERLAP_SIZE = 50
 3 |     AGGREGATOR_DEFAULT_STR_SEPARATOR = ", "
 4 | 
 5 |     # TODO Refactor along with adding support for LongDocument processing
 6 |     AGGREGATION_PROMPT = """You're an AI agent that combines product summaries. Write a summary of the provided summaries. Keep the most important information and discard
 7 |             redundant information. The input is a JSON object.
 8 | 
 9 |             {output_data_model}
10 | 
11 |             {text}
12 |         """
13 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | export TWINE_USERNAME?=__token__
 2 | export TWINE_PASSWORD?=your_token
 3 | 
 4 | install-poetry::
 5 | 	python3 -m pip install pip --upgrade
 6 | 	python3 -m pip install poetry==1.5.0
 7 | 
 8 | install-env::
 9 | 	poetry install --all-extras --ansi --no-root
10 | 
11 | build::
12 | 	poetry run python -m build --sdist --wheel .
13 | 
14 | linter::
15 | 	poetry run pylint allms --reports=no --output-format=colorized --fail-under=8.0
16 | 
17 | tests::
18 | 	poetry run python -m pytest -s --verbose
19 | 
20 | publish::
21 | 	poetry run python -m twine upload --verbose dist/*
22 | 
23 | docs::
24 | 	poetry run mkdocs build
25 | 


--------------------------------------------------------------------------------
/allms/constants/azure.py:
--------------------------------------------------------------------------------
 1 | class AzureOpenAIConstants:
 2 |     OPENAI_API_TYPE_STR_NAME: str = "OPENAI_API_TYPE"
 3 |     OPENAI_API_BASE_STR_NAME: str = "OPENAI_API_BASE"
 4 |     OPENAI_API_VERSION_STR_NAME: str = "OPENAI_API_VERSION"
 5 |     OPENAI_DEPLOYMENT_NAME_STR_NAME: str = "OPENAI_DEPLOYMENT_NAME"
 6 |     OPENAI_API_KEY_STR_NAME: str = "OPENAI_API_KEY"
 7 |     OPENAI_MODEL_NAME_STR_NAME: str = "OPENAI_MODEL_NAME"
 8 | 
 9 | 
10 | class AzureMLOnlineEndpointConstants:
11 |     AZURE_API_KEY_STR_NAME = "AZURE_API_KEY"
12 |     AZURE_ENDPOINT_URL_STR_NAME = "AZURE_ENDPOINT_URL"
13 |     AZURE_DEPLOYMENT_NAME_STR_NAME = "AZURE_DEPLOYMENT_NAME"
14 | 


--------------------------------------------------------------------------------
/allms/defaults/azure_defaults.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | 
 3 | 
 4 | @dataclass
 5 | class AzureGptTurboDefaults:
 6 |     OPENAI_API_TYPE: str = "azure"
 7 |     OPENAI_MODEL_NAME: str = "gpt-3.5-turbo"
 8 | 
 9 |     # These values were chosen based on the default values used by the LLM provider
10 |     MODEL_TOTAL_MAX_TOKENS: int = 4096
11 |     TEMPERATURE = 0.0
12 |     MAX_OUTPUT_TOKENS: int = 512
13 |     REQUEST_TIMEOUT_S = 60
14 | 
15 | 
16 | @dataclass
17 | class AzureLlama2Defaults:
18 |     MODEL_TOTAL_MAX_TOKENS: int = 4096
19 |     MAX_OUTPUT_TOKENS: int = 512
20 |     TEMPERATURE = 0.0
21 |     TOP_P = 1.0
22 | 
23 | 
24 | @dataclass
25 | class AzureMistralAIDefaults:
26 |     MODEL_TOTAL_MAX_TOKENS: int = 8192
27 |     MAX_OUTPUT_TOKENS: int = 1024
28 |     TEMPERATURE = 0.0
29 |     TOP_P = 1.0
30 | 


--------------------------------------------------------------------------------
/allms/defaults/vertex_ai.py:
--------------------------------------------------------------------------------
 1 | class PalmModelDefaults:
 2 |     # These values were chosen based on the default values used by the LLM provider
 3 |     GCP_MODEL_NAME = "text-bison@001"
 4 |     MODEL_TOTAL_MAX_TOKENS = 8192
 5 |     MAX_OUTPUT_TOKENS = 1024
 6 |     TEMPERATURE = 0.0
 7 |     TOP_P = 0.95
 8 |     TOP_K = 40
 9 |     VERBOSE = True
10 | 
11 | 
12 | class GeminiModelDefaults:
13 |     GCP_MODEL_NAME = "gemini-1.5-flash-001"
14 |     MODEL_TOTAL_MAX_TOKENS = 30720
15 |     MAX_OUTPUT_TOKENS = 2048
16 |     TEMPERATURE = 0.0
17 |     TOP_P = 0.95
18 |     TOP_K = 40
19 |     VERBOSE = True
20 | 
21 | 
22 | class GemmaModelDefaults:
23 |     GCP_MODEL_NAME = "gemma"
24 |     MODEL_TOTAL_MAX_TOKENS = 8192
25 |     MAX_OUTPUT_TOKENS = 1024
26 |     TEMPERATURE = 0.0
27 |     TOP_P = 0.95
28 |     TOP_K = 40
29 |     VERBOSE = True
30 | 


--------------------------------------------------------------------------------
/allms/domain/response.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | from allms.domain.input_data import InputData
 6 | 
 7 | 
 8 | class ResponseParsingOutput(BaseModel):
 9 |     response: typing.Optional[typing.Any]
10 |     error_message: typing.Optional[str]
11 | 
12 | 
13 | class ResponseData(BaseModel):
14 |     response: typing.Optional[typing.Any] = None
15 |     input_data: typing.Optional[InputData] = None
16 | 
17 |     number_of_prompt_tokens: typing.Optional[int] = None
18 |     number_of_generated_tokens: typing.Optional[int] = None
19 |     error: typing.Optional[str] = None
20 | 
21 |     # Without this, only classes inheriting from the pydantic BaseModel are allowed as field types. Exception isn't
22 |     # such a class and that's why we need it.
23 |     class Config:
24 |         arbitrary_types_allowed = True
25 | 


--------------------------------------------------------------------------------
/allms/domain/enumerables.py:
--------------------------------------------------------------------------------
 1 | from enum import Enum
 2 | from typing import List
 3 | 
 4 | 
 5 | class ListConvertableEnum(Enum):
 6 |     @classmethod
 7 |     def get_values(cls) -> List[str]:
 8 |         return list(map(lambda field: field.value, cls))
 9 | 
10 | 
11 | class AggregationLogicForLongInputData(str, ListConvertableEnum):
12 |     SIMPLE_CONCATENATION = "SIMPLE_CONCATENATION"
13 |     REDUCE_BY_LLM_PROMPTING = "REDUCE_BY_LLM_PROMPTING"
14 | 
15 | 
16 | class AvailableModels(str, ListConvertableEnum):
17 |     AZURE_OPENAI_MODEL = "azure_openai"
18 |     AZURE_LLAMA2_MODEL = "azure_llama2"
19 |     AZURE_MISTRAL_MODEL = "azure_mistral"
20 |     VERTEXAI_PALM2_MODEL = "vertexai_palm2"
21 |     VERTEXAI_GEMINI_MODEL = "vertexai_gemini"
22 |     VERTEXAI_GEMMA_MODEL = "vertexai_gemma"
23 | 
24 | 
25 | class LanguageModelTask(str, ListConvertableEnum):
26 |     SUMMARY = "SUMMARY"
27 |     KEYWORDS = "KEYWORDS"
28 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: allms
 2 | theme: readthedocs
 3 | nav:
 4 |   - Introduction: index.md
 5 |   - Installation & Quickstart: installation_and_quick_start.md
 6 |   - F.A.Q: faq.md
 7 |   - User guide:
 8 |       - Basic: usage/basic.md
 9 |       - Advanced: usage/advanced.md
10 |       - Forcing Structured Output Format: usage/forcing_response_format.md
11 |       - How to Deploy Open-source Models on Azure and GCP?: usage/deploy_open_source_models.md
12 |   - API:
13 |       - Domain: api/input_output_dataclasses.md
14 |       - Models:
15 |           - Azure Llama2: api/models/azure_llama2_model.md
16 |           - Azure Mistral: api/models/azure_mistral_model.md
17 |           - Azure OpenAI GPT: api/models/azure_openai_model.md
18 |           - VertexAI PaLM2: api/models/vertexai_palm_model.md
19 |           - VertexAI Gemini: api/models/vertexai_gemini_model.md
20 |           - VertexAI Gemma: api/models/vertexai_gemma.md


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Release
 2 | on:
 3 |   release:
 4 |     types: [created]
 5 |   workflow_dispatch:
 6 | jobs:
 7 |   release:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v2
11 |       - name: Set up Python 3.10.x
12 |         uses: actions/setup-python@v2
13 |         with:
14 |           python-version: "3.10"
15 |         env:
16 |           AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache
17 |           RUNNER_TOOL_CACHE: /opt/hostedtoolcache
18 |       - run: python -m pip install build
19 |       - name: Install poetry
20 |         run: make install-poetry
21 |       - name: Install dependencies
22 |         run: make install-env
23 |       - name: Build allms package
24 |         run: make build
25 |       - name: Publish allms package to PyPI
26 |         env:
27 |           TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
28 |         run: |
29 |           make publish
30 |         
31 | 


--------------------------------------------------------------------------------
/tests/test_utf_characters_data.py:
--------------------------------------------------------------------------------
 1 | import html.entities
 2 | from unittest.mock import patch
 3 | 
 4 | import pytest
 5 | 
 6 | 
 7 | class TestModelBehaviorForSpecialCharacters:
 8 |     @patch("langchain.chains.base.Chain.arun")
 9 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
10 |     @pytest.mark.parametrize("input_character", list(html.entities.entitydefs.values()))
11 |     def test_model_is_not_broken_by_special_characters(self, tokens_mock, arun_mock, input_character, models):
12 |         # GIVEN
13 |         arun_mock.return_value = f"{input_character}"
14 |         tokens_mock.return_value = 1
15 | 
16 |         # WHEN & THEN
17 |         for model in models.values():
18 |             response = model.generate(
19 |                 f"This is prompt with broken sign {input_character} and the model should work.")
20 |             assert response[0].error is None
21 |             assert response[0].response == input_character
22 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "allms"
 3 | version = "1.0.13"
 4 | description = ""
 5 | authors = ["Allegro Opensource <opensource@allegro.com>"]
 6 | readme = "README.md"
 7 | packages = [{include = "allms"}]
 8 | 
 9 | [tool.poetry.dependencies]
10 | python = ">=3.9.0,<4.0"
11 | fsspec = "^2025.3.0"
12 | google-cloud-aiplatform = "1.85.0"
13 | pydash = "^8.0.5"
14 | transformers = "^4.49.0"
15 | pydantic = "^2.10.6"
16 | langchain = "0.3.21"
17 | tiktoken = "^0.9.0"
18 | openai = "1.68.0"
19 | pytest-mock = "^3.14.0"
20 | respx = "^0.22.0"
21 | langchain-community = "^0.3.20"
22 | langchain-google-vertexai = "^2.0.15"
23 | sentencepiece = "^0.2.0"
24 | langchain-openai = "^0.3.9"
25 | 
26 | [tool.poetry.group.dev.dependencies]
27 | pytest = "^8.3.5"
28 | pylint = "^3.3.6"
29 | mkdocs = "^1.6.1"
30 | build = "^1.2.2.post1"
31 | twine = "^6.1.0"
32 | 
33 | [build-system]
34 | requires = ["poetry-core"]
35 | build-backend = "poetry.core.masonry.api"
36 | 


--------------------------------------------------------------------------------
/allms/constants/input_data.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | 
 3 | 
 4 | class IODataConstants:
 5 |     TEXT = "text"
 6 |     ID = "id"
 7 | 
 8 |     PROMPT_TOKENS_NUMBER = "number_of_prompt_tokens"
 9 |     GENERATED_TOKENS_NUMBER = "number_of_generated_tokens"
10 | 
11 |     RESPONSE_STR_NAME = "response"
12 | 
13 |     ERROR_MESSAGE_STR = "Response error"
14 |     VALUE_ERROR_MESSAGE = "Value Error has occurred"
15 |     INVALID_ARGUMENT_MESSAGE = "Invalid Argument Exception"
16 |     CONTENT_FILTER_MESSAGE = "Content Filter Message"
17 |     TIMEOUT_ERROR_MESSAGE = "Timeout Error"
18 | 
19 |     SUPPORTED_INPUT_DATA_FORMAT = "csv"
20 |     DEFAULT_ID = "DEFAULT_ID"
21 | 
22 |     @staticmethod
23 |     def get_columns_for_df_with_responses(input_keys: typing.List[str]) -> typing.List[str]:
24 |         return input_keys + [
25 |             IODataConstants.ID,
26 |             IODataConstants.RESPONSE_STR_NAME,
27 |             IODataConstants.PROMPT_TOKENS_NUMBER,
28 |             IODataConstants.GENERATED_TOKENS_NUMBER
29 |         ]
30 | 


--------------------------------------------------------------------------------
/allms/models/azure_base.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import typing
 3 | from concurrent.futures import ThreadPoolExecutor
 4 | 
 5 | from langchain.callbacks.manager import AsyncCallbackManagerForLLMRun
 6 | from langchain_community.chat_models.azureml_endpoint import AzureMLChatOnlineEndpoint
 7 | 
 8 | 
 9 | class AzureMLOnlineEndpointAsync(AzureMLChatOnlineEndpoint):
10 | 
11 |     async def _acall(
12 |             self,
13 |             prompt: str,
14 |             stop: typing.Optional[typing.List[str]] = None,
15 |             run_manager: typing.Optional[AsyncCallbackManagerForLLMRun] = None,
16 |             **kwargs: typing.Any,
17 |     ) -> str:
18 |         # Under the hood, langchain uses urllib.request to query the Azure ML Endpoint. urllib.request is not compatible
19 |         # with asyncio, and that's why we had to implement the function this way
20 |         task_executor = ThreadPoolExecutor()
21 |         return await asyncio.wrap_future(
22 |             task_executor.submit(self._call, prompt, stop, run_manager, **kwargs)
23 |         )
24 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy docs
 2 | on:
 3 |   push:
 4 |     branches: ["main"]
 5 |     paths:
 6 |     - 'docs/**'
 7 |     - 'mkdocs.yml'
 8 |     - 'Pipfile'
 9 |   workflow_dispatch:
10 | permissions:
11 |   contents: read
12 |   pages: write
13 |   id-token: write
14 | concurrency:
15 |   group: "pages"
16 |   cancel-in-progress: false
17 | jobs:
18 |   deploy:
19 |     environment:
20 |       name: github-pages
21 |       url: ${{ steps.deployment.outputs.page_url }}
22 |     runs-on: ubuntu-latest
23 |     steps:
24 |       - uses: actions/checkout@v4
25 |       - uses: actions/configure-pages@v4
26 |       - uses: actions/setup-python@v4
27 |         with:
28 |           python-version: '3.10'
29 |       - run: python -m pip install build
30 |       - run: make install-poetry
31 |       - run: make install-env
32 |       - run: make docs
33 |       - uses: actions/upload-pages-artifact@v3
34 |         with:
35 |           path: 'site'
36 |       - name: Deploy to GitHub Pages
37 |         id: deployment
38 |         uses: actions/deploy-pages@v4
39 |         
40 | 


--------------------------------------------------------------------------------
/docs/faq.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Frequently Asked Questions
 3 | 
 4 | ### 1. How to use the allms in a python notebook?
 5 | When using the `allms` library, which utilizes asynchronous programming under the hood, you must install the `nest-asyncio` library to use it in a Jupyter notebook environment.
 6 | 
 7 | To ensure proper functionality, execute the following code at the beginning of your notebook:
 8 | ```jupyterpython
 9 | !pip install nest-asyncio
10 | import nest_asyncio
11 | nest_asyncio.apply()
12 | ```
13 | 
14 | 
15 | 
16 | ### 2. How can I estimate the cost of my queries?
17 | 
18 | The model provides information for each record about the count of tokens in the prompt and the count of generated tokens.
19 | In most cases, pricing for Language Models (LLMs) is determined based on the total number of tokens processed, which encompasses both prompt tokens and generated tokens. It is essential to familiarize yourself with the pricing details offered by your service provider to understand the associated costs. An example pricing for AzureOpenAI can be found [here](https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/).
20 | 
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yaml:
--------------------------------------------------------------------------------
 1 | name: Build
 2 | on:
 3 |   pull_request:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 | permissions:
 8 |   contents: write
 9 |   pull-requests: write
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - name: Checkout
15 |         uses: actions/checkout@v3
16 |       - name: Setup node
17 |         uses: actions/setup-python@v3
18 |         with:
19 |           python-version: "3.10.10"
20 |       - name: Install dependencies
21 |         run: | 
22 |           make install-poetry
23 |           make install-env
24 |       - name: Tests
25 |         run: |
26 |           make tests
27 |   dependabot:
28 |     runs-on: ubuntu-latest
29 |     if: ${{ github.event_name == 'pull_request' && github.actor == 'dependabot[bot]' }}
30 |     needs: build
31 |     steps:
32 |       - name: Dependabot metadata
33 |         id: metadata
34 |         uses: dependabot/fetch-metadata@v1.6.0
35 |         with:
36 |           github-token: "${{ secrets.GITHUB_TOKEN }}"
37 |       - name: Enable auto-merge for Dependabot PRs
38 |         run: gh pr merge --auto --merge "$PR_URL"
39 |         env:
40 |           PR_URL: ${{github.event.pull_request.html_url}}
41 |           GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
42 | 


--------------------------------------------------------------------------------
/allms/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Type
 2 | 
 3 | from allms.domain.configuration import HarmBlockThreshold, HarmCategory
 4 | from allms.domain.enumerables import AvailableModels
 5 | from allms.models.abstract import AbstractModel
 6 | from allms.models.azure_llama2 import AzureLlama2Model
 7 | from allms.models.azure_mistral import AzureMistralModel
 8 | from allms.models.azure_openai import AzureOpenAIModel
 9 | from allms.models.vertexai_gemini import VertexAIGeminiModel
10 | from allms.models.vertexai_palm import VertexAIPalmModel
11 | from allms.models.vertexai_gemma import VertexAIGemmaModel
12 | 
13 | __all__ = [
14 |     "AzureOpenAIModel",
15 |     "AzureLlama2Model",
16 |     "AzureMistralModel",
17 |     "VertexAIPalmModel",
18 |     "VertexAIGeminiModel",
19 |     "VertexAIGemmaModel",
20 |     "HarmCategory",
21 |     "HarmBlockThreshold",
22 |     "get_available_models"
23 | ]
24 | 
25 | 
26 | def get_available_models() -> Dict[str, Type[AbstractModel]]:
27 |     return {
28 |         AvailableModels.AZURE_OPENAI_MODEL: AzureOpenAIModel,
29 |         AvailableModels.AZURE_LLAMA2_MODEL: AzureLlama2Model,
30 |         AvailableModels.AZURE_MISTRAL_MODEL: AzureMistralModel,
31 |         AvailableModels.VERTEXAI_PALM2_MODEL: VertexAIPalmModel,
32 |         AvailableModels.VERTEXAI_GEMINI_MODEL: VertexAIGeminiModel,
33 |         AvailableModels.VERTEXAI_GEMMA_MODEL: VertexAIGemmaModel,
34 |     }
35 | 
36 | 


--------------------------------------------------------------------------------
/allms/utils/io_utils.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import logging
 3 | from pathlib import Path
 4 | from typing import Any, Dict, List, Optional, Union, OrderedDict
 5 | 
 6 | import fsspec
 7 | 
 8 | from allms.constants.input_data import IODataConstants
 9 | from allms.domain.input_data import InputData
10 | 
11 | logger = logging.getLogger(__name__)
12 | 
13 | 
14 | def load_csv(
15 |         path: str,
16 |         limit: Optional[int] = None
17 | ) -> List[OrderedDict[Any, Any]]:
18 |     logger.info(f"Loading test data from {path}")
19 |     with open(path, mode='r') as csv_file:
20 |         csv_reader = csv.DictReader(csv_file)
21 |         data = list(csv_reader)
22 |         return data[:limit] if limit else data
23 | 
24 | 
25 | def load_csv_to_input_data(path: str, limit: Optional[int] = None) -> List[InputData]:
26 |     csv_data = load_csv(path, limit=limit)
27 |     return list(
28 |         map(
29 |             lambda row: InputData(input_mappings=drop_dict_key(row, IODataConstants.ID),
30 |                                   id=str(row[IODataConstants.ID])),
31 |             csv_data
32 |         )
33 |     )
34 | 
35 | 
36 | def drop_dict_key(dictionary: Dict[Any, Any], key: Any) -> Dict[Any, Any]:
37 |     dict_copy = dictionary.copy()
38 |     dict_copy.pop(key)
39 |     return dict_copy
40 | 
41 | 
42 | def load_credentials(path: Union[str, Path]) -> str:
43 |     with fsspec.open(path, "r") as credentials_file:
44 |         return credentials_file.readline()
45 | 


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | <p align="center">
 2 | <img src="assets/images/logo.png" alt="aLLMs Logo"/>
 3 | </p>
 4 | 
 5 | # Introduction
 6 | 
 7 | `allms` is a versatile and powerful library designed to streamline the process of querying large language models, offering a user-friendly experience.  The `allms` module is designed to simplify interactions with the underlying model by providing the following features:
 8 | 
 9 | - **Simple and User-Friendly Interface**: The module offers an intuitive and easy-to-use interface, making it straightforward to work with the model.
10 | 
11 | - **Asynchronous Querying (Default)**: Requests to the model are processed asynchronously by default, ensuring efficient and non-blocking interactions.
12 | 
13 | - **Automatic Retrying Mechanism**: The module includes an automatic retrying mechanism, which helps handle transient errors and ensures that queries to the model are robust.
14 | 
15 | - **Error Handling and Management**: Errors that may occur during interactions with the model are handled and managed gracefully, providing informative error messages and potential recovery options.
16 | 
17 | - **Simple Output Parsing**: The module simplifies the process of parsing and working with the model's output, allowing you to easily extract the information you need.
18 | 
19 | 
20 | 
21 | ### Supported Models
22 | 
23 | Currently, the library supports:
24 | 
25 | * OpenAI models hosted on Microsoft Azure (`gpt-3.5-turbo`, `gpt4`, `gpt4-turbo`);
26 | * Google Cloud Platform VertexAI models (`PaLM2`, `Gemini`);
27 | * Open-source models `Llama2` and `Mistral` self-deployed on Azure and `Gemma` self-deployed on GCP
28 | 
29 | 


--------------------------------------------------------------------------------
/allms/exceptions/validation_input_data_exceptions.py:
--------------------------------------------------------------------------------
 1 | def get_missing_input_data_in_prompt_message(example_id: str) -> str:
 2 |     return f"Missing input_keys in the prompt. Error occurred for id={example_id}"
 3 | 
 4 | 
 5 | def get_missing_input_data_in_input_data_message(example_id: str) -> str:
 6 |     return f"Missing input_keys in the input data. Error occurred for id={example_id}"
 7 | 
 8 | 
 9 | def get_different_number_of_inputs_message(example_id: str) -> str:
10 |     return (f"Number of input keys in input_data and prompt are different."
11 |             f"If your intention is to instruct the model to output a JSON, make sure you are using double curly brackets."
12 |             f" Please make sure the input_keys are consistent."
13 |             f" Error has occurred for id={example_id}")
14 | 
15 | 
16 | def get_different_input_keys_message(example_id: str) -> str:
17 |     return (f"Input variables in the prompt and in the input_data are different. Please make sure"
18 |             f"the input_keys are consistent. "
19 |             f"If your intention is to instruct the model to output a JSON, make sure you are using double curly brackets."
20 |             f"Error has occurred for id={example_id}")
21 | 
22 | 
23 | def get_prompt_contains_input_key_when_missing_input_data() -> str:
24 |     return f"When no input_data is provided prompt cannot contain any input_key."
25 | 
26 | 
27 | def get_system_prompt_contains_input_variables() -> str:
28 |     return "System prompt cannot contain any input variables. Please fix your system message and try again."
29 | 
30 | 
31 | def get_system_prompt_is_not_supported_by_model() -> str:
32 |     return "Mistral-based models don't support `system_prompt` parameter."
33 | 


--------------------------------------------------------------------------------
/docs/usage/basic.md:
--------------------------------------------------------------------------------
 1 | # Basic Usage
 2 | 
 3 | ## Single Query 
 4 | 
 5 | In the simplest approach you just need to pass a prompt and the model will provide a response for it.
 6 | 
 7 | ```python
 8 | from allms.models import AzureOpenAIModel
 9 | from allms.domain.configuration import AzureOpenAIConfiguration
10 | from allms.domain.response import ResponseData
11 | 
12 | configuration = AzureOpenAIConfiguration(
13 |     api_key="<OPENAI_API_KEY>",
14 |     base_url="<OPENAI_API_BASE>",
15 |     api_version="<OPENAI_API_VERSION>",
16 |     deployment="<OPENAI_API_DEPLOYMENT_NAME>",
17 |     model_name="<OPENAI_API_MODEL_NAME>"
18 | )
19 | 
20 | model = AzureOpenAIModel(config=configuration)
21 | 
22 | response = model.generate("What is the capital of Poland?")
23 | print(response)
24 | 
25 | # [ResponseData(response='The capital of Poland is Warsaw.', input_data=None, number_of_prompt_tokens=7, number_of_generated_tokens=7, error=None)]
26 | ```
27 | 
28 | As a response you'll get `List[ResponseData]`, where the first element will contain response from the model in the
29 | `ResponseData.response` field and also information about number of prompt and generated tokens. If any error occurred
30 | also `ResponseData.error` field will be filled with the actual exception.
31 | 
32 | ## Single Query with System Prompt
33 | 
34 | A System Prompt can be passed along with a standard prompt. Please note that adding a system prompt will increase the 
35 | prompt token count for your query, increasing costs and latency.
36 | 
37 | ```python
38 | response = model.generate(
39 |     system_prompt="You are an AI agent answering questions like a student during an exam. Answer the question in Polish.",
40 |     prompt="What is the capital of Poland?"
41 | )
42 | print(response)
43 | # Stolica Polski to Warszawa.
44 | ```
45 | 
46 | 


--------------------------------------------------------------------------------
/docs/api/input_output_dataclasses.md:
--------------------------------------------------------------------------------
 1 | ## `class allms.domain.input_data.InputData` dataclass
 2 | ```python
 3 | @dataclass
 4 | class InputData:
 5 |     input_mappings: Dict[str, str]
 6 |     id: str
 7 | ```
 8 | #### Fields
 9 | - `input_mappings` (`Dict[str, str]`): Contains mapping from symbolic variables used in the prompt to the actual data
10 |   that will be injected in place of these variables. You have to provide a map for each of symbolic variable used
11 |   in the prompt. 
12 | - `id` (`str`): Unique identifier. Requests are done in an async mode, so the order of the responses is not the same
13 |    as the order of the input data, so this field can be used to identify them.
14 | 
15 | ## `class allms.domain.response.ResponseData` dataclass
16 | ```python
17 | @dataclass
18 | class ResponseData:
19 |     response: Union[str, BaseModel]
20 |     input_data: Optional[InputData] = None
21 | 
22 |     number_of_prompt_tokens: Optional[int] = None
23 |     number_of_generated_tokens: Optional[int] = None
24 |     error: Optional[str] = None
25 | ```
26 | #### Fields
27 | - `response` (`Union[str, BaseModel]`): Contains response of the model. If `output_data_model_class` param was provided
28 |   to the `generate()` method, it'll contain response parsed to the provided class. If `output_data_model_class` wasn't
29 |   provided, it'll contain raw string returned from the model. 
30 | - `input_data` (`Optional[InputData]`): If `input_data` was provided to the `generate()` method, it'll copy-paste that
31 |   data to this field.
32 | - `number_of_prompt_tokens` (`int`): Number of tokens used in the prompt.
33 | - `number_of_generated_tokens` (`str`): Number of tokens generated by the model. 
34 | - `error` (`str`): If any error that prevented from completing the generation pipeline fully occurred, it'll be listed
35 |    here.
36 | 
37 | 


--------------------------------------------------------------------------------
/allms/domain/configuration.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass, field
 2 | from typing import Dict, Optional, Sequence
 3 | 
 4 | import google.oauth2.credentials
 5 | from langchain_google_vertexai import HarmBlockThreshold, HarmCategory
 6 | 
 7 | from allms.defaults.vertex_ai import GeminiModelDefaults, PalmModelDefaults
 8 | 
 9 | 
10 | @dataclass
11 | class AzureOpenAIConfiguration:
12 |     base_url: str
13 |     deployment: str
14 |     model_name: str
15 |     api_version: str
16 |     api_key: Optional[str] = None
17 |     azure_ad_token: Optional[str] = None
18 | 
19 | 
20 | @dataclass
21 | class AzureSelfDeployedConfiguration:
22 |     api_key: str
23 |     deployment: str
24 |     endpoint_url: str
25 | 
26 | 
27 | @dataclass
28 | class VertexAIConfiguration:
29 |     cloud_project: str
30 |     cloud_location: str
31 |     palm_model_name: Optional[str] = PalmModelDefaults.GCP_MODEL_NAME
32 |     gemini_model_name: Optional[str] = GeminiModelDefaults.GCP_MODEL_NAME
33 |     gemini_safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None
34 |     api_endpoint: Optional[str] = None
35 |     endpoint_version: Optional[str] = "v1beta1" # the same as in _VertexAIBase
36 |     api_transport: Optional[str] = None
37 |     extra_headers: Optional[Sequence[tuple[str, str]]] = field(default_factory=list)
38 |     credentials: Optional[google.oauth2.credentials.Credentials] = None
39 | 
40 | 
41 | class VertexAIModelGardenConfiguration(VertexAIConfiguration):
42 |     def __init__(
43 |         self,
44 |         cloud_project: str,
45 |         cloud_location: str,
46 |         endpoint_id: str
47 |     ):
48 |         super().__init__(
49 |             cloud_project=cloud_project,
50 |             cloud_location=cloud_location,
51 |             palm_model_name=None,
52 |             gemini_model_name=None
53 |         )
54 |         self.endpoint_id = endpoint_id
55 | 


--------------------------------------------------------------------------------
/docs/usage/deploy_open_source_models.md:
--------------------------------------------------------------------------------
 1 | # How to Deploy Open-source LLMs on Azure and GCP?
 2 | 
 3 | ## Azure
 4 | 
 5 | To use Open-source models like Llama or Mistral with allms, first you have to deploy it on your own on Azure as a ML Online Endpoint. 
 6 | Here's how to do it:
 7 | 
 8 | 1. Go to [ml.azure.com](https://ml.azure.com/) and use a subscription with a workspace that has access to the
 9 |    `Model catalog`.
10 | 2. On the left click `Model catalog`, then under `Introducing Llama 2` click `View models`.
11 | 3. Click the model you want to deploy.
12 | 4. Click `Deploy -> Real-time endpoint`.
13 | 5. Select `Skip Azure AI Content Safety` and click `Proceed`.
14 | 6. Select a virtual machine and click `Deploy`. You must have sufficient quota to deploy the models. 
15 | 7. In the menu on the left, click `Endpoints` and select the endpoint you've just created.
16 | 8. After the deployment is complete, you'll see `Consume` tab where the endpoint URL and authentication key will be
17 |    provided.
18 | 9. Now you can start using the model by configuring it as in the example below:
19 | 
20 | ```python
21 | from allms.models import AzureLlama2Model
22 | from allms.domain.configuration import AzureSelfDeployedConfiguration
23 | 
24 | configuration = AzureSelfDeployedConfiguration(
25 |     api_key="<AZURE_API_KEY>",
26 |     endpoint_url="<AZURE_ENDPOINT_URL>",
27 |     deployment="<AZURE_DEPLOYMENT_NAME>"
28 | )
29 | 
30 | llama_model = AzureLlama2Model(config=configuration)
31 | llama_response = llama_model.generate("2+2 is?")
32 | ```
33 | 
34 | 
35 | In case of any problems with deployment, you can review this guide on the Azure blog: 
36 | [Introducing Llama 2 on Azure](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/introducing-llama-2-on-azure/ba-p/3881233)
37 | 
38 | ## GCP
39 | [Follow the following guide](https://cloud.google.com/vertex-ai/docs/start/explore-models#deploy-a-model) to deploy a model on the GCP VertexAI Model Garden. 


--------------------------------------------------------------------------------
/allms/models/azure_openai.py:
--------------------------------------------------------------------------------
 1 | from asyncio import AbstractEventLoop
 2 | from typing import Optional
 3 | 
 4 | from langchain_openai import AzureChatOpenAI
 5 | 
 6 | from allms.defaults.azure_defaults import AzureGptTurboDefaults
 7 | from allms.defaults.general_defaults import GeneralDefaults
 8 | from allms.domain.configuration import AzureOpenAIConfiguration
 9 | from allms.models.abstract import AbstractModel
10 | 
11 | 
12 | class AzureOpenAIModel(AbstractModel):
13 |     def __init__(
14 |             self,
15 |             config: AzureOpenAIConfiguration,
16 |             temperature: float = AzureGptTurboDefaults.TEMPERATURE,
17 |             max_output_tokens: int = AzureGptTurboDefaults.MAX_OUTPUT_TOKENS,
18 |             request_timeout_s: int = AzureGptTurboDefaults.REQUEST_TIMEOUT_S,
19 |             model_total_max_tokens: int = AzureGptTurboDefaults.MODEL_TOTAL_MAX_TOKENS,
20 |             max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY,
21 |             max_retries: int = GeneralDefaults.MAX_RETRIES,
22 |             event_loop: Optional[AbstractEventLoop] = None
23 |     ) -> None:
24 |         self._request_timeout_s = request_timeout_s
25 |         self._config = config
26 | 
27 |         super().__init__(
28 |             temperature=temperature,
29 |             model_total_max_tokens=model_total_max_tokens,
30 |             max_output_tokens=max_output_tokens,
31 |             max_concurrency=max_concurrency,
32 |             max_retries=max_retries,
33 |             event_loop=event_loop
34 |         )
35 | 
36 |     def _create_llm(self) -> AzureChatOpenAI:
37 |         return AzureChatOpenAI(
38 |             deployment_name=self._config.deployment,
39 |             api_version=self._config.api_version,
40 |             model_name=self._config.model_name,
41 |             azure_endpoint=self._config.base_url,
42 |             api_key=self._config.api_key,
43 |             azure_ad_token=self._config.azure_ad_token,
44 |             temperature=self._temperature,
45 |             max_tokens=self._max_output_tokens,
46 |             request_timeout=self._request_timeout_s
47 |         )
48 | 


--------------------------------------------------------------------------------
/allms/models/vertexai_palm.py:
--------------------------------------------------------------------------------
 1 | from asyncio import AbstractEventLoop
 2 | from langchain_google_vertexai import VertexAI
 3 | from typing import Optional
 4 | 
 5 | from allms.defaults.general_defaults import GeneralDefaults
 6 | from allms.defaults.vertex_ai import PalmModelDefaults
 7 | from allms.domain.configuration import VertexAIConfiguration
 8 | from allms.models.vertexai_base import CustomVertexAI
 9 | from allms.models.abstract import AbstractModel
10 | 
11 | 
12 | class VertexAIPalmModel(AbstractModel):
13 |     def __init__(
14 |             self,
15 |             config: VertexAIConfiguration,
16 |             temperature: float = PalmModelDefaults.TEMPERATURE,
17 |             top_k: int = PalmModelDefaults.TOP_K,
18 |             top_p: float = PalmModelDefaults.TOP_P,
19 |             max_output_tokens: int = PalmModelDefaults.MAX_OUTPUT_TOKENS,
20 |             model_total_max_tokens: int = PalmModelDefaults.MODEL_TOTAL_MAX_TOKENS,
21 |             max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY,
22 |             max_retries: int = GeneralDefaults.MAX_RETRIES,
23 |             verbose: bool = PalmModelDefaults.VERBOSE,
24 |             event_loop: Optional[AbstractEventLoop] = None
25 |     ) -> None:
26 |         self._top_p = top_p
27 |         self._top_k = top_k
28 |         self._verbose = verbose
29 |         self._config = config
30 | 
31 |         super().__init__(
32 |             temperature=temperature,
33 |             model_total_max_tokens=model_total_max_tokens,
34 |             max_output_tokens=max_output_tokens,
35 |             max_concurrency=max_concurrency,
36 |             max_retries=max_retries,
37 |             event_loop=event_loop
38 |         )
39 | 
40 |     def _create_llm(self) -> VertexAI:
41 |         return CustomVertexAI(
42 |             model_name=self._config.palm_model_name,
43 |             max_output_tokens=self._max_output_tokens,
44 |             temperature=self._temperature,
45 |             top_p=self._top_p,
46 |             top_k=self._top_k,
47 |             verbose=self._verbose,
48 |             project=self._config.cloud_project,
49 |             location=self._config.cloud_location
50 |         )


--------------------------------------------------------------------------------
/allms/models/vertexai_gemma.py:
--------------------------------------------------------------------------------
 1 | from asyncio import AbstractEventLoop
 2 | 
 3 | from langchain_google_vertexai import VertexAIModelGarden
 4 | from typing import Optional
 5 | 
 6 | from allms.defaults.general_defaults import GeneralDefaults
 7 | from allms.defaults.vertex_ai import GemmaModelDefaults
 8 | from allms.domain.configuration import VertexAIModelGardenConfiguration
 9 | from allms.models.vertexai_base import VertexAIModelGardenWrapper
10 | from allms.models.abstract import AbstractModel
11 | 
12 | 
13 | class VertexAIGemmaModel(AbstractModel):
14 |     def __init__(
15 |             self,
16 |             config: VertexAIModelGardenConfiguration,
17 |             temperature: float = GemmaModelDefaults.TEMPERATURE,
18 |             top_k: int = GemmaModelDefaults.TOP_K,
19 |             top_p: float = GemmaModelDefaults.TOP_P,
20 |             max_output_tokens: int = GemmaModelDefaults.MAX_OUTPUT_TOKENS,
21 |             model_total_max_tokens: int = GemmaModelDefaults.MODEL_TOTAL_MAX_TOKENS,
22 |             max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY,
23 |             max_retries: int = GeneralDefaults.MAX_RETRIES,
24 |             verbose: bool = GemmaModelDefaults.VERBOSE,
25 |             event_loop: Optional[AbstractEventLoop] = None
26 |     ) -> None:
27 |         self._top_p = top_p
28 |         self._top_k = top_k
29 |         self._verbose = verbose
30 |         self._config = config
31 | 
32 |         super().__init__(
33 |             temperature=temperature,
34 |             model_total_max_tokens=model_total_max_tokens,
35 |             max_output_tokens=max_output_tokens,
36 |             max_concurrency=max_concurrency,
37 |             max_retries=max_retries,
38 |             event_loop=event_loop
39 |         )
40 | 
41 |         self._is_json_format_injected_into_prompt = False
42 | 
43 |     def _create_llm(self) -> VertexAIModelGarden:
44 |         return VertexAIModelGardenWrapper(
45 |             model_name=GemmaModelDefaults.GCP_MODEL_NAME,
46 |             max_tokens=self._max_output_tokens,
47 |             temperature=self._temperature,
48 |             top_p=self._top_p,
49 |             top_k=self._top_k,
50 |             verbose=self._verbose,
51 |             project=self._config.cloud_project,
52 |             location=self._config.cloud_location,
53 |             endpoint_id=self._config.endpoint_id
54 |         )
55 | 


--------------------------------------------------------------------------------
/allms/models/azure_mistral.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | from asyncio import AbstractEventLoop
 3 | 
 4 | from langchain_community.chat_models.azureml_endpoint import LlamaChatContentFormatter
 5 | 
 6 | from allms.defaults.azure_defaults import AzureMistralAIDefaults
 7 | from allms.defaults.general_defaults import GeneralDefaults
 8 | from allms.domain.configuration import AzureSelfDeployedConfiguration
 9 | from allms.models.abstract import AbstractModel
10 | from allms.models.azure_base import AzureMLOnlineEndpointAsync
11 | 
12 | 
13 | class AzureMistralModel(AbstractModel):
14 | 
15 |     def __init__(
16 |             self,
17 |             config: AzureSelfDeployedConfiguration,
18 |             temperature: float = AzureMistralAIDefaults.TEMPERATURE,
19 |             top_p: float = AzureMistralAIDefaults.TOP_P,
20 |             max_output_tokens: int = AzureMistralAIDefaults.MAX_OUTPUT_TOKENS,
21 |             model_total_max_tokens: int = AzureMistralAIDefaults.MODEL_TOTAL_MAX_TOKENS,
22 |             max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY,
23 |             max_retries: int = GeneralDefaults.MAX_RETRIES,
24 |             event_loop: typing.Optional[AbstractEventLoop] = None
25 |     ) -> None:
26 |         self._top_p = top_p
27 |         self._config = config
28 | 
29 |         super().__init__(
30 |             temperature=temperature,
31 |             model_total_max_tokens=model_total_max_tokens,
32 |             max_output_tokens=max_output_tokens,
33 |             max_concurrency=max_concurrency,
34 |             max_retries=max_retries,
35 |             event_loop=event_loop
36 |         )
37 | 
38 |         self._is_json_format_injected_into_prompt = False
39 | 
40 |     def _create_llm(self) -> AzureMLOnlineEndpointAsync:
41 |         model_kwargs = {
42 |             "max_new_tokens": self._max_output_tokens, "top_p": self._top_p, "do_sample": False,
43 |             "return_full_text": False
44 |         }
45 |         if self._temperature > 0:
46 |             model_kwargs["temperature"] = self._temperature
47 |             model_kwargs["do_sample"] = True
48 | 
49 |         return AzureMLOnlineEndpointAsync(
50 |             endpoint_api_key=self._config.api_key,
51 |             endpoint_url=self._config.endpoint_url,
52 |             model_kwargs=model_kwargs,
53 |             content_formatter=LlamaChatContentFormatter(),
54 |             deployment_name=self._config.deployment
55 |         )
56 | 


--------------------------------------------------------------------------------
/allms/models/azure_llama2.py:
--------------------------------------------------------------------------------
 1 | import typing
 2 | from asyncio import AbstractEventLoop
 3 | from typing import List, Type
 4 | 
 5 | from langchain_community.chat_models.azureml_endpoint import LlamaChatContentFormatter
 6 | from pydantic import BaseModel
 7 | 
 8 | from allms.defaults.azure_defaults import AzureLlama2Defaults
 9 | from allms.defaults.general_defaults import GeneralDefaults
10 | from allms.domain.configuration import AzureSelfDeployedConfiguration
11 | from allms.domain.input_data import InputData
12 | from allms.domain.response import ResponseData
13 | from allms.models.abstract import AbstractModel
14 | from allms.models.azure_base import AzureMLOnlineEndpointAsync
15 | 
16 | 
17 | class AzureLlama2Model(AbstractModel):
18 | 
19 |     def __init__(
20 |             self,
21 |             config: AzureSelfDeployedConfiguration,
22 |             temperature: float = AzureLlama2Defaults.TEMPERATURE,
23 |             top_p: float = AzureLlama2Defaults.TOP_P,
24 |             max_output_tokens: int = AzureLlama2Defaults.MAX_OUTPUT_TOKENS,
25 |             model_total_max_tokens: int = AzureLlama2Defaults.MODEL_TOTAL_MAX_TOKENS,
26 |             max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY,
27 |             max_retries: int = GeneralDefaults.MAX_RETRIES,
28 |             event_loop: typing.Optional[AbstractEventLoop] = None
29 |     ) -> None:
30 |         self._top_p = top_p
31 |         self._config = config
32 | 
33 |         super().__init__(
34 |             temperature=temperature,
35 |             model_total_max_tokens=model_total_max_tokens,
36 |             max_output_tokens=max_output_tokens,
37 |             max_concurrency=max_concurrency,
38 |             max_retries=max_retries,
39 |             event_loop=event_loop
40 |         )
41 | 
42 |         self._is_json_format_injected_into_prompt = False
43 | 
44 |     def _create_llm(self) -> AzureMLOnlineEndpointAsync:
45 |         model_kwargs = {"max_new_tokens": self._max_output_tokens, "top_p": self._top_p, "do_sample": False}
46 |         if self._temperature > 0:
47 |             model_kwargs["temperature"] = self._temperature
48 |             model_kwargs["do_sample"] = True
49 | 
50 |         return AzureMLOnlineEndpointAsync(
51 |             endpoint_api_key=self._config.api_key,
52 |             endpoint_url=self._config.endpoint_url,
53 |             model_kwargs=model_kwargs,
54 |             content_formatter=LlamaChatContentFormatter(),
55 |             deployment_name=self._config.deployment
56 |         )
57 | 


--------------------------------------------------------------------------------
/docs/usage/error_handling.md:
--------------------------------------------------------------------------------
 1 | # Error Handling
 2 | 
 3 | ## Too long prompt
 4 | Each LLM has its own context size defined. This is the maximum number of input plus output tokens that the model is able
 5 | to consume. `allms` before sending the request to the model automatically checks if your input data will fit into
 6 | the model's context size and if not it'll either:
 7 | - raise `ValueError` saying that your prompt is too long if the prompt alone has already more tokens than the allowed
 8 |   maximum context size of the model
 9 | - log warning saying that number of prompt tokens plus generated tokens may exceed the max allowed number of tokens of 
10 |   the model if the number of tokens in the prompt plus the `max_output_tokens` you set for the model is longer than the 
11 |   allowed maximum context size of the model
12 | 
13 | In the first case, the only solution is to truncate the input data, so that it'll fit into the context size of the
14 | model.
15 | 
16 | The second case is just a warning, because the model will be able to start the generation, but it may fail randomly
17 | if the number of generated tokens will be long enough to exceed the model maximum context size. In this case you have
18 | two options. You can either truncate the input data or lower the `max_output_tokens` so that they added together won't 
19 | exceed the max context size.
20 | 
21 | In the future releases, we plan to add automatic long sequences handling. Then the package will be able to automatically
22 | split the whole input into shorter chunks, process them separately and combine the outputs. But it's not there yet.
23 | 
24 | 
25 | ## Output parsing errors
26 | If you use the [Forcing model response format](forcing_response_format.md) functionality, sometimes the model can 
27 | generate a response that actually doesn't comform to the provided output data schema. In this case, `allms` won't
28 | be able to parse the output to the provided output data model class. So as a response you'll get a `ResponseData` where
29 | `ResponseData.response` will be a raw, unparsed response from the model, and the `ResponseData.error` will be
30 | `OutputParserException`.
31 | 
32 | 
33 | ## API errors
34 | `allms` automatically retries failed requests. But even with this feature, the model can fail to return a response
35 | more times than the maximum number of retries (which is currently set to 8) or some other unexpected errors may occur.
36 | In all of these cases, `ResponseData.error` will contain the exception that occurred. So a good rule of thumb is to 
37 | first check the `ResponseData.error` and only if it's empty move to processing the response of the model.


--------------------------------------------------------------------------------
/allms/utils/response_parsing_utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import typing
 3 | 
 4 | from langchain.output_parsers import PydanticOutputParser
 5 | from langchain.schema import OutputParserException
 6 | from pydantic import ValidationError
 7 | 
 8 | from allms.domain.response import ResponseData, ResponseParsingOutput
 9 | 
10 | 
11 | class ResponseParser:
12 |     def __init__(self, parser: PydanticOutputParser) -> None:
13 |         self._json_pattern = re.compile(r"{.*?}", re.DOTALL)
14 |         self._parser = parser
15 | 
16 |     def _clean_extracted_json(self, extracted_json: str) -> str:
17 |         json_without_newlines = extracted_json.replace("\\n", "")
18 |         json_without_backslashes = json_without_newlines.replace("\\", "")
19 | 
20 |         return json_without_backslashes
21 | 
22 |     def _extract_json_from_response(self, model_response_data: ResponseData) -> str:
23 |         search_results = self._json_pattern.findall(model_response_data.response)
24 |         
25 |         if len(search_results) == 0:
26 |             return model_response_data.response
27 | 
28 |         return self._clean_extracted_json(search_results[0])
29 |         
30 |     def _parse_response(
31 |         self, 
32 |         model_response_data: ResponseData
33 |     ) -> ResponseParsingOutput:
34 |         raw_response = self._extract_json_from_response(model_response_data)
35 | 
36 |         try:
37 |             return ResponseParsingOutput(
38 |                 response=self._parser.parse(raw_response), 
39 |                 error_message=None
40 |             )
41 |         except OutputParserException as output_parser_exception:
42 |             return ResponseParsingOutput(
43 |                 response=None, 
44 |                 error_message=f"""
45 |                     An OutputParserException has occurred for the model response: {raw_response}
46 |                     The exception message: {output_parser_exception}
47 |                     """
48 |             )
49 |         except ValidationError as validation_error:
50 |             return ResponseParsingOutput(
51 |                 response=None,
52 |                 error_message=f"""
53 |                     A ValidationError has occurred for the model response: {model_response_data.response}
54 |                     The exception message: {validation_error}
55 |                     """
56 |             )
57 | 
58 |         
59 |     def parse_model_output(
60 |         self, 
61 |         model_responses_data: typing.List[ResponseData]
62 |     ) -> typing.List[ResponseData]:
63 |         parsed_responses = []
64 | 
65 |         for model_response_data in model_responses_data:
66 |             if not model_response_data.error:
67 |                 response_with_error = self._parse_response(model_response_data)
68 | 
69 |                 parsed_responses.append(ResponseData(
70 |                     input_data=model_response_data.input_data,
71 |                     response=response_with_error.response,
72 |                     error=response_with_error.error_message,
73 |                     number_of_prompt_tokens=model_response_data.number_of_prompt_tokens,
74 |                     number_of_generated_tokens=model_response_data.number_of_generated_tokens
75 | 
76 |                 ))
77 |             else:
78 |                 parsed_responses.append(model_response_data)
79 | 
80 |         return parsed_responses


--------------------------------------------------------------------------------
/docs/api/models/azure_mistral_model.md:
--------------------------------------------------------------------------------
 1 | ## `class allms.models.AzureMistralModel` API
 2 | ### Methods
 3 | ```python
 4 | __init__(
 5 |     config: AzureSelfDeployedConfiguration,
 6 |     temperature: float = 0.0,
 7 |     top_p: float = 1.0,
 8 |     max_output_tokens: int = 1024,
 9 |     model_total_max_tokens: int = 8192,
10 |     max_concurrency: int = 1000,
11 |     max_retries: int = 8
12 | )
13 | ```
14 | #### Parameters
15 | - `config` (`AzureSelfDeployedConfiguration`): an instance of `AzureSelfDeployedConfiguration` class
16 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
17 |    random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
18 | - `top_p` (`float`): Default: `1.0`.
19 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 
20 |    and generated tokens is limited by the model's context length. Default: `1024`.
21 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens.
22 |    Default: `8192`.
23 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`.
24 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`.
25 | 
26 | ---
27 | 
28 | ```python
29 | generate(
30 |     prompt: str,
31 |     input_data: typing.Optional[typing.List[InputData]] = None,
32 |     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
33 | ) -> typing.List[ResponseData]:
34 | ```
35 | #### Parameters
36 | - `prompt` (`str`): Prompt to use to query the model.
37 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
38 |    generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
39 |    in the `input_mappings` of `InputData`.
40 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): Generated response is automatically parsed to this class. WARNING: You need to manually provide the JSON format instructions in the prompt, they are not injected for this model.
41 | 
42 | Note that Mistral-based models currently don't support system prompts.
43 | 
44 | #### Returns
45 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
46 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 
47 | 
48 | ---
49 | 
50 | ## `class allms.domain.configuration.AzureSelfDeployedConfiguration` API
51 | ```python
52 | AzureSelfDeployedConfiguration(
53 |     api_key: str,
54 |     endpoint_url: str,
55 |     deployment: str
56 | )
57 | ```
58 | #### Parameters
59 | - `api_key` (`str`): Authentication key for the endpoint.
60 | - `endpoint_url` (`str`): URL of pre-existing endpoint.
61 | - `deployment` (`str`): The name under which the model was deployed.
62 | 
63 | ---
64 | 
65 | ### Example usage
66 | 
67 | ```python
68 | from allms.models import AzureMistralModel
69 | from allms.domain.configuration import AzureSelfDeployedConfiguration
70 | 
71 | configuration = AzureSelfDeployedConfiguration(
72 |     api_key="<AZURE_API_KEY>",
73 |     endpoint_url="<AZURE_ENDPOINT_URL>",
74 |     deployment="<AZURE_DEPLOYMENT_NAME>"
75 | )
76 | 
77 | mistral_model = AzureMistralModel(config=configuration)
78 | mistral_response = mistral_model.generate("2+2 is?")
79 | ```


--------------------------------------------------------------------------------
/docs/api/models/azure_llama2_model.md:
--------------------------------------------------------------------------------
 1 | ## `class allms.models.AzureLlama2Model` API
 2 | ### Methods
 3 | ```python
 4 | __init__(
 5 |     config: AzureSelfDeployedConfiguration,
 6 |     temperature: float = 0.0,
 7 |     top_p: float = 1.0,
 8 |     max_output_tokens: int = 512,
 9 |     model_total_max_tokens: int = 4096,
10 |     max_concurrency: int = 1000,
11 |     max_retries: int = 8
12 | )
13 | ```
14 | #### Parameters
15 | - `config` (`AzureSelfDeployedConfiguration`): an instance of `AzureSelfDeployedConfiguration` class
16 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
17 |    random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
18 | - `top_p` (`float`): Default: `1.0`.
19 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 
20 |    and generated tokens is limited by the model's context length. Default: `512`.
21 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens.
22 |    Default: `4096`.
23 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`.
24 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`.
25 | 
26 | ---
27 | 
28 | ```python
29 | generate(
30 |     prompt: str,
31 |     system_prompt: Optional[str] = None,
32 |     input_data: typing.Optional[typing.List[InputData]] = None,
33 |     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
34 | ) -> typing.List[ResponseData]:
35 | ```
36 | #### Parameters
37 | - `prompt` (`str`): Prompt to use to query the model.
38 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
39 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
40 |    generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
41 |    in the `input_mappings` of `InputData`.
42 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): Generated response is automatically parsed to this class. WARNING: You need to manually provide the JSON format instructions in the prompt, they are not injected for this model.
43 | 
44 | #### Returns
45 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
46 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 
47 | 
48 | ---
49 | 
50 | ## `class allms.domain.configuration.AzureSelfDeployedConfiguration` API
51 | ```python
52 | AzureSelfDeployedConfiguration(
53 |     api_key: str,
54 |     endpoint_url: str,
55 |     deployment: str
56 | )
57 | ```
58 | #### Parameters
59 | - `api_key` (`str`): Authentication key for the endpoint.
60 | - `endpoint_url` (`str`): URL of pre-existing endpoint.
61 | - `deployment` (`str`): The name under which the model was deployed.
62 | 
63 | ---
64 | 
65 | ### Example usage
66 | 
67 | ```python
68 | from allms.models import AzureLlama2Model
69 | from allms.domain.configuration import AzureSelfDeployedConfiguration
70 | 
71 | configuration = AzureSelfDeployedConfiguration(
72 |     api_key="<AZURE_API_KEY>",
73 |     endpoint_url="<AZURE_ENDPOINT_URL>",
74 |     deployment="<AZURE_DEPLOYMENT_NAME>"
75 | )
76 | 
77 | llama_model = AzureLlama2Model(config=configuration)
78 | llama_response = llama_model.generate("2+2 is?")
79 | ```


--------------------------------------------------------------------------------
/docs/api/models/vertexai_palm_model.md:
--------------------------------------------------------------------------------
 1 | ## `class allms.models.VertexAIPalmModel` API
 2 | ### Methods
 3 | ```python
 4 | __init__(
 5 |     config: VertexAIConfiguration,
 6 |     temperature: float = 0.0,
 7 |     top_k: int = 40,
 8 |     top_p: float = 0.95,
 9 |     max_output_tokens: int = 1024,
10 |     model_total_max_tokens: int = 8192,
11 |     max_concurrency: int = 1000,
12 |     max_retries: int = 8,
13 |     verbose: bool = True
14 | )
15 | ```
16 | #### Parameters
17 | - `config` (`VertexAIConfiguration`): An instance of `VertexAIConfiguration` class
18 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
19 |    random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
20 | - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected
21 |    from among the 3 most probable tokens. Default: `40`.
22 | - `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to
23 |    least until the sum of their probabilities equals the top_p value. Default: `0.95`.
24 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 
25 |    and generated tokens is limited by the model's context length. Default: `1024`.
26 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `8192`.
27 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`.
28 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`.
29 | - `verbose` (`bool`): Default: `True`.
30 | 
31 | ---
32 | 
33 | ```python
34 | generate(
35 |     prompt: str,
36 |     system_prompt: Optional[str] = None,
37 |     input_data: typing.Optional[typing.List[InputData]] = None,
38 |     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
39 | ) -> typing.List[ResponseData]:
40 | ```
41 | #### Parameters
42 | - `prompt` (`str`): Prompt to use to query the model.
43 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
44 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
45 |    generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
46 |    in the `input_mappings` of `InputData`.
47 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the
48 |   format defined by the passed class. Generated response is automatically parsed to this class.
49 | 
50 | #### Returns
51 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
52 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 
53 | 
54 | ---
55 | 
56 | ## `class allms.domain.configuration.VertexAIConfiguration` API
57 | ```python
58 | VertexAIConfiguration(
59 |     cloud_project: str,
60 |     cloud_location: str,
61 |     palm_model_name: str
62 | )
63 | ```
64 | #### Parameters
65 | - `cloud_project` (`str`): The GCP project to use when making Vertex API calls.
66 | - `cloud_location` (`str`): The region to use when making API calls.
67 | - `palm_model_name` (`str`): The specific Palm version you want to use. Default value: `text-bison@001`.
68 | 
69 | ---
70 | 
71 | ### Example usage
72 | 
73 | ```python
74 | from allms.models import VertexAIPalmModel
75 | from allms.domain.configuration import VertexAIConfiguration
76 | 
77 | configuration = VertexAIConfiguration(
78 |     cloud_project="<GCP_PROJECT_ID>",
79 |     cloud_location="<MODEL_REGION>",
80 |     palm_model_name="<MODEL_NAME>"
81 | )
82 | 
83 | vertex_model = VertexAIPalmModel(config=configuration)
84 | vertex_response = vertex_model.generate("2+2 is?")
85 | ```


--------------------------------------------------------------------------------
/docs/api/models/vertexai_gemma.md:
--------------------------------------------------------------------------------
 1 | ## `class allms.models.VertexAIGemmaModel` API
 2 | ### Methods
 3 | ```python
 4 | __init__(
 5 |     config: VertexAIModelGardenConfiguration,
 6 |     temperature: float = 0.0,
 7 |     top_k: int = 40,
 8 |     top_p: float = 0.95,
 9 |     max_output_tokens: int = 1024,
10 |     model_total_max_tokens: int = 8192,
11 |     max_concurrency: int = 1000,
12 |     max_retries: int = 8,
13 |     verbose: bool = True
14 | )
15 | ```
16 | #### Parameters
17 | - `config` (`VertexAIModelGardenConfiguration`): An instance of `VertexAIModelGardenConfiguration` class
18 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
19 |    random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
20 | - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected
21 |    from among the 3 most probable tokens. Default: `40`.
22 | - `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to
23 |    least until the sum of their probabilities equals the top_p value. Default: `0.95`.
24 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 
25 |    and generated tokens is limited by the model's context length. Default: `1024`.
26 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `8192`.
27 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`.
28 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`.
29 | - `verbose` (`bool`): Default: `True`.
30 | 
31 | ---
32 | 
33 | ```python
34 | generate(
35 |     prompt: str,
36 |     system_prompt: Optional[str] = None,
37 |     input_data: typing.Optional[typing.List[InputData]] = None,
38 |     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
39 | ) -> typing.List[ResponseData]:
40 | ```
41 | #### Parameters
42 | - `prompt` (`str`): Prompt to use to query the model.
43 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
44 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
45 |    generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
46 |    in the `input_mappings` of `InputData`.
47 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): Generated response is automatically parsed to this class. WARNING: You need to manually provide the JSON format instructions in the prompt, they are not injected for this model.
48 | 
49 | #### Returns
50 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
51 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 
52 | 
53 | ---
54 | 
55 | ## `class allms.domain.configuration.VertexAIModelGardenConfiguration` API
56 | ```python
57 | VertexAIModelGardenConfiguration(
58 |     cloud_project: str,
59 |     cloud_location: str,
60 |     endpoint_id: str
61 | )
62 | ```
63 | #### Parameters
64 | - `cloud_project` (`str`): The GCP project to use when making Vertex API calls.
65 | - `cloud_location` (`str`): The region to use when making API calls.
66 | - `endpoint_id` (`str`): ID of an endpoint where the model has been deployed.
67 | 
68 | ---
69 | 
70 | ### Example usage
71 | 
72 | ```python
73 | from allms.models import VertexAIGemmaModel
74 | from allms.domain.configuration import VertexAIModelGardenConfiguration
75 | 
76 | configuration = VertexAIModelGardenConfiguration(
77 |     cloud_project="<GCP_PROJECT_ID>",
78 |     cloud_location="<MODEL_REGION>",
79 |     endpoint_id="<ENDPOINT_ID>"
80 | )
81 | 
82 | vertex_model = VertexAIGemmaModel(config=configuration)
83 | vertex_response = vertex_model.generate("2+2 is?")
84 | ```


--------------------------------------------------------------------------------
/docs/api/models/azure_openai_model.md:
--------------------------------------------------------------------------------
 1 | ## `class allms.models.AzureOpenAIModel` API
 2 | ### Methods
 3 | ```python
 4 | __init__(
 5 |     config: AzureOpenAIConfiguration,
 6 |     temperature: float = 0.0,
 7 |     max_output_tokens: int = 512,
 8 |     request_timeout_s: int = 60,
 9 |     model_total_max_tokens: int = 4096,
10 |     max_concurrency: int = 1000,
11 |     max_retries: int = 8
12 | )
13 | ```
14 | #### Parameters
15 | - `config` (`AzureOpenAIConfiguration`): An instance of `AzureOpenAIConfiguration` class
16 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
17 |    random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
18 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 
19 |    and generated tokens is limited by the model's context length. Default: `512`.
20 | - `request_timeout_s` (`int`): Timeout for requests to the model. Default: `60`.
21 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens.
22 |    Default: `4096`.
23 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`.
24 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`.
25 | 
26 | ---
27 | 
28 | ```python
29 | generate(
30 |     prompt: str,
31 |     system_prompt: Optional[str] = None,
32 |     input_data: typing.Optional[typing.List[InputData]] = None,
33 |     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
34 | ) -> typing.List[ResponseData]:
35 | ```
36 | #### Parameters
37 | - `prompt` (`str`): Prompt to use to query the model.
38 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
39 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
40 |    generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
41 |    in the `input_mappings` of `InputData`.
42 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the
43 |   format defined by the passed class. Generated response is automatically parsed to this class.
44 | 
45 | #### Returns
46 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
47 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 
48 | 
49 | ---
50 | 
51 | ## `class allms.domain.configuration.AzureOpenAIConfiguration` API
52 | ```python
53 | AzureOpenAIConfiguration(
54 |     api_key: str,
55 |     base_url: str,
56 |     api_version: str,
57 |     deployment: str,
58 |     model_name: str
59 | )
60 | ```
61 | Sets up the environment for the `AzureOpenAIModel` model.
62 | #### Parameters
63 | - `api_key` (`str`):  The API key for your Azure OpenAI resource. You can find this in the Azure portal under
64 |    your Azure OpenAI resource.
65 | - `base_url` (`str`): The base URL for your Azure OpenAI resource. You can find this in the Azure portal under
66 |    your Azure OpenAI resource. 
67 | - `api_version` (`str`): The API version (for example: `2023-03-15-preview`)
68 | - `deployment` (`str`): The name under which the model was deployed.
69 | - `model_name` (`str`): Model name to use (for example: `{gpt-3.5-turbo, gpt-4}`)
70 | 
71 | ---
72 | 
73 | ### Example usage
74 | 
75 | ```python
76 | from allms.models import AzureOpenAIModel
77 | from allms.domain.configuration import AzureOpenAIConfiguration
78 | 
79 | configuration = AzureOpenAIConfiguration(
80 |     api_key="<OPENAI_API_KEY>",
81 |     base_url="<OPENAI_API_BASE>",
82 |     api_version="<OPENAI_API_VERSION>",
83 |     deployment="<OPENAI_API_DEPLOYMENT_NAME>",
84 |     model_name="<OPENAI_API_MODEL_NAME>"
85 | )
86 | 
87 | gpt_model = AzureOpenAIModel(config=configuration)
88 | gpt_response = gpt_model.generate("2+2 is?")
89 | ```
90 | 


--------------------------------------------------------------------------------
/docs/api/models/vertexai_gemini_model.md:
--------------------------------------------------------------------------------
 1 | ## `class allms.models.VertexAIGeminiModel` API
 2 | ### Methods
 3 | ```python
 4 | __init__(
 5 |     config: VertexAIConfiguration,
 6 |     temperature: float = 0.0,
 7 |     top_k: int = 40,
 8 |     top_p: float = 0.95,
 9 |     max_output_tokens: int = 2048,
10 |     model_total_max_tokens: int = 30720,
11 |     max_concurrency: int = 1000,
12 |     max_retries: int = 8,
13 |     verbose: bool = True
14 | )
15 | ```
16 | #### Parameters
17 | - `config` (`VertexAIConfiguration`): An instance of `VertexAIConfiguration` class
18 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more
19 |    random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`.
20 | - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected
21 |    from among the 3 most probable tokens. Default: `40`.
22 | - `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to
23 |    least until the sum of their probabilities equals the top_p value. Default: `0.95`.
24 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 
25 |    and generated tokens is limited by the model's context length. Default: `2048`.
26 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `30720`.
27 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`.
28 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`.
29 | - `verbose` (`bool`): Default: `True`.
30 | 
31 | ---
32 | 
33 | ```python
34 | generate(
35 |     prompt: str,
36 |     system_prompt: Optional[str] = None,
37 |     input_data: typing.Optional[typing.List[InputData]] = None,
38 |     output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None
39 | ) -> typing.List[ResponseData]:
40 | ```
41 | #### Parameters
42 | - `prompt` (`str`): Prompt to use to query the model.
43 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model.
44 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to
45 |    generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided
46 |    in the `input_mappings` of `InputData`.
47 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the
48 |   format defined by the passed class. Generated response is automatically parsed to this class.
49 | 
50 | #### Returns
51 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
52 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 
53 | 
54 | ---
55 | 
56 | ## `class allms.domain.configuration.VertexAIConfiguration` API
57 | ```python
58 | VertexAIConfiguration(
59 |     cloud_project: str,
60 |     cloud_location: str,
61 |     gemini_model_name: str
62 | )
63 | ```
64 | #### Parameters
65 | - `cloud_project` (`str`): The GCP project to use when making Vertex API calls.
66 | - `cloud_location` (`str`): The region to use when making API calls.
67 | - `gemini_model_name` (`str`): The specific Gemini version you want to use. Default value: `gemini-pro` (i.e. Gemini 1.0)
68 |   For an updated list of supported models, please refer to the official [Gemini API documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models).
69 | 
70 | ---
71 | 
72 | ### Example usage
73 | 
74 | ```python
75 | from allms.models import VertexAIGeminiModel
76 | from allms.domain.configuration import VertexAIConfiguration
77 | 
78 | configuration = VertexAIConfiguration(
79 |     cloud_project="<GCP_PROJECT_ID>",
80 |     cloud_location="<MODEL_REGION>",
81 |     gemini_model_name="<MODEL_NAME>"
82 | )
83 | 
84 | vertex_model = VertexAIGeminiModel(config=configuration)
85 | vertex_response = vertex_model.generate("2+2 is?")
86 | ```


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import typing
  3 | from contextlib import ExitStack
  4 | from dataclasses import dataclass
  5 | from unittest.mock import patch, Mock
  6 | 
  7 | import pytest
  8 | from langchain_community.llms.fake import FakeListLLM
  9 | 
 10 | from allms.domain.configuration import (
 11 |     AzureOpenAIConfiguration, AzureSelfDeployedConfiguration, VertexAIConfiguration, VertexAIModelGardenConfiguration)
 12 | from allms.models import AzureOpenAIModel, VertexAIPalmModel, AzureLlama2Model
 13 | from allms.models.azure_mistral import AzureMistralModel
 14 | from allms.models.vertexai_gemini import VertexAIGeminiModel
 15 | from allms.models.vertexai_gemma import VertexAIGemmaModel
 16 | 
 17 | 
 18 | class AzureOpenAIEnv:
 19 |     OPENAI_API_BASE: str = "https://dummy-endpoint.openai.azure.com/"
 20 |     OPENAI_API_VERSION: str = "dummy-api-version"
 21 |     OPENAI_DEPLOYMENT_NAME: str = "dummy-deployment-name"
 22 | 
 23 | 
 24 | @dataclass
 25 | class GenerativeModels:
 26 |     azure_gpt: typing.Optional[AzureOpenAIModel] = None
 27 |     vertex_palm: typing.Optional[VertexAIPalmModel] = None
 28 | 
 29 | @dataclass
 30 | class Client:
 31 |     client: typing.Any
 32 | 
 33 | 
 34 | class ModelWithoutAsyncRequestsMock(FakeListLLM, Client):
 35 |     def __init__(self, *args, **kwargs):
 36 |         super().__init__(responses=["{}"], client=Mock())
 37 | 
 38 | 
 39 | @pytest.fixture(scope="function")
 40 | def models():
 41 |     event_loop = asyncio.new_event_loop()
 42 | 
 43 |     with ExitStack() as stack:
 44 |         stack.enter_context(patch("allms.models.vertexai_palm.CustomVertexAI", ModelWithoutAsyncRequestsMock))
 45 |         stack.enter_context(patch("allms.models.vertexai_gemini.CustomVertexAI", ModelWithoutAsyncRequestsMock))
 46 |         stack.enter_context(patch("allms.models.vertexai_gemma.VertexAIModelGardenWrapper", ModelWithoutAsyncRequestsMock))
 47 |         stack.enter_context(patch("allms.models.azure_llama2.AzureMLOnlineEndpointAsync", ModelWithoutAsyncRequestsMock))
 48 |         stack.enter_context(patch("allms.models.azure_mistral.AzureMLOnlineEndpointAsync", ModelWithoutAsyncRequestsMock))
 49 | 
 50 |         return {
 51 |                 "azure_open_ai": AzureOpenAIModel(
 52 |                     config=AzureOpenAIConfiguration(
 53 |                         api_key="dummy_api_key",
 54 |                         base_url=AzureOpenAIEnv.OPENAI_API_BASE,
 55 |                         api_version=AzureOpenAIEnv.OPENAI_API_VERSION,
 56 |                         deployment=AzureOpenAIEnv.OPENAI_DEPLOYMENT_NAME,
 57 |                         model_name="gpt-4"
 58 |                     ),
 59 |                     event_loop=event_loop
 60 |                 ),
 61 |                 "vertex_palm": VertexAIPalmModel(
 62 |                     config=VertexAIConfiguration(
 63 |                         cloud_project="dummy-project-id",
 64 |                         cloud_location="us-central1"
 65 |                     ),
 66 |                     event_loop=event_loop
 67 |                 ),
 68 |                 "vertex_gemini": VertexAIGeminiModel(
 69 |                     config=VertexAIConfiguration(
 70 |                         cloud_project="dummy-project-id",
 71 |                         cloud_location="us-central1"
 72 |                     ),
 73 |                     event_loop=event_loop
 74 |                 ),
 75 |                 "vertex_gemma": VertexAIGemmaModel(
 76 |                     config=VertexAIModelGardenConfiguration(
 77 |                         cloud_project="dummy-project-id",
 78 |                         cloud_location="us-central1",
 79 |                         endpoint_id="dummy-endpoint-id"
 80 |                     ),
 81 |                     event_loop=event_loop
 82 |                 ),
 83 |                 "azure_llama2": AzureLlama2Model(
 84 |                     config=AzureSelfDeployedConfiguration(
 85 |                         api_key="dummy_api_key",
 86 |                         endpoint_url="https://dummy-endpoint.dummy-region.inference.ml.azure.com/score",
 87 |                         deployment="dummy_deployment_name"
 88 |                     ),
 89 |                     event_loop=event_loop
 90 |                 ),
 91 |                 "azure_mistral": AzureMistralModel(
 92 |                     config=AzureSelfDeployedConfiguration(
 93 |                         api_key="dummy_api_key",
 94 |                         endpoint_url="https://dummy-endpoint.dummy-region.inference.ml.azure.com/score",
 95 |                         deployment="dummy_deployment_name"
 96 |                     ),
 97 |                     event_loop=event_loop
 98 |                 )
 99 |             }
100 | 


--------------------------------------------------------------------------------
/allms/models/vertexai_base.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Optional, Any, Dict
  2 | 
  3 | from google.cloud.aiplatform.models import Prediction
  4 | from langchain_core.callbacks import AsyncCallbackManagerForLLMRun
  5 | from langchain_core.outputs import LLMResult, Generation
  6 | from langchain_google_vertexai import VertexAI, VertexAIModelGarden
  7 | from pydash import chain
  8 | 
  9 | from allms.constants.vertex_ai import VertexModelConstants
 10 | 
 11 | 
 12 | class GCPInvalidRequestError(Exception):
 13 |     pass
 14 | 
 15 | 
 16 | class CustomVertexAI(VertexAI):
 17 |     async def _agenerate(
 18 |         self,
 19 |         prompts: List[str],
 20 |         stop: Optional[List[str]] = None,
 21 |         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
 22 |         **kwargs: Any,
 23 |     ) -> LLMResult:
 24 |         def was_response_blocked(generation: Generation) -> bool:
 25 |             return (
 26 |                 generation.generation_info is not None 
 27 |                     and 'is_blocked' in generation.generation_info 
 28 |                     and generation.generation_info['is_blocked']
 29 |             )
 30 | 
 31 |         result = await super()._agenerate(
 32 |             prompts=prompts,
 33 |             stop=stop,
 34 |             run_manager=run_manager,
 35 |             **kwargs
 36 |         )
 37 | 
 38 |         if not all(result.generations):
 39 |             raise GCPInvalidRequestError("The response is empty. It may have been blocked due to content filtering.")
 40 | 
 41 |         return LLMResult(
 42 |             generations=(
 43 |                 chain(result.generations)
 44 |                 .map(lambda generation_candidates: (
 45 |                     chain(generation_candidates)
 46 |                     .map(
 47 |                         lambda single_candidate: Generation(
 48 |                             text=VertexModelConstants.RESPONSE_BLOCKED_STR
 49 |                         ) if was_response_blocked(single_candidate) else single_candidate
 50 |                     )
 51 |                     .value()
 52 |                 ))
 53 |                 .value()
 54 |             ),
 55 |             llm_output=result.llm_output,
 56 |             run=result.run
 57 |         )
 58 | 
 59 | 
 60 | class VertexAIModelGardenWrapper(VertexAIModelGarden):
 61 |     temperature: float = 0.0
 62 |     max_tokens: int = 128
 63 |     top_p: float = 0.95
 64 |     top_k: int = 40
 65 |     n: int = 1
 66 | 
 67 |     def __init__(self, **kwargs: Any) -> None:
 68 |         super().__init__(**kwargs)
 69 |         self.allowed_model_args = list(self._default_params.keys())
 70 | 
 71 |     @property
 72 |     def _default_params(self) -> Dict[str, Any]:
 73 |         return {
 74 |             "temperature": self.temperature,
 75 |             "max_tokens": self.max_tokens,
 76 |             "top_k": self.top_k,
 77 |             "top_p": self.top_p,
 78 |             "n": self.n
 79 |         }
 80 | 
 81 |     def _parse_response(self, predictions: "Prediction", prompts: List[str]) -> LLMResult:
 82 |         generations: List[List[Generation]] = []
 83 |         for result, prompt in zip(predictions.predictions, prompts):
 84 |             if isinstance(result, str):
 85 |                 generations.append([Generation(text=self._parse_prediction(result, prompt))])
 86 |             else:
 87 |                 generations.append(
 88 |                     [
 89 |                         Generation(text=self._parse_prediction(prediction, prompt))
 90 |                         for prediction in result
 91 |                     ]
 92 |                 )
 93 |         return LLMResult(generations=generations)
 94 | 
 95 |     def _parse_prediction(self, prediction: Any, prompt: str) -> str:
 96 |         parsed_prediction = super()._parse_prediction(prediction)
 97 |         try:
 98 |             text_to_remove = f"Prompt:\n{prompt}\nOutput:\n"
 99 |             return parsed_prediction.rsplit(text_to_remove, maxsplit=1)[1]
100 |         except Exception:
101 |             raise ValueError(f"Output returned from the model doesn't follow the expected format.")
102 | 
103 |     async def _agenerate(
104 |         self,
105 |         prompts: List[str],
106 |         stop: Optional[List[str]] = None,
107 |         run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
108 |         **kwargs: Any,
109 |     ) -> LLMResult:
110 |         kwargs = {**kwargs, **self._default_params}
111 |         instances = self._prepare_request(prompts, **kwargs)
112 |         response = await self.async_client.predict(
113 |             endpoint=self.endpoint_path, instances=instances
114 |         )
115 |         return self._parse_response(response, prompts)
116 | 
117 | 


--------------------------------------------------------------------------------
/docs/installation_and_quick_start.md:
--------------------------------------------------------------------------------
  1 | # Installation
  2 | Install the package via pip:
  3 | 
  4 | ```bash
  5 | pip install allms
  6 | ```
  7 | 
  8 | # Quick Start 
  9 | 
 10 | To use our package, you must have access to the credentials of the endpoint with the deployed model.
 11 | Each of the supported models have a different set of credentials
 12 | that must be passed in the corresponding configuration object. Below is a brief overview of how to use each of these models.
 13 | 
 14 | ## Simple usage
 15 | 
 16 | ### Azure GPT
 17 | 
 18 | ```python
 19 | from allms.models import AzureOpenAIModel
 20 | from allms.domain.configuration import AzureOpenAIConfiguration
 21 | 
 22 | configuration = AzureOpenAIConfiguration(
 23 |     api_key="<OPENAI_API_KEY>",
 24 |     base_url="<OPENAI_API_BASE>",
 25 |     api_version="<OPENAI_API_VERSION>",
 26 |     deployment="<OPENAI_API_DEPLOYMENT_NAME>",
 27 |     model_name="<OPENAI_API_MODEL_NAME>"
 28 | )
 29 | 
 30 | gpt_model = AzureOpenAIModel(config=configuration)
 31 | gpt_response = gpt_model.generate("2+2 is?")
 32 | ```
 33 | 
 34 | * `<OPENAI_API_KEY>`: The API key for your Azure OpenAI resource. You can find this in the Azure portal under your
 35 |    Azure OpenAI resource.
 36 | * `<OPENAI_API_BASE>`: The base URL for your Azure OpenAI resource. You can find this in the Azure portal under your
 37 |    Azure OpenAI resource.
 38 | * `<OPENAI_API_VERSION>`: The API version.
 39 | * `<OPENAI_API_DEPLOYMENT_NAME>`: The name under which the model was deployed.
 40 | * `<OPENAI_API_MODEL_NAME>`: The underlying model's name.
 41 | 
 42 | ### VertexAI PaLM
 43 | 
 44 | ```python
 45 | from allms.models import VertexAIPalmModel
 46 | from allms.domain.configuration import VertexAIConfiguration
 47 | 
 48 | configuration = VertexAIConfiguration(
 49 |     cloud_project="<GCP_PROJECT_ID>",
 50 |     cloud_location="<MODEL_REGION>"
 51 | )
 52 | 
 53 | palm_model = VertexAIPalmModel(config=configuration)
 54 | palm_response = palm_model.generate("2+2 is?")
 55 | ```
 56 | 
 57 | * `<GCP_PROJECT_ID>`: The GCP project in which you have access to the PALM model.
 58 | * `<MODEL_REGION>`: The region where the model is deployed.
 59 | 
 60 | ### VertexAI Gemini
 61 | 
 62 | ```python
 63 | from allms.models import VertexAIGeminiModel
 64 | from allms.domain.configuration import VertexAIConfiguration
 65 | 
 66 | configuration = VertexAIConfiguration(
 67 |     cloud_project="<GCP_PROJECT_ID>",
 68 |     cloud_location="<MODEL_REGION>"
 69 | )
 70 | 
 71 | gemini_model = VertexAIGeminiModel(config=configuration)
 72 | gemini_response = gemini_model.generate("2+2 is?")
 73 | ```
 74 | 
 75 | * `<GCP_PROJECT_ID>`: The GCP project in which you have access to the PALM model.
 76 | * `<MODEL_REGION>`: The region where the model is deployed.
 77 | 
 78 | ### VertexAI Gemma
 79 | 
 80 | ```python
 81 | from allms.models import VertexAIGemmaModel
 82 | from allms.domain.configuration import VertexAIModelGardenConfiguration
 83 | 
 84 | configuration = VertexAIModelGardenConfiguration(
 85 |     cloud_project="<GCP_PROJECT_ID>",
 86 |     cloud_location="<MODEL_REGION>",
 87 |     endpoint_id="<ENDPOINT_ID>"
 88 | )
 89 | 
 90 | gemini_model = VertexAIGemmaModel(config=configuration)
 91 | gemini_response = gemini_model.generate("2+2 is?")
 92 | ```
 93 | 
 94 | * `<GCP_PROJECT_ID>`: The GCP project in which you have access to the PALM model.
 95 | * `<MODEL_REGION>`: The region where the model is deployed.
 96 | * `<ENDPOINT_ID>`: ID of an endpoint where the model has been deployed.
 97 | 
 98 | ### Azure LLaMA 2
 99 | 
100 | ```python
101 | from allms.models import AzureLlama2Model
102 | from allms.domain.configuration import AzureSelfDeployedConfiguration
103 | 
104 | configuration = AzureSelfDeployedConfiguration(
105 |     api_key="<AZURE_API_KEY>",
106 |     endpoint_url="<AZURE_ENDPOINT_URL>",
107 |     deployment="<AZURE_DEPLOYMENT_NAME>"
108 | )
109 | 
110 | llama_model = AzureLlama2Model(config=configuration)
111 | llama_response = llama_model.generate("2+2 is?")
112 | ```
113 | 
114 | * `<AZURE_API_KEY>`: Authentication key for the endpoint.
115 | * `<AZURE_ENDPOINT_URL>`: URL of pre-existing endpoint.
116 | * `<AZURE_DEPLOYMENT_NAME>`: The name under which the model was deployed.
117 | 
118 | ### Azure Mistral
119 | 
120 | ```python
121 | from allms.models.azure_mistral import AzureMistralModel
122 | from allms.domain.configuration import AzureSelfDeployedConfiguration
123 | 
124 | configuration = AzureSelfDeployedConfiguration(
125 |     api_key="<AZURE_API_KEY>",
126 |     endpoint_url="<AZURE_ENDPOINT_URL>",
127 |     deployment="<AZURE_DEPLOYMENT_NAME>"
128 | )
129 | 
130 | mistral_model = AzureMistralModel(config=configuration)
131 | mistral_response = mistral_model.generate("2+2 is?")
132 | ```
133 | 
134 | * `<AZURE_API_KEY>`: Authentication key for the endpoint.
135 | * `<AZURE_ENDPOINT_URL>`: URL of pre-existing endpoint.
136 | * `<AZURE_DEPLOYMENT_NAME>`: The name under which the model was deployed.
137 | 


--------------------------------------------------------------------------------
/docs/usage/forcing_response_format.md:
--------------------------------------------------------------------------------
 1 | # Forcing Structured Response Format
 2 | 
 3 | ## Pydantic BaseModels Integration
 4 | 
 5 | If you want to force the model to output the response in a given JSON schema, `allms` provides an easy way to do 
 6 | it. You just need to provide a data model that describes the desired output format and the package does all the rest. 
 7 | As an output you get string already parsed to a provided data model class.
 8 | 
 9 | Here's how to use this functionality step by step:
10 | 
11 | 1. Define the desired output data model class. It needs to inherit from pydantic `BaseModel`. Each field should have
12 | type defined and a description provided in `Field()` which should describe what given field means. By providing accurate
13 | description, you make it easier for the model to generate proper response.
14 | 
15 | ```python
16 | import typing
17 |     
18 | from pydantic import BaseModel, Field
19 |     
20 | class ReviewOutputDataModel(BaseModel):
21 |     summary: str = Field(description="Summary of a product description")
22 |     should_buy: bool = Field(description="Recommendation whether I should buy the product or not")
23 |     brand_name: str = Field(description="Brand of the coffee")
24 |     aroma:str = Field(description="Description of the coffee aroma")
25 |     cons: typing.List[str] = Field(description="List of cons of the coffee")
26 | ```
27 | 
28 | 2. Provide the data model class together with prompt and input data to the `.generate()` method. `allms` will 
29 | automatically force the model to output the data in the provided format and will parse the string returned from the
30 | model to the provided data model class.
31 | 
32 | ```python
33 | review = "Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend."
34 |     
35 | prompt = "Summarize review of the coffee. Review: {review}"
36 | input_data = [
37 |     InputData(input_mappings={"review": review}, id="0")
38 | ]
39 |     
40 | responses = model.generate(
41 |     prompt=prompt, 
42 |     input_data=input_data,
43 |     output_data_model_class=ReviewOutputDataModel
44 | )
45 | response = responses[0].response
46 | ```
47 | 
48 | Now we can check the response:
49 | 
50 | ```python
51 | >>> type(response)
52 | ReviewOutputDataModel
53 | 
54 | >>> response.should_buy
55 | False
56 | 
57 | >>> response.brand_name
58 | "Blue Orca"
59 | 
60 | >>> response.aroma
61 | "Not mentioned in the review"
62 | 
63 | >>> response.cons
64 | ['Weak in terms of strength', 'Weak in terms of taste']
65 | ```
66 | 
67 | ## What to do when output formatting doesn't work?
68 | 
69 | The feature described above works only with advanced proprietary models like GPT and PaLM/Gemini. Less capable models like Llama2 or Mistral
70 | are unable to understand instructions passed as output_dataclasses.
71 | 
72 | For these less capable models, you need to manually specify in the prompt how the response should look like. You can then pass the `output_data_model_class` to try parsing the output. Using few-shot learning techniques is also advisable. In the case of JSON-like output, use double curly brackets instead of single ones, e.g. `{{"key": "value"}}` instead of `{"key": "value"}`.
73 | 
74 | ## How forcing response format works under the hood?
75 | To force the model to provide output in a desired format, under the hood `allms` automatically adds a description
76 | of the desired output format. For example, for the `ReviewOutputDataModel` the description looks like this:
77 | 
78 | ````text
79 | The output should be formatted as a JSON instance that conforms to the JSON schema below.
80 | 
81 | As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
82 | the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
83 | 
84 | Here is the output schema:
85 | ```
86 | {"properties": {"summary": {"title": "Summary", "description": "Summary of a product description", "type": "string"}, "should_buy": {"title": "Should Buy", "description": "Recommendation whether I should buy the product or not", "type": "boolean"}, "brand_name": {"title": "Brand Name", "description": "Brand of the coffee", "type": "string"}, "aroma": {"title": "Aroma", "description": "Description of the coffee aroma", "type": "string"}, "cons": {"title": "Cons", "description": "List of cons of the coffee", "type": "array", "items": {"type": "string"}}}, "required": ["summary", "should_buy", "brand_name", "aroma", "cons"]}
87 | ```
88 | ````
89 | 
90 | This feature is really helpful, but you have to keep in mind that by using it you increase the number or prompt tokens
91 | so it'll make the requests more costly (if you're using model with per token pricing)
92 | 
93 | If the model will return an output that doesn't comform to the defined data model, raw model response will be returned
94 | in `ResponseData.response` and `ResponseData.error` will be `OutputParserException`.


--------------------------------------------------------------------------------
/allms/utils/long_text_processing_utils.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | from langchain import BasePromptTemplate
 4 | from langchain.base_language import BaseLanguageModel
 5 | from langchain.schema import Document
 6 | 
 7 | from allms.defaults.long_text_chain import LongTextChainDefaults
 8 | 
 9 | 
10 | def truncate_text_to_max_size(
11 |         llm: BaseLanguageModel,
12 |         prompt_template: BasePromptTemplate,
13 |         text: str,
14 |         model_total_max_tokens: int,
15 |         max_output_tokens: int,
16 | ) -> str:
17 |     """
18 |     This function is supposed to truncate the input to fit the maximum context size of a model. The problem is that
19 |     the max context size is in tokens and in our code we operate on raw, un-tokenized strings. We can only calculate
20 |     how many tokens given string has. So to find the point on which we should truncate, this function calculates in
21 |     tokens how many times the current `text` is longer than the allowed limit. Then it assumes that the ration is true
22 |     also when reasoning on words instead of tokens. And based on this the `split_point_index` is calculated.
23 |     This is only an approximation (ration calculated on tokens is only similar to the ratio calculated on words, but in
24 |     most of the cases it won't be the same). That's why this function is used in a recursive way. It calculates the
25 |     split point, truncates the text and checks again if the total prompt length is lower than the max context size of
26 |     a model. If not it reruns itself again and if yes, then it returns the truncated text.
27 | 
28 |     Another possibility would be to implement this function using tokenizer to tokenize text to tokens, then truncate
29 |     the text, detokenize it to string and return truncated text. But for this solution, first we'd need to have a list
30 |     of tokenizers used by every model we'd like to support (now it's provided inside langchain) and second, the
31 |     tokenization and de-tokenization steps could change the input prompt by introducing some artifacts.
32 |     """
33 |     max_token_limit = get_max_allowed_number_of_tokens(model_total_max_tokens, max_output_tokens)
34 |     num_tokens = int(llm.get_num_tokens(prompt_template.format(text=text)))
35 | 
36 |     if num_tokens <= max_token_limit:
37 |         return text
38 | 
39 |     # We add `text="text"` and not empty string, because the empty string may be tokenized together with the whitespaces
40 |     # that are around it in the prompt. But when joining the actual `{text}` with the prompt instructions we get one
41 |     # additional token
42 |     num_tokens_prompt_wo_text = int(llm.get_num_tokens(prompt_template.format(text="text")))
43 |     num_tokens_text = int(llm.get_num_tokens(text))
44 |     num_tokens_left_for_text = max_token_limit - num_tokens_prompt_wo_text
45 |     if num_tokens_left_for_text <= 0:
46 |         raise ValueError("Prompt instruction (without the actual text) is longer than the allowed model input length")
47 | 
48 |     # How many times the current text is longer than the allowed length
49 |     current_to_allowed_length_ration = num_tokens_text / num_tokens_left_for_text
50 |     words = text.split()
51 |     split_point_index = int(len(words) / current_to_allowed_length_ration)
52 | 
53 |     text_truncated = " ".join(words[:split_point_index])
54 | 
55 |     return truncate_text_to_max_size(
56 |         llm=llm,
57 |         prompt_template=prompt_template,
58 |         text=text_truncated,
59 |         model_total_max_tokens=model_total_max_tokens,
60 |         max_output_tokens=max_output_tokens
61 |     )
62 | 
63 | 
64 | def split_text_to_max_size(
65 |         llm: BaseLanguageModel,
66 |         prompt_template: BasePromptTemplate,
67 |         text: str,
68 |         model_total_max_tokens: int,
69 |         max_output_tokens: int,
70 |         overlap_size: int = LongTextChainDefaults.OVERLAP_SIZE
71 | ) -> List[Document]:
72 |     max_token_limit = get_max_allowed_number_of_tokens(model_total_max_tokens, max_output_tokens)
73 |     if int(llm.get_num_tokens(prompt_template.format(text=text))) < max_token_limit:
74 |         return [Document(page_content=text)]
75 | 
76 |     words = text.split()
77 |     middle_word_index = len(words) // 2
78 | 
79 |     overlap_left = overlap_size // 2
80 |     overlap_right = overlap_size - overlap_left
81 |     data_left_half = " ".join(words[:middle_word_index + overlap_left])
82 |     data_right_half = " ".join(words[middle_word_index - overlap_right:])
83 | 
84 |     return (
85 |             split_text_to_max_size(llm=llm, prompt_template=prompt_template, text=data_left_half,
86 |                                    model_total_max_tokens=model_total_max_tokens, max_output_tokens=max_output_tokens)
87 |             + split_text_to_max_size(llm=llm, prompt_template=prompt_template, text=data_right_half,
88 |                                      model_total_max_tokens=model_total_max_tokens, max_output_tokens=max_output_tokens)
89 |     )
90 | 
91 | 
92 | def get_max_allowed_number_of_tokens(model_total_max_tokens: int, max_output_tokens: int) -> int:
93 |     buffer = 50  # for things like BOS, EOS and other unexpected things
94 |     return model_total_max_tokens - max_output_tokens - buffer
95 | 


--------------------------------------------------------------------------------
/allms/chains/long_text_processing_chain.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | from functools import reduce
  3 | from typing import List, Any, Tuple, Optional, Union
  4 | 
  5 | from langchain import LLMChain, BasePromptTemplate
  6 | from langchain.base_language import BaseLanguageModel
  7 | from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
  8 | from langchain.schema import Document
  9 | 
 10 | from allms.domain.enumerables import AggregationLogicForLongInputData, LanguageModelTask
 11 | from allms.domain.input_data import InputData
 12 | from allms.domain.prompt_dto import (AggregateOutputClass, KeywordsOutputClass, SummaryOutputClass)
 13 | from allms.utils.long_text_processing_utils import split_text_to_max_size
 14 | 
 15 | 
 16 | class LongTextProcessingChain(BaseCombineDocumentsChain):
 17 |     task: LanguageModelTask
 18 |     model_total_max_tokens: int
 19 |     max_output_tokens: int
 20 |     map_llm_chain: LLMChain
 21 |     reduce_llm_chain: LLMChain
 22 |     input_data_variable_name: str
 23 |     aggregation_strategy: AggregationLogicForLongInputData
 24 | 
 25 |     @property
 26 |     def _chain_type(self) -> str:
 27 |         return "long_description_chain"
 28 | 
 29 |     async def combine_docs(self, input_data: Document, **kwargs: Any) -> Tuple[str, dict]:
 30 |         chunked_input: List[Document] = split_text_to_max_size(
 31 |             llm=self.map_llm_chain.llm,
 32 |             prompt_template=self.map_llm_chain.prompt,
 33 |             text=input_data,
 34 |             model_total_max_tokens=self.model_total_max_tokens,
 35 |             max_output_tokens=self.max_output_tokens
 36 |         )
 37 | 
 38 |         chunk_responses = await self._map_step(chunked_input)
 39 |         aggregated_response = self._reduce_step(chunk_responses)
 40 | 
 41 |         return aggregated_response, {}
 42 | 
 43 |     async def acombine_docs(self, input_data: List[Document], **kwargs: Any) -> Tuple[str, dict]:
 44 |         return await self.combine_docs(input_data)
 45 | 
 46 |     async def _map_step(self, chunked_document: List[Document]) -> List[str]:
 47 |         results = list(map(lambda document: self.map_llm_chain.arun(document), chunked_document))
 48 |         return await asyncio.gather(*results)
 49 | 
 50 |     def _reduce_step(self, chunk_responses: List[InputData]) -> str:
 51 |         if self.aggregation_strategy == AggregationLogicForLongInputData.REDUCE_BY_LLM_PROMPTING:
 52 |             return self._construct_input_from_list_and_run_reduce_chain(chunk_responses)
 53 |         elif self.aggregation_strategy == AggregationLogicForLongInputData.SIMPLE_CONCATENATION:
 54 |             if self.task == LanguageModelTask.SUMMARY:
 55 |                 return self._aggregate_results_for_summary(chunk_responses).json()
 56 |             elif self.task == LanguageModelTask.KEYWORDS:
 57 |                 return self._aggregate_results_for_keywords(chunk_responses).json()
 58 | 
 59 |     def _deserialize_response(self, response: str) -> Union[SummaryOutputClass, KeywordsOutputClass]:
 60 |         if self.task == LanguageModelTask.SUMMARY:
 61 |             return SummaryOutputClass.parse_raw(response)
 62 |         elif self.task == LanguageModelTask.KEYWORDS:
 63 |             return KeywordsOutputClass.parse_raw(response)
 64 | 
 65 |     def _construct_input_from_list_and_run_reduce_chain(self, response_list: List[InputData]) -> str:
 66 |         aggregate_input = Document(
 67 |             page_content=AggregateOutputClass(summaries=[
 68 |                 self._deserialize_response(response) for response in response_list]
 69 |             ).json()
 70 |         )
 71 | 
 72 |         return self.reduce_llm_chain.run(aggregate_input.text)
 73 | 
 74 |     @staticmethod
 75 |     def _aggregate_results_for_summary(chunk_responses: List[Document]) -> SummaryOutputClass:
 76 |         return SummaryOutputClass(summary=" ".join([
 77 |             SummaryOutputClass.parse_raw(response_json).summary for response_json in chunk_responses
 78 |         ]))
 79 | 
 80 |     @staticmethod
 81 |     def _aggregate_results_for_keywords(chunk_responses: List[str]) -> KeywordsOutputClass:
 82 |         return KeywordsOutputClass(keywords=list(reduce(
 83 |             lambda x, y: x + y,
 84 |             [KeywordsOutputClass.parse_raw(response_json).keywords for response_json in chunk_responses],
 85 |             []
 86 |         )))
 87 | 
 88 | 
 89 | def load_long_text_processing_chain(
 90 |         task: LanguageModelTask,
 91 |         llm: BaseLanguageModel,
 92 |         model_total_max_tokens: int,
 93 |         max_output_tokens: int,
 94 |         map_prompt: BasePromptTemplate,
 95 |         reduce_prompt: BasePromptTemplate,
 96 |         aggregation_strategy: AggregationLogicForLongInputData,
 97 |         input_data_variable_name: str = "text",
 98 |         verbose: Optional[bool] = None
 99 | ) -> LongTextProcessingChain:
100 |     map_chain = LLMChain(llm=llm, prompt=map_prompt, verbose=verbose)
101 |     reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt, verbose=verbose)
102 | 
103 |     return LongTextProcessingChain(
104 |         task=task,
105 |         model_total_max_tokens=model_total_max_tokens,
106 |         max_output_tokens=max_output_tokens,
107 |         map_llm_chain=map_chain,
108 |         reduce_llm_chain=reduce_chain,
109 |         input_data_variable_name=input_data_variable_name,
110 |         aggregation_strategy=aggregation_strategy,
111 |         verbose=verbose
112 |     )
113 | 


--------------------------------------------------------------------------------
/docs/usage/advanced.md:
--------------------------------------------------------------------------------
  1 | # Advanced Usage
  2 | 
  3 | ## Symbolic Variables and Batch Mode
  4 | 
  5 | If you want to generate responses for a batch of examples, you can achieve this by preparing a prompt with symbolic
  6 | variables and providing input data that will be injected into this prompt. `allms` will automatically make these
  7 | requests in an async mode and retry them in case of any API error.
  8 | 
  9 | Let's say we want to classify reviews of coffee as positive or negative. Here's how to do it:
 10 | 
 11 | ```python
 12 | from allms.models import AzureOpenAIModel
 13 | from allms.domain.configuration import AzureOpenAIConfiguration
 14 | from allms.domain.input_data import InputData
 15 | 
 16 | configuration = AzureOpenAIConfiguration(
 17 |     api_key="<OPENAI_API_KEY>",
 18 |     base_url="<OPENAI_API_BASE>",
 19 |     api_version="<OPENAI_API_VERSION>",
 20 |     deployment="<OPENAI_API_DEPLOYMENT_NAME>",
 21 |     model_name="<OPENAI_API_MODEL_NAME>"
 22 | )
 23 | 
 24 | model = AzureOpenAIModel(config=configuration)
 25 | 
 26 | positive_review_0 = "Very good coffee, lightly roasted, with good aroma and taste. The taste of sourness is barely noticeable (which is good because I don't like sour coffees). After grinding, the aroma spreads throughout the room. I recommend it to all those who do not like strongly roasted and pitch-black coffees. A very good solution is to close the package with string, which allows you to preserve the aroma and freshness."
 27 | positive_review_1 = "Delicious coffee!! Delicate, just the way I like it, and the smell after opening is amazing. It smells freshly roasted. Faithful to Lavazza coffee for years, I decided to look for other flavors. Based on the reviews, I blindly bought it and it was a 10-shot, it outperformed Lavazze in taste. For me the best."
 28 | negative_review = "Marketing is doing its job and I was tempted too, but this coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend."
 29 | 
 30 | prompt = "You'll be provided with a review of a coffe. Decide if the review is positive or negative. Review: {review}"
 31 | input_data = [
 32 |     InputData(input_mappings={"review": positive_review_0}, id="0"),
 33 |     InputData(input_mappings={"review": positive_review_1}, id="1"),
 34 |     InputData(input_mappings={"review": negative_review}, id="2")
 35 | ]
 36 | 
 37 | responses = model.generate(prompt=prompt, input_data=input_data)
 38 | ```
 39 | 
 40 | As an output we'll get `List[ResponseData]` where each `ResponseData` will contain response for a single example from
 41 | `input_data`. The requests are performed in an async mode, so remember that the order of the `responses` is not the same
 42 | as the order of the `input_data`. That's why together with the response, we pass also the `ResponseData.input_data` to
 43 | the output.
 44 | 
 45 | So let's see the responses:
 46 | ```python
 47 | >>> {f"review_id={response.input_data.id}": response.response for response in responses}
 48 | {
 49 |     'review_id=0': 'The review is positive.',
 50 |     'review_id=1': 'The review is positive.',
 51 |     'review_id=2': 'The review is negative.'
 52 | }
 53 | 
 54 | ```
 55 | 
 56 | ## Multiple symbolic variables
 57 | You can also define prompt with multiple symbolic variables. The rule is that each symbolic variable from the prompt
 58 | should have mapping provided in the `input_mappings` of `InputData`. Let's say we want to provide two reviews in one 
 59 | prompt and let the model decide which one of them is positive. Here's how to do it:
 60 | 
 61 | ```python
 62 | prompt = """You'll be provided with two reviews of a coffee. Decide which one is positive.
 63 | 
 64 | First review: {first_review}
 65 | Second review: {second_review}"""
 66 | input_data = [
 67 |     InputData(input_mappings={"first_review": positive_review_0, "second_review": negative_review}, id="0"),
 68 |     InputData(input_mappings={"first_review": negative_review, "second_review": positive_review_1}, id="1"),
 69 | ]
 70 | 
 71 | responses = model.generate(prompt=prompt, input_data=input_data)
 72 | ```
 73 | 
 74 | And the results:
 75 | ```python
 76 | >>> {f"example_id={response.input_data.id}": response.response for response in responses}
 77 | {
 78 |     'example_id=0': 'The first review is positive.',
 79 |     'example_id=1': 'The second review is positive.'
 80 | }
 81 | ```
 82 | 
 83 | ## Controlling the Number of Concurrent Requests
 84 | As it's written above, `allms` automatically makes requests in an async mode. By default, the maximum number of 
 85 | concurrent requests is set to 1000. You can control this value by setting the `max_concurrency` parameter when
 86 | initializing the model. Set it to a value that is appropriate for your model endpoint.
 87 | 
 88 | ## Using a common asyncio event loop
 89 | By default, each model instance has its own event loop for handling the execution of async tasks. If you want to use
 90 | a common loop for multiple models or to have a custom loop, it's possible to specify it in the model constructor:
 91 | 
 92 | ```python
 93 | import asyncio
 94 | 
 95 | from allms.models import AzureOpenAIModel
 96 | from allms.domain.configuration import AzureOpenAIConfiguration
 97 | 
 98 | custom_event_loop = asyncio.new_event_loop()
 99 | 
100 | configuration = AzureOpenAIConfiguration(
101 |     api_key="<OPENAI_API_KEY>",
102 |     base_url="<OPENAI_API_BASE>",
103 |     api_version="<OPENAI_API_VERSION>",
104 |     deployment="<OPENAI_API_DEPLOYMENT_NAME>",
105 |     model_name="<OPENAI_API_MODEL_NAME>"
106 | )
107 | 
108 | model = AzureOpenAIModel(
109 |     config=configuration,
110 |     event_loop=custom_event_loop
111 | )
112 | ``` 


--------------------------------------------------------------------------------
/allms/models/vertexai_gemini.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import typing
  3 | from asyncio import AbstractEventLoop
  4 | from typing import Optional
  5 | 
  6 | from langchain_core.prompts import ChatPromptTemplate
  7 | from vertexai.preview import tokenization
  8 | from vertexai.tokenization._tokenizers import Tokenizer
  9 | 
 10 | from allms.defaults.general_defaults import GeneralDefaults
 11 | from allms.defaults.vertex_ai import GeminiModelDefaults
 12 | from allms.domain.configuration import VertexAIConfiguration
 13 | from allms.domain.input_data import InputData
 14 | from allms.models.abstract import AbstractModel
 15 | from allms.models.vertexai_base import CustomVertexAI
 16 | from allms.utils.logger_utils import setup_logger
 17 | 
 18 | 
 19 | logger = logging.getLogger(__name__)
 20 | setup_logger()
 21 | 
 22 | BASE_GEMINI_MODEL_NAMES = [
 23 |     "gemini-1.0-pro",
 24 |     "gemini-1.5-pro",
 25 |     "gemini-1.5-flash",
 26 |     # TODO: add `gemini-2.0-flash` when available
 27 | ]
 28 | 
 29 | 
 30 | class VertexAIGeminiModel(AbstractModel):
 31 |     def __init__(
 32 |             self,
 33 |             config: VertexAIConfiguration,
 34 |             temperature: float = GeminiModelDefaults.TEMPERATURE,
 35 |             top_k: int = GeminiModelDefaults.TOP_K,
 36 |             top_p: float = GeminiModelDefaults.TOP_P,
 37 |             max_output_tokens: int = GeminiModelDefaults.MAX_OUTPUT_TOKENS,
 38 |             model_total_max_tokens: int = GeminiModelDefaults.MODEL_TOTAL_MAX_TOKENS,
 39 |             max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY,
 40 |             max_retries: int = GeneralDefaults.MAX_RETRIES,
 41 |             verbose: bool = GeminiModelDefaults.VERBOSE,
 42 |             event_loop: Optional[AbstractEventLoop] = None
 43 |     ) -> None:
 44 |         self._top_p = top_p
 45 |         self._top_k = top_k
 46 |         self._verbose = verbose
 47 |         self._config = config
 48 | 
 49 |         self._gcp_tokenizer = self._get_gcp_tokenizer(self._config.gemini_model_name)
 50 | 
 51 |         super().__init__(
 52 |             temperature=temperature,
 53 |             model_total_max_tokens=model_total_max_tokens,
 54 |             max_output_tokens=max_output_tokens,
 55 |             max_concurrency=max_concurrency,
 56 |             max_retries=max_retries,
 57 |             event_loop=event_loop
 58 |         )
 59 | 
 60 |     def _create_llm(self) -> CustomVertexAI:
 61 |         llm = CustomVertexAI(
 62 |             model_name=self._config.gemini_model_name,
 63 |             max_output_tokens=self._max_output_tokens,
 64 |             temperature=self._temperature,
 65 |             top_p=self._top_p,
 66 |             top_k=self._top_k,
 67 |             safety_settings=self._config.gemini_safety_settings,
 68 |             verbose=self._verbose,
 69 |             project=self._config.cloud_project,
 70 |             location=self._config.cloud_location,
 71 |             api_endpoint=self._config.api_endpoint,
 72 |             api_transport=self._config.api_transport,
 73 |             credentials=self._config.credentials,
 74 |         )
 75 |         # NOTE: this param is for some reason not passed, see: langchain_google_vertexai.llms.VertexAI.validate_environment
 76 |         # `endpoint_version` is not passed to the `ChatVertexAI` constructor
 77 |         # but in _VertexAIBase, grandparent of VertexAI (VertexAI ->  _VertexAICommon -> _VertexAIBase)
 78 |         # it's set v1beta1 by default
 79 |         if self._config.endpoint_version:
 80 |             llm.client.endpoint_version = self._config.endpoint_version
 81 |         # NOTE: `ChatVertexAI` is child of _VertexAICommon and grandchild of _VertexAIBase, the same as `VertexAI`,
 82 |         # so they use the same validation in langchain_google_vertexai._base._VertexAIBase.validate_params_base.
 83 |         # In `validate_params_base` the `default_metadata` is set to `additional_headers`.
 84 |         # And in constructor of `ChatVertexAI` `additional_headers` is not passed.
 85 |         # So `default_metadata` is always set to default value.
 86 |         if self._config.extra_headers:
 87 |             llm.client.default_metadata = self._config.extra_headers
 88 |         return llm
 89 | 
 90 |     def _get_prompt_tokens_number(self, prompt: ChatPromptTemplate, input_data: InputData) -> int:
 91 |         return self._gcp_tokenizer.count_tokens(
 92 |             prompt.format_prompt(**input_data.input_mappings).to_string()
 93 |         ).total_tokens
 94 | 
 95 |     def _get_model_response_tokens_number(self, model_response: typing.Optional[str]) -> int:
 96 |         if model_response:
 97 |             return self._gcp_tokenizer.count_tokens(model_response).total_tokens
 98 |         return 0
 99 | 
100 | 
101 |     @staticmethod
102 |     def _get_gcp_tokenizer(model_name) -> Tokenizer:
103 |         try:
104 |             return tokenization.get_tokenizer_for_model(model_name)
105 |         except ValueError:
106 |             for base_model_name in BASE_GEMINI_MODEL_NAMES:
107 |                 if model_name.startswith(base_model_name):
108 |                     return tokenization.get_tokenizer_for_model(base_model_name)
109 |                 else:
110 |                     # Currently supported models for token listing and counting
111 |                     # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/list-token#supported-models
112 |                     # `gemini-2.0` family of models is not supported yet, hence we need this workaround
113 |                     logger.info(
114 |                         f"Model %s is not supported for tokenization, using default tokenizer:"
115 |                         f" {GeminiModelDefaults.GCP_MODEL_NAME}",
116 |                         model_name
117 |                     )
118 |                     return tokenization.get_tokenizer_for_model(GeminiModelDefaults.GCP_MODEL_NAME)
119 |             raise
120 | 
121 | 


--------------------------------------------------------------------------------
/tests/resources/test_input_data.csv:
--------------------------------------------------------------------------------
1 | text,id
2 | "<p><b>Indywidualna racja żywnościowa wojskowa S-R-9  (set nr 9)</b></p><p><b>Skład zestawu :</b></p><ul><li>Makaron po bolońsku  (300g)</li><li>Konserwa tyrolska  (100g)</li><li>Suchary (90 g)</li><li>Koncentrat napoju herbacianego instant o smaku owoców leśnych   (15g)</li><li>Dżem malinowy  (25 g)</li><li>Baton zbożowo- owocowy o smaku figowym (35 g)</li><li>Guma do żucia (2 szt)</li><li>Cukierek z ekstraktem z kawy naturalnej (1 szt)</li><li>Cukierek z witaminą C (1 szt)</li><li>Sól  (1 g)</li><li>Pieprz (0,2 g)</li><li>Serwetka (1 szt)</li><li>Chusteczka nawilżona  (1 szt)</li><li>Papier toaletowy ( 150 cm)</li><li>Torebka strunowa (1 szt)</li><li>Łyżka jednorazowa (1 szt)</li><li>Rurka do napojów ( 1 szt)</li><li>Bezpłomieniowy podgrzewacz chemiczny (1 szt)</li></ul><p><b>Racja żywnościowa</b> to często jedyny sposób, aby przetrwać trudne chwile i doczekać momentu, w którym nadejdzie pomoc. Z takiego rozwiązania korzystają wojskowi, ale także survivalowcy. Nic też nie stoi na przeszkodzie, aby racje żywnościowe zabrać ze sobą za każdym razem, kiedy wybierasz się w długą podróż.</p> <h1>WAŻNOŚĆ ZESTAWU TO MINIMUM 30.11.2023r.</h1><p><b>Posiadam również inne numery racji !</b></p><p><b>Zapraszam do zapoznania się z aukcjami .</b></p><p><b>Prosimy nie sugerować się zdjęciami . Zdjęcia są tylko poglądowe .</b></p>",12181
3 | "<p>Witam,</p><p>Przedmiotem mojej oferty jest:</p><p><b>Indywidualna racja żywnościowa żołnierza Wojska Polskiego S-R-6.</b></p><p><b>Skład racji żywnościowej:</b></p><ul><li>bigos z kiełbasą (300 g)</li><li>gulasz angielski(100 g)</li><li>suchary (90 g)</li><li>dżem jagodowy(25 g)</li><li>koncentrat napoju herbacianego instant o smaku cytrynowym (15 g)</li><li>baton zbożowo- owocowy o smaku wiśniowym(1 szt )</li><li>guma do żucia (2 szt)</li><li>cukierek zawierający ekstrakt kawy naturalnej (1 szt)</li><li>cukierek zawierający witaminę C (1 szt)</li><li>sól, pieprz</li><li>serwetka papierowa (1 szt)</li><li>serwetka nawilżona  (1 szt)</li><li>papier toaletowy ( 1 szt)</li><li>woreczek strunowy (1 szt)</li><li>łyżka jednorazowa (1 szt)</li><li>rurka do napojów ( 1 szt)</li><li>bezpłomieniowy podgrzewacz chemiczny (1 szt)</li></ul><p>Racje żywnościowe wydawane są żołnierzom, gdy nie można zapewnić im ciepłego posiłku z kuchni polowej, na przykład na szkoleniach poligonowych. Nie oznacza to jednak, że żołnierz będzie jadł zimny posiłek. W każdej racji znajduje się podgrzewacz chemiczny, który w łatwy sposób w krótkim czasie pozwala podgrzać posiłek bez użycia ognia. Racje są często stosowane również przez harcerzy, turystów(szczególnie pieszych), himalaistów, wędkarzy, żeglarzy i grotołazów.</p><p><b>(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)</b></p><p><b>Ważność zestawu minimum 30.11.2023 r.</b></p><p><b>W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)</b></p><p><b>Zapraszam na inne moje oferty - posiadam inne numery!!!!</b></p>",12181
4 | "<h1>Kross Level 8.0 szary pp 2023. Koła 29 cali, rama 20"".</h1><p>Rama i widelec</p><ul><li><b>WYKOŃCZENIE LAKIERU:</b>POŁYSK</li><li><b>MATERIAŁ RAMY:</b>CARBON NO.2</li><li><b>WIDELEC:</b>ROCK SHOX RECON SILVER RL</li><li><b>SKOK WIDELCA:</b>100MM</li><li><b>TYLNY AMORTYZATOR:</b>BRAK</li><li><b>SKOK TYLNEGO AMORTYZATORA:</b>BRAK</li></ul><p>Napęd</p><ul><li><b>PRZERZUTKA PRZÓD:</b>BRAK</li><li><b>PRZERZUTKA TYŁ:</b>SHIMANO DEORE M6100</li><li><b>MANETKI:</b>SHIMANO DEORE M6100</li><li><b>KORBA:</b>PROWHEEL MPX-CR094S-TT-B</li><li><b>KORONKI:</b>34T/170MM</li><li><b>KASETA / WOLNOBIEG:</b>MTB-CS-HR-1050AHS</li><li><b>ZAKRES KASETY/WOLNOBIEGU:</b>10-50T</li><li><b>ILOŚĆ PRZEŁOŻEŃ:</b>12</li><li><b>SUPORT:</b>SHIMANO BSA BB52</li><li><b>ŁAŃCUCH:</b>SHIMANO M6100</li></ul><p>Koła</p><ul><li><b>PIASTA PRZÓD:</b>GL-B93F-B/CL-X15 32H</li><li><b>PIASTA TYŁ:</b>GL-B93R-SB/CL-X12 MS 32H</li><li><b>OBRĘCZE:</b>KROSS</li><li><b>OPONY:</b>SCHWALBE RAPID ROB 29X2.25</li></ul><p>Hamulce</p><ul><li><b>HAMULEC PRZÓD:</b>SHIMANO MT200</li><li><b>HAMULEC TYŁ:</b>SHIMANO MT200</li><li><b>DŹWIGNIE HAMULCA:</b>SHIMANO MT200</li><li><b>TARCZE HAMULCOWE:</b>SHIMANO (160)</li><li><b>TARCZE HAMULCOWE TYŁ:</b>SHIMANO (160)</li></ul><p>Komponenty</p><ul><li><b>KIEROWNICA:</b>ALUMINIUM 720 MM 31.8</li><li><b>WSPORNIK KIEROWNICY:</b>ALUMINIUM 31,8/7° (S - 60 MM, M, L - 70 MM, XL - 80 MM)</li><li><b>SIODŁO:</b>SELLE ROYAL</li><li><b>WSPORNIK SIODŁA:</b>ALUMINIUM 27,2X400</li><li><b>STERY:</b>FSA ORBIT C-40-ACB NO.42</li><li><b>CHWYTY:</b>KROSS SCALE</li></ul><p>System e-bike</p><ul><li><b>SILNIK:</b>BRAK</li><li><b>BATERIA:</b>BRAK</li><li><b>WYŚWIETLACZ:</b>BRAK</li></ul><p>Informacje dodatkowe</p><ul><li><b>WAGA [KG]:</b>S - 12,67 KG, M - 12,7 KG, L - 12,75 KG, XL - BD</li></ul>",16483
5 | "<p>Witam,</p><p>Przedmiotem mojej oferty jest:</p><p><b>Indywidualna racja żywnościowa żołnierza Armii USA MRE nr 6</b></p><p><b>Oryginalna racja żywnościowa US Army MRE (Meal, Ready-to-Eat) przeznaczona dla jednej osoby. Całość zamknięta została w szczelnym opakowaniu dzięki czemu posiada długi okres przydatności do spożycia. Dodatkowo każdy z produktów wewnątrz posiada osobne opakowanie. Przyrządzanie racji odbywa się za pomocą bezpłomieniowego podgrzewacza chemicznego, dzięki czemu możemy zjeść ciepły posiłek bez konieczności rozpalania ogniska. Każde Menu posiada inne danie główne.</b></p><p><b>Skład racji żywnościowej:</b></p><p><b>Menu 6</b></p><p>Beef Taco</p><p>Santa Fe Style Rice and Beans</p><p>Cheddar Cheese Spread</p><p>Tortillas</p><p>Nut and Fruit Mix w/ M&amp;Ms</p><p>Orange Beverage Base, Sugar-free</p><p>Coffee</p><p>Creamer</p><p>Sugar</p><p>Matches</p><p>Chewing Gum, Sugar Free</p><p>Towelette</p><p>Salt</p><p>Toilet Tissue</p><p><b>(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)</b></p><p><b>Ważność zestawu minimum 12.2023 r.</b></p><p><b>W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)</b></p><p><b>Skład zestawu może się delikatnie różnić w zależności od partii produkcji.</b></p><p><b>Zapraszam na inne moje oferty - posiadam inne numery!!!!</b></p>",19504
6 | 


--------------------------------------------------------------------------------
/tests/test_output_parser.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from unittest.mock import patch
  3 | 
  4 | import pytest
  5 | 
  6 | from allms.domain.input_data import InputData
  7 | from allms.domain.prompt_dto import SummaryOutputClass, KeywordsOutputClass
  8 | 
  9 | 
 10 | class TestOutputModelParserForDifferentModelOutputs:
 11 |     @patch("langchain.chains.base.Chain.arun")
 12 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
 13 |     def test_output_parser_returns_desired_format(self, tokens_mock, chain_run_mock, models):
 14 |         # GIVEN
 15 |         text_output = "This is the model output"
 16 |         expected_model_response = json.dumps({"summary": text_output})
 17 |         chain_run_mock.return_value = expected_model_response
 18 |         tokens_mock.return_value = 1
 19 | 
 20 |         input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
 21 |         prompt = "Some Dummy Prompt {text}"
 22 | 
 23 |         # WHEN & THEN
 24 |         for model in models.values():
 25 |             model_response = model.generate(prompt, input_data, SummaryOutputClass)
 26 |             assert type(model_response[0].response) == SummaryOutputClass
 27 |             assert model_response[0].response.summary == text_output
 28 | 
 29 |     @patch("langchain.chains.base.Chain.arun")
 30 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
 31 |     def test_output_parser_returns_error_when_model_output_returns_different_field(self, tokens_mock, chain_run_mock, models):
 32 |         # GIVEN
 33 |         text_output = "This is the model output"
 34 |         expected_model_response = json.dumps({"other_key": text_output})
 35 |         chain_run_mock.return_value = expected_model_response
 36 |         tokens_mock.return_value = 1
 37 | 
 38 |         input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
 39 |         prompt = "Some Dummy Prompt {text}"
 40 | 
 41 |         # WHEN & THEN
 42 |         for model in models.values():
 43 |             model_response = model.generate(prompt, input_data, SummaryOutputClass)
 44 |             assert "OutputParserException" in model_response[0].error
 45 |             assert model_response[0].response is None
 46 | 
 47 |     @patch("langchain.chains.base.Chain.arun")
 48 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
 49 |     @pytest.mark.parametrize("json_response", [
 50 |         ("{\"summary\": \"This is the model output\"}"),
 51 |         ("Sure! Here's the JSON you wanted: {\"summary\": \"This is the model output\"} Have a nice day!"),
 52 |         ("<<SYS>>\\n{\\n    \"summary\": \"This is the model output\"\\n}\\n<</SYS>>"),
 53 |         ("{\\\"summary\\\": \\\"This is the model output\\\"}\\n}")
 54 |     ])
 55 |     def test_output_parser_extracts_json_from_response(self, tokens_mock, chain_run_mock, models, json_response):
 56 |         # GIVEN
 57 |         chain_run_mock.return_value = json_response
 58 |         tokens_mock.return_value = 1
 59 | 
 60 |         input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
 61 |         prompt = "Some Dummy Prompt {text}"
 62 | 
 63 |         # WHEN & THEN
 64 |         for model in models.values():
 65 |             model_response = model.generate(prompt, input_data, SummaryOutputClass)
 66 |             assert model_response[0].response == SummaryOutputClass(summary="This is the model output")
 67 | 
 68 |     @patch("langchain.chains.base.Chain.arun")
 69 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
 70 |     def test_output_parser_returns_error_when_json_is_garbled(self, tokens_mock, chain_run_mock, models):
 71 |         # GIVEN
 72 |         chain_run_mock.return_value = "Sure! Here's the JSON you wanted: {\"summary: \"text\"}"
 73 |         tokens_mock.return_value = 1
 74 | 
 75 |         input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
 76 |         prompt = "Some Dummy Prompt {text}"
 77 | 
 78 |         # WHEN & THEN
 79 |         for model in models.values():
 80 |             model_response = model.generate(prompt, input_data, SummaryOutputClass)
 81 |             assert "OutputParserException" in model_response[0].error
 82 |             assert model_response[0].response is None
 83 | 
 84 |     @patch("langchain.chains.base.Chain.arun")
 85 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
 86 |     def test_output_parser_returns_parsed_class_when_model_output_returns_too_many_fields(self, tokens_mock, chain_run_mock, models):
 87 |         # GIVEN
 88 |         text_output = "This is the model output"
 89 |         expected_model_response = json.dumps({"other_key": text_output, "summary": text_output})
 90 |         chain_run_mock.return_value = expected_model_response
 91 |         tokens_mock.return_value = 1
 92 | 
 93 |         input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
 94 |         prompt = "Some Dummy Prompt {text}"
 95 | 
 96 |         # WHEN & THEN
 97 |         for model in models.values():
 98 |             model_response = model.generate(prompt, input_data, SummaryOutputClass)
 99 |             assert type(model_response[0].response) == SummaryOutputClass
100 |             assert model_response[0].response.summary == text_output
101 | 
102 |     @patch("langchain.chains.base.Chain.arun")
103 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
104 |     def test_model_returns_output_as_python_list_correctly(self, tokens_mock, chain_run_mock, models):
105 |         # GIVEN
106 |         text_output = ["1", "2", "3"]
107 |         expected_model_response = json.dumps({"text": text_output, "keywords": text_output})
108 |         chain_run_mock.return_value = expected_model_response
109 |         tokens_mock.return_value = 1
110 | 
111 |         input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
112 |         prompt = "Some Dummy Prompt {text}"
113 | 
114 |         # WHEN & THEN
115 |         for model in models.values():
116 |             model_response = model.generate(prompt, input_data, KeywordsOutputClass)
117 |             assert type(model_response[0].response) == KeywordsOutputClass
118 |             assert model_response[0].response.keywords == list(map(str, text_output))
119 | 
120 |     @patch("langchain.chains.base.Chain.arun")
121 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
122 |     def test_model_output_when_input_data_is_empty(self, tokens_mock, chain_run_mock, models):
123 |         # GIVEN
124 |         expected_model_response = "2+2 is 4"
125 |         chain_run_mock.return_value = expected_model_response
126 |         tokens_mock.return_value = 1
127 | 
128 |         prompt = "2+2 is..."
129 | 
130 |         # WHEN & THEN
131 |         for model in models.values():
132 |             model_response = model.generate(prompt, None, KeywordsOutputClass)
133 |             assert model_response[0].response is None
134 |             assert "OutputParserException" in model_response[0].error


--------------------------------------------------------------------------------
/tests/resources/test_end_to_end_expected_output.csv:
--------------------------------------------------------------------------------
 1 | text,response,id,number_of_prompt_tokens,number_of_generated_tokens
 2 | "<p><b>Indywidualna racja żywnościowa wojskowa S-R-9  (set nr 9)</b></p><p><b>Skład zestawu :</b></p><ul><li>Makaron po bolońsku  (300g)</li><li>Konserwa tyrolska  (100g)</li><li>Suchary (90 g)</li><li>Koncentrat napoju herbacianego instant o smaku owoców leśnych   (15g)</li><li>Dżem malinowy  (25 g)</li><li>Baton zbożowo- owocowy o smaku figowym (35 g)</li><li>Guma do żucia (2 szt)</li><li>Cukierek z ekstraktem z kawy naturalnej (1 szt)</li><li>Cukierek z witaminą C (1 szt)</li><li>Sól  (1 g)</li><li>Pieprz (0,2 g)</li><li>Serwetka (1 szt)</li><li>Chusteczka nawilżona  (1 szt)</li><li>Papier toaletowy ( 150 cm)</li><li>Torebka strunowa (1 szt)</li><li>Łyżka jednorazowa (1 szt)</li><li>Rurka do napojów ( 1 szt)</li><li>Bezpłomieniowy podgrzewacz chemiczny (1 szt)</li></ul><p><b>Racja żywnościowa</b> to często jedyny sposób, aby przetrwać trudne chwile i doczekać momentu, w którym nadejdzie pomoc. Z takiego rozwiązania korzystają wojskowi, ale także survivalowcy. Nic też nie stoi na przeszkodzie, aby racje żywnościowe zabrać ze sobą za każdym razem, kiedy wybierasz się w długą podróż.</p> <h1>WAŻNOŚĆ ZESTAWU TO MINIMUM 30.11.2023r.</h1><p><b>Posiadam również inne numery racji !</b></p><p><b>Zapraszam do zapoznania się z aukcjami .</b></p><p><b>Prosimy nie sugerować się zdjęciami . Zdjęcia są tylko poglądowe .</b></p>","['Indywidualna racja żywnościowa',
 3 |   'wojskowa',
 4 |   'S-R-9',
 5 |   'set nr 9',
 6 |   'Makaron po bolońsku',
 7 |   'Konserwa tyrolska',
 8 |   'Suchary',
 9 |   'Koncentrat napoju herbacianego instant o smaku owoców leśnych',
10 |   'Dżem malinowy',
11 |   'Baton zbożowo-owocowy o smaku figowym']",12181,772,105
12 | "<p>Witam,</p><p>Przedmiotem mojej oferty jest:</p><p><b>Indywidualna racja żywnościowa żołnierza Wojska Polskiego S-R-6.</b></p><p><b>Skład racji żywnościowej:</b></p><ul><li>bigos z kiełbasą (300 g)</li><li>gulasz angielski(100 g)</li><li>suchary (90 g)</li><li>dżem jagodowy(25 g)</li><li>koncentrat napoju herbacianego instant o smaku cytrynowym (15 g)</li><li>baton zbożowo- owocowy o smaku wiśniowym(1 szt )</li><li>guma do żucia (2 szt)</li><li>cukierek zawierający ekstrakt kawy naturalnej (1 szt)</li><li>cukierek zawierający witaminę C (1 szt)</li><li>sól, pieprz</li><li>serwetka papierowa (1 szt)</li><li>serwetka nawilżona  (1 szt)</li><li>papier toaletowy ( 1 szt)</li><li>woreczek strunowy (1 szt)</li><li>łyżka jednorazowa (1 szt)</li><li>rurka do napojów ( 1 szt)</li><li>bezpłomieniowy podgrzewacz chemiczny (1 szt)</li></ul><p>Racje żywnościowe wydawane są żołnierzom, gdy nie można zapewnić im ciepłego posiłku z kuchni polowej, na przykład na szkoleniach poligonowych. Nie oznacza to jednak, że żołnierz będzie jadł zimny posiłek. W każdej racji znajduje się podgrzewacz chemiczny, który w łatwy sposób w krótkim czasie pozwala podgrzać posiłek bez użycia ognia. Racje są często stosowane również przez harcerzy, turystów(szczególnie pieszych), himalaistów, wędkarzy, żeglarzy i grotołazów.</p><p><b>(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)</b></p><p><b>Ważność zestawu minimum 30.11.2023 r.</b></p><p><b>W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)</b></p><p><b>Zapraszam na inne moje oferty - posiadam inne numery!!!!</b></p>","['Indywidualna racja żywnościowa',
13 |   'wojskowa',
14 |   'S-R-9',
15 |   'set nr 9',
16 |   'Makaron po bolońsku',
17 |   'Konserwa tyrolska',
18 |   'Suchary',
19 |   'Koncentrat napoju herbacianego instant o smaku owoców leśnych',
20 |   'Dżem malinowy',
21 |   'Baton zbożowo-owocowy o smaku figowym']",12181,942,105
22 | "<h1>Kross Level 8.0 szary pp 2023. Koła 29 cali, rama 20"".</h1><p>Rama i widelec</p><ul><li><b>WYKOŃCZENIE LAKIERU:</b>POŁYSK</li><li><b>MATERIAŁ RAMY:</b>CARBON NO.2</li><li><b>WIDELEC:</b>ROCK SHOX RECON SILVER RL</li><li><b>SKOK WIDELCA:</b>100MM</li><li><b>TYLNY AMORTYZATOR:</b>BRAK</li><li><b>SKOK TYLNEGO AMORTYZATORA:</b>BRAK</li></ul><p>Napęd</p><ul><li><b>PRZERZUTKA PRZÓD:</b>BRAK</li><li><b>PRZERZUTKA TYŁ:</b>SHIMANO DEORE M6100</li><li><b>MANETKI:</b>SHIMANO DEORE M6100</li><li><b>KORBA:</b>PROWHEEL MPX-CR094S-TT-B</li><li><b>KORONKI:</b>34T/170MM</li><li><b>KASETA / WOLNOBIEG:</b>MTB-CS-HR-1050AHS</li><li><b>ZAKRES KASETY/WOLNOBIEGU:</b>10-50T</li><li><b>ILOŚĆ PRZEŁOŻEŃ:</b>12</li><li><b>SUPORT:</b>SHIMANO BSA BB52</li><li><b>ŁAŃCUCH:</b>SHIMANO M6100</li></ul><p>Koła</p><ul><li><b>PIASTA PRZÓD:</b>GL-B93F-B/CL-X15 32H</li><li><b>PIASTA TYŁ:</b>GL-B93R-SB/CL-X12 MS 32H</li><li><b>OBRĘCZE:</b>KROSS</li><li><b>OPONY:</b>SCHWALBE RAPID ROB 29X2.25</li></ul><p>Hamulce</p><ul><li><b>HAMULEC PRZÓD:</b>SHIMANO MT200</li><li><b>HAMULEC TYŁ:</b>SHIMANO MT200</li><li><b>DŹWIGNIE HAMULCA:</b>SHIMANO MT200</li><li><b>TARCZE HAMULCOWE:</b>SHIMANO (160)</li><li><b>TARCZE HAMULCOWE TYŁ:</b>SHIMANO (160)</li></ul><p>Komponenty</p><ul><li><b>KIEROWNICA:</b>ALUMINIUM 720 MM 31.8</li><li><b>WSPORNIK KIEROWNICY:</b>ALUMINIUM 31,8/7° (S - 60 MM, M, L - 70 MM, XL - 80 MM)</li><li><b>SIODŁO:</b>SELLE ROYAL</li><li><b>WSPORNIK SIODŁA:</b>ALUMINIUM 27,2X400</li><li><b>STERY:</b>FSA ORBIT C-40-ACB NO.42</li><li><b>CHWYTY:</b>KROSS SCALE</li></ul><p>System e-bike</p><ul><li><b>SILNIK:</b>BRAK</li><li><b>BATERIA:</b>BRAK</li><li><b>WYŚWIETLACZ:</b>BRAK</li></ul><p>Informacje dodatkowe</p><ul><li><b>WAGA [KG]:</b>S - 12,67 KG, M - 12,7 KG, L - 12,75 KG, XL - BD</li></ul>","['Indywidualna racja żywnościowa',
23 |   'wojskowa',
24 |   'S-R-9',
25 |   'set nr 9',
26 |   'Makaron po bolońsku',
27 |   'Konserwa tyrolska',
28 |   'Suchary',
29 |   'Koncentrat napoju herbacianego instant o smaku owoców leśnych',
30 |   'Dżem malinowy',
31 |   'Baton zbożowo-owocowy o smaku figowym']",16483,1152,105
32 | "<p>Witam,</p><p>Przedmiotem mojej oferty jest:</p><p><b>Indywidualna racja żywnościowa żołnierza Armii USA MRE nr 6</b></p><p><b>Oryginalna racja żywnościowa US Army MRE (Meal, Ready-to-Eat) przeznaczona dla jednej osoby. Całość zamknięta została w szczelnym opakowaniu dzięki czemu posiada długi okres przydatności do spożycia. Dodatkowo każdy z produktów wewnątrz posiada osobne opakowanie. Przyrządzanie racji odbywa się za pomocą bezpłomieniowego podgrzewacza chemicznego, dzięki czemu możemy zjeść ciepły posiłek bez konieczności rozpalania ogniska. Każde Menu posiada inne danie główne.</b></p><p><b>Skład racji żywnościowej:</b></p><p><b>Menu 6</b></p><p>Beef Taco</p><p>Santa Fe Style Rice and Beans</p><p>Cheddar Cheese Spread</p><p>Tortillas</p><p>Nut and Fruit Mix w/ M&amp;Ms</p><p>Orange Beverage Base, Sugar-free</p><p>Coffee</p><p>Creamer</p><p>Sugar</p><p>Matches</p><p>Chewing Gum, Sugar Free</p><p>Towelette</p><p>Salt</p><p>Toilet Tissue</p><p><b>(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)</b></p><p><b>Ważność zestawu minimum 12.2023 r.</b></p><p><b>W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)</b></p><p><b>Skład zestawu może się delikatnie różnić w zależności od partii produkcji.</b></p><p><b>Zapraszam na inne moje oferty - posiadam inne numery!!!!</b></p>","['Indywidualna racja żywnościowa',
33 |   'wojskowa',
34 |   'S-R-9',
35 |   'set nr 9',
36 |   'Makaron po bolońsku',
37 |   'Konserwa tyrolska',
38 |   'Suchary',
39 |   'Koncentrat napoju herbacianego instant o smaku owoców leśnych',
40 |   'Dżem malinowy',
41 |   'Baton zbożowo-owocowy o smaku figowym']",19504,795,105
42 | 


--------------------------------------------------------------------------------
/tests/test_model_behavior_for_different_input_data.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | 
  3 | import allms.exceptions.validation_input_data_exceptions as input_validation_messages
  4 | from unittest.mock import patch
  5 | 
  6 | import pytest
  7 | 
  8 | from allms.domain.input_data import InputData
  9 | from allms.domain.response import ResponseData
 10 | import allms.models as llm_models
 11 | 
 12 | 
 13 | class TestModelBehaviorForDifferentInput:
 14 | 
 15 |     @patch("langchain.chains.base.Chain.arun")
 16 |     def test_no_input_variables_provided_in_the_prompt_raise_exception(self, chain_run_mock, models):
 17 |         for model in models.values():
 18 |             chain_run_mock.return_value = "{}"
 19 | 
 20 |             input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
 21 | 
 22 |             prompt = "Some Dummy Prompt without input variable"
 23 | 
 24 |             with pytest.raises(ValueError, match=input_validation_messages.get_missing_input_data_in_prompt_message(
 25 |                     input_data[0].id)) as expected_value_exception:
 26 |                 model.generate(prompt, input_data)
 27 | 
 28 |     @patch("langchain.chains.base.Chain.arun")
 29 |     def test_no_input_variables_provided_in_the_input_data_raise_exception(self, chain_run_mock, models):
 30 |         for model in models.values():
 31 |             chain_run_mock.return_value = "{}"
 32 | 
 33 |             input_data = [InputData(input_mappings={}, id="1")]
 34 | 
 35 |             prompt = "Some Dummy Prompt without input variable :{text}"
 36 | 
 37 |             with pytest.raises(ValueError, match=input_validation_messages.get_missing_input_data_in_input_data_message(
 38 |                     input_data[0].id)) as expected_value_exception:
 39 |                 model.generate(prompt, input_data)
 40 | 
 41 |     @patch("langchain.chains.base.Chain.arun")
 42 |     def test_different_input_keys_provided_in_input_data_and_prompt(self, chain_run_mock, models):
 43 |         for model in models.values():
 44 |             chain_run_mock.return_value = "{}"
 45 | 
 46 |             input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")]
 47 | 
 48 |             prompt = "Some Dummy Prompt without input variable {text} {text_1}"
 49 | 
 50 |             with pytest.raises(ValueError, match=input_validation_messages.get_different_input_keys_message(
 51 |                     input_data[0].id)) as expected_value_exception:
 52 |                 model.generate(prompt, input_data)
 53 | 
 54 |     @patch("langchain.chains.base.Chain.arun")
 55 |     def test_different_number_of_input_keys_provided_in_input_data_and_prompt(self, chain_run_mock, models):
 56 |         for model in models.values():
 57 |             chain_run_mock.return_value = "{}"
 58 | 
 59 |             input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")]
 60 | 
 61 |             prompt = "Some Dummy Prompt without input variable {text} {text_1} {text_2}"
 62 | 
 63 |             with pytest.raises(ValueError, match=input_validation_messages.get_different_number_of_inputs_message(
 64 |                     input_data[0].id)) as expected_value_exception:
 65 |                 model.generate(prompt, input_data)
 66 | 
 67 |     @patch("langchain.chains.base.Chain.arun")
 68 |     def test_exception_when_input_data_is_missing_and_prompt_contains_input_key(self, chain_run_mock, models):
 69 |         for model in models.values():
 70 |             chain_run_mock.return_value = "{}"
 71 | 
 72 |             prompt = "Some Dummy Prompt without input variable {text} {text_1}"
 73 | 
 74 |             with pytest.raises(
 75 |                     ValueError,
 76 |                     match=input_validation_messages.get_prompt_contains_input_key_when_missing_input_data()
 77 |             ):
 78 |                 model.generate(prompt, None)
 79 | 
 80 |     @patch("langchain.chains.base.Chain.arun")
 81 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
 82 |     def test_exception_when_num_prompt_tokens_larger_than_model_total_max_tokens(self, tokens_mock, chain_run_mock, models):
 83 |         # GIVEN
 84 |         chain_run_mock.return_value = "{}"
 85 | 
 86 |         input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")]
 87 | 
 88 |         prompt = "Some dummy really, really long prompt. " * 10000 + "input variables: {text} {text_2}"
 89 |         tokens_mock.return_value = len(prompt.split())
 90 | 
 91 |         # WHEN & THEN
 92 |         for model in models.values():
 93 |             response = model.generate(prompt, input_data)[0]
 94 | 
 95 |             assert isinstance(response, ResponseData)
 96 |             assert response.response is None
 97 |             assert "Value Error has occurred: Prompt is too long" in response.error
 98 | 
 99 |     @patch("langchain.chains.base.Chain.arun")
100 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
101 |     def test_whether_curly_brackets_are_not_breaking_the_prompt(self, tokens_mock, chain_run_mock, models):
102 |         # GIVEN
103 |         chain_run_mock.return_value = "{}"
104 | 
105 |         input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
106 | 
107 |         prompt = "Extract parameters from this text: {text} and output them as a JSON: {{name: parameter_name, value: parameter_value}}"
108 |         tokens_mock.return_value = len(prompt.split())
109 | 
110 |         # WHEN & THEN
111 |         for model in models.values():
112 |             response = model.generate(prompt, input_data)[0]
113 | 
114 |             assert isinstance(response, ResponseData)
115 |             assert response.response is not None
116 | 
117 |     @patch("langchain.chains.base.Chain.arun")
118 |     @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens")
119 |     def test_warning_when_num_prompt_tokens_plus_max_output_tokens_larger_than_model_total_max_tokens(
120 |             self,
121 |             tokens_mock,
122 |             chain_run_mock,
123 |             models,
124 |             caplog
125 |     ):
126 |         # GIVEN
127 |         chain_run_mock.return_value = "{}"
128 | 
129 |         input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")]
130 | 
131 |         prompt = "Some dummy prompt. input variables: {text} {text_2}"
132 |         tokens_mock.return_value = len(prompt.split())
133 | 
134 |         # WHEN & THEN
135 |         for model in models.values():
136 |             model._max_output_tokens = 100000
137 | 
138 |             with caplog.at_level(logging.WARNING):
139 |                 model.generate(prompt, input_data)
140 | 
141 |             log_records = caplog.records
142 |             assert len(log_records) == 1
143 |             assert log_records[0].levelname == "WARNING"
144 |             assert "Number of prompt tokens plus generated tokens may exceed the the max allowed number of tokens of the model." in log_records[0].message
145 | 
146 |             caplog.clear()
147 | 
148 |     @patch("langchain.chains.base.Chain.arun")
149 |     def test_model_raises_exception_when_system_prompt_is_invalid(self, chain_run_mock, models):
150 |         for model in models.values():
151 |             chain_run_mock.return_value = "{}"
152 | 
153 |             prompt = "Some Dummy Prompt without input variable"
154 |             if isinstance(model, llm_models.AzureMistralModel):
155 |                 with pytest.raises(
156 |                         ValueError, match=input_validation_messages.get_system_prompt_is_not_supported_by_model()
157 |                 ) as expected_value_exception:
158 |                     model.generate(prompt, system_prompt="This is a system prompt with {additional} field")
159 |             else:
160 |                 with pytest.raises(
161 |                         ValueError, match=input_validation_messages.get_system_prompt_contains_input_variables()
162 |                 ) as expected_value_exception:
163 |                     model.generate(prompt, system_prompt="This is a system prompt with {additional} field")
164 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # allms
  2 | 
  3 | ___
  4 | ## About
  5 | 
  6 | allms is a versatile and powerful library designed to streamline the process of querying Large Language Models
  7 | (LLMs) 🤖💬
  8 | 
  9 | Developed by the Allegro engineers, allms is based on popular libraries like transformers, pydantic, and langchain. It takes care 
 10 | of the boring boiler-plate code you write around your LLM applications, quickly enabling you to prototype ideas, and eventually helping you to scale up 
 11 | for production use-cases!
 12 | 
 13 | Among the allms most notable features, you will find:
 14 | 
 15 | * **😊 Simple and User-Friendly Interface**: The module offers an intuitive and easy-to-use interface, making it straightforward to work with the model.
 16 | 
 17 | * **🔀 Asynchronous Querying**: Requests to the model are processed asynchronously by default, ensuring efficient and non-blocking interactions.
 18 | 
 19 | * **🔄 Automatic Retrying Mechanism** : The module includes an automatic retrying mechanism, which helps handle transient errors and ensures that queries to the model are robust.
 20 | 
 21 | * **🛠️ Error Handling and Management**: Errors that may occur during interactions with the model are handled and managed gracefully, providing informative error messages and potential recovery options.
 22 | 
 23 | * **⚙️ Output Parsing**: The module simplifies the process of defining the model's output format as well as parsing and working with it, allowing you to easily extract the information you need.
 24 | 
 25 | ___
 26 | 
 27 | ## Supported Models
 28 | 
 29 | | LLM Family  | Hosting             | Supported LLMs                                                                    |
 30 | | :---------- | :------------------ | :---------------------------------------------------------------------------------- |
 31 | | GPT(s)      | OpenAI endpoint     | `gpt-3.5-turbo`, `gpt-4`, `gpt-4-turbo`, `gpt4-o`, `gpt4-o mini`                   |
 32 | | Google LLMs | VertexAI deployment | `text-bison@001`, [Gemini family](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models) |
 33 | | Llama2      | Azure deployment    | `llama2-7b`, `llama2-13b`, `llama2-70b`                                          |
 34 | | Mistral     | Azure deployment    | `Mistral-7b`, `Mixtral-7bx8`                                                      |
 35 | | Gemma       | GCP deployment      | `gemma`                                                                           |
 36 | 
 37 | * Do you already have a subscription to a Cloud Provider for any the models above? Configure
 38 | the model using your credentials and start querying!
 39 | * Are you interested in knowing how to self-deploy open-source models in Azure and GCP?
 40 | Consult our [guide](https://allms.allegro.tech/usage/deploy_open_source_models/)
 41 | 
 42 | ___
 43 | 
 44 | ## Documentation
 45 | 
 46 | Full documentation available at **[allms.allegro.tech](https://allms.allegro.tech/)**
 47 | 
 48 | Get familiar with allms 🚀: [introductory jupyter notebook](https://github.com/allegro/allms/blob/main/examples/introduction.ipynb)
 49 | 
 50 | ___
 51 | 
 52 | ## Quickstart
 53 | 
 54 | ### Installation 🚧
 55 | 
 56 | Install the package via pip:
 57 | 
 58 | ```
 59 | pip install allms
 60 | ```
 61 | 
 62 | ### Basic Usage ⭐
 63 | 
 64 | Configure endpoint credentials and start querying the model with any prompt:
 65 | 
 66 | ```python
 67 | from allms.models import AzureOpenAIModel
 68 | from allms.domain.configuration import AzureOpenAIConfiguration
 69 | 
 70 | configuration = AzureOpenAIConfiguration(
 71 |    api_key="your-secret-api-key",
 72 |    base_url="https://endpoint.openai.azure.com/",
 73 |    api_version="2023-03-15-preview",
 74 |    deployment="gpt-35-turbo",
 75 |    model_name="gpt-3.5-turbo"
 76 | )
 77 | 
 78 | gpt_model = AzureOpenAIModel(config=configuration)
 79 | gpt_response = gpt_model.generate(prompt="Plan me a 3-day holiday trip to Italy")
 80 | ```
 81 | 
 82 | You can pass also a system prompt:
 83 | 
 84 | ```python
 85 | gpt_response = gpt_model.generate(
 86 |     system_prompt="You are an AI assistant acting as a trip planner",
 87 |     prompt="Plan me a 3-day holiday trip to Italy"
 88 | )
 89 | ```
 90 | 
 91 | ### Advanced Usage 🔥
 92 | 
 93 | ### Batch Querying and Symbolic Variables
 94 | 
 95 | If you want to generate responses for a batch of examples, you can achieve this by preparing a prompt with symbolic
 96 | variables and providing input data that will be injected into the prompt. Symbolic variables can be more than one.
 97 | 
 98 | ```python
 99 | positive_review_0 = "Very good coffee, lightly roasted, with good aroma and taste. The taste of sourness is barely noticeable (which is good because I don't like sour coffees). After grinding, the aroma spreads throughout the room. I recommend it to all those who do not like strongly roasted and pitch-black coffees. A very good solution is to close the package with string, which allows you to preserve the aroma and freshness."
100 | positive_review_1 = "Delicious coffee!! Delicate, just the way I like it, and the smell after opening is amazing. It smells freshly roasted. Faithful to Lavazza coffee for years, I decided to look for other flavors. Based on the reviews, I blindly bought it and it was a 10-shot, it outperformed Lavazze in taste. For me the best."
101 | negative_review = "Marketing is doing its job and I was tempted too, but this coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend."
102 | 
103 | prompt = "You'll be provided with a review of a coffe. Decide if the review is positive or negative. Review: {review}"
104 | input_data = [
105 |     InputData(input_mappings={"review": positive_review_0}, id="0"),
106 |     InputData(input_mappings={"review": positive_review_1}, id="1"),
107 |     InputData(input_mappings={"review": negative_review}, id="2")
108 | ]
109 | 
110 | responses = model.generate(prompt=prompt, input_data=input_data)
111 | 
112 | # >>> {f"review_id={response.input_data.id}": response.response for response in responses}
113 | # {
114 | #     'review_id=0': 'The review is positive.',
115 | #     'review_id=1': 'The review is positive.',
116 | #     'review_id=2': 'The review is negative.'
117 | # }
118 | ```
119 | 
120 | ### Forcing Structured Output Format
121 | 
122 | Through pydantic integration, in allms you can pass an output dataclass and force the LLM to provide
123 | you the response in a structured way.
124 | 
125 | ```python
126 | from pydantic import BaseModel, Field
127 | 
128 | class ReviewOutputDataModel(BaseModel):
129 |     summary: str = Field(description="Summary of a product description")
130 |     should_buy: bool = Field(description="Recommendation whether I should buy the product or not")
131 |     brand_name: str = Field(description="Brand of the coffee")
132 |     aroma:str = Field(description="Description of the coffee aroma")
133 |     cons: list[str] = Field(description="List of cons of the coffee")
134 | 
135 | 
136 | review = "Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend."
137 |     
138 | prompt = "Summarize review of the coffee. Review: {review}"
139 | input_data = [InputData(input_mappings={"review": review}, id="0")]
140 |     
141 | responses = model.generate(
142 |     prompt=prompt, 
143 |     input_data=input_data,
144 |     output_data_model_class=ReviewOutputDataModel
145 | )
146 | response = responses[0].response
147 | 
148 | # >>> type(response)
149 | # ReviewOutputDataModel
150 | # 
151 | # >>> response.should_buy
152 | # False
153 | # 
154 | # >>> response.brand_name
155 | # "Blue Orca"
156 | # 
157 | # >>> response.aroma
158 | # "Not mentioned in the review"
159 | # 
160 | # >>> response.cons
161 | # ['Weak in terms of strength', 'Weak in terms of taste']
162 | ```
163 | ___
164 | 
165 | ## Local Development 🛠️
166 | 
167 | ### Installation from the source
168 | 
169 | We assume that you have python `3.10.*` installed on your machine. 
170 | You can set it up using [pyenv](https://github.com/pyenv/pyenv#installationbrew) 
171 | ([How to install pyenv on MacOS](https://jordanthomasg.medium.com/python-development-on-macos-with-pyenv-2509c694a808)). To install allms env locally:
172 | 
173 | * Activate your venv;
174 | * Install Poetry via:
175 | 
176 | ```bash
177 | make install-poetry
178 | ```
179 | 
180 | * Install allms dependencies with the command:
181 | 
182 | ```bash
183 | make install-env
184 | ```
185 | 
186 | 
187 | ### Tests
188 | 
189 | In order to execute tests, run:
190 | 
191 | ```bash
192 | make tests
193 | ```
194 | 
195 | ### Updating the documentation
196 | 
197 | Run `mkdocs serve` to serve a local instance of the documentation.
198 | 
199 | Modify the content of `docs` directory to update the documentation. The updated content will be deployed
200 | via the github action `.github/workflows/docs.yml`
201 | 
202 | ### Make a new release
203 | 
204 | When a new version of allms is ready to be released, do the following operations:
205 | 
206 | 1. **Merge to master** the dev branch in which the new version has been specified:
207 |     1. In this branch, `version` under `[tool.poetry]` section in `pyproject.toml` should be updated, e.g `0.1.0`;
208 | 
209 | 2. **Tag the new master** with the name of the newest version using command-line:
210 |     1. `git tag -a <NEW-VERSION>`
211 |     2. `git push origin <NEW-VERSION>`
212 | 
213 | 3. **Publish package to PyPI**:
214 |     1. Go to _Actions_ → _Manual Publish To PyPI_;
215 |     2. Select "master" as branch and click _Run workflow_;
216 |     3. If successful, you will find the package under # TODO: open-source.
217 | 
218 | 4. **Make a GitHub release**:
219 |     1. Go to _Releases_ → _Draft a new release_;
220 |     2. Select the recently created tag in _Choose a tag_ window;
221 |     3. Copy/paste all the content present in the CHANGELOG under the version you are about to release;
222 |     4. Upload `allms-<NEW-VERSION>.whl` and `allms-<NEW-VERSION>.tar.gz` as assets;
223 |     5. Click `Publish release`.


--------------------------------------------------------------------------------
/tests/test_end_to_end.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import logging
  3 | 
  4 | import pytest
  5 | import httpx
  6 | import respx
  7 | from httpx import Response
  8 | from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate, \
  9 |     SystemMessagePromptTemplate
 10 | 
 11 | from allms.constants.input_data import IODataConstants
 12 | from allms.defaults.vertex_ai import GeminiModelDefaults
 13 | from allms.domain.configuration import VertexAIConfiguration
 14 | from allms.domain.prompt_dto import KeywordsOutputClass
 15 | from allms.models import VertexAIGeminiModel, HarmBlockThreshold, HarmCategory
 16 | from allms.utils import io_utils
 17 | from tests.conftest import AzureOpenAIEnv
 18 | 
 19 | 
 20 | class TestEndToEnd:
 21 | 
 22 |     def test_model_is_queried_successfully(
 23 |             self,
 24 |             models
 25 |     ):
 26 |         # GIVEN
 27 |         with respx.mock:
 28 |             respx.post(
 29 |                 url=re.compile(f"^{AzureOpenAIEnv.OPENAI_API_BASE}.*$")).mock(
 30 |                 return_value=Response(status_code=200, json={
 31 |                     "choices": [{
 32 |                         "message": {
 33 |                             "content": "{\"keywords\": [\"Indywidualna racja żywnościowa\", \"wojskowa\", \"S-R-9\", \"set nr 9\", \"Makaron po bolońsku\", \"Konserwa tyrolska\", \"Suchary\", \"Koncentrat napoju herbacianego instant o smaku owoców leśnych\", \"Dżem malinowy\", \"Baton zbożowo-owocowy o smaku figowym\"]}",
 34 |                             "role": ""
 35 |                         }
 36 |                     }],
 37 |                     "usage": {}
 38 |                 },
 39 |                                       )
 40 |             )
 41 | 
 42 |             input_data = io_utils.load_csv_to_input_data(
 43 |                 limit=5,
 44 |                 path="./tests/resources/test_input_data.csv"
 45 |             )
 46 |             prompt_template_text = """Extract at most 10 keywords that could be used as features in a search index from this Polish product description.
 47 | 
 48 |         {text}
 49 |         """
 50 | 
 51 |             # WHEN
 52 |             parsed_responses = models["azure_open_ai"].generate(
 53 |                 prompt=prompt_template_text,
 54 |                 input_data=input_data,
 55 |                 output_data_model_class=KeywordsOutputClass,
 56 |                 system_prompt="This is a system prompt."
 57 |             )
 58 |             parsed_responses = sorted(parsed_responses, key=lambda key: key.input_data.id)
 59 | 
 60 |             # THEN
 61 |             expected_output = io_utils.load_csv("./tests/resources/test_end_to_end_expected_output.csv")
 62 |             expected_output = sorted(expected_output, key=lambda example: example[IODataConstants.ID])
 63 |             for idx in range(len(expected_output)):
 64 |                 expected_output[idx]["response"] = eval(expected_output[idx]["response"])
 65 | 
 66 |             assert list(map(lambda output: output[IODataConstants.ID], expected_output)) == list(
 67 |                 map(lambda example: example.input_data.id, parsed_responses))
 68 | 
 69 |             assert list(map(lambda output: output[IODataConstants.TEXT], expected_output)) == list(
 70 |                 map(lambda example: example.input_data.input_mappings["text"], parsed_responses))
 71 | 
 72 |             assert list(map(lambda output: output[IODataConstants.RESPONSE_STR_NAME], expected_output)) == list(
 73 |                 map(lambda example: example.response.keywords, parsed_responses))
 74 | 
 75 |             assert list(map(lambda output: int(output[IODataConstants.PROMPT_TOKENS_NUMBER]), expected_output)) == list(
 76 |                 map(lambda example: example.number_of_prompt_tokens, parsed_responses))
 77 | 
 78 |             assert list(
 79 |                 map(lambda output: int(output[IODataConstants.GENERATED_TOKENS_NUMBER]), expected_output)) == list(
 80 |                 map(lambda example: example.number_of_generated_tokens, parsed_responses))
 81 | 
 82 |     def test_prompt_is_not_modified_for_open_source_models(self, models, mocker):
 83 |         # GIVEN
 84 |         open_source_models = ["azure_llama2", "azure_mistral", "vertex_gemma"]
 85 | 
 86 |         with respx.mock:
 87 |             respx.post(
 88 |                 url=re.compile(f"^https:\/\/dummy-endpoint.*$")).mock(
 89 |                 return_value=Response(status_code=200, json={
 90 |                     "choices": [{
 91 |                         "message": {
 92 |                             "content": "{\"keywords\": [\"Indywidualna racja żywnościowa\", \"wojskowa\", \"S-R-9\", \"set nr 9\", \"Makaron po bolońsku\", \"Konserwa tyrolska\", \"Suchary\", \"Koncentrat napoju herbacianego instant o smaku owoców leśnych\", \"Dżem malinowy\", \"Baton zbożowo-owocowy o smaku figowym\"]}",
 93 |                             "role": ""
 94 |                         }
 95 |                     }],
 96 |                     "usage": {}
 97 |                 },
 98 |                                       ))
 99 | 
100 |         input_data = io_utils.load_csv_to_input_data(
101 |             limit=5,
102 |             path="./tests/resources/test_input_data.csv"
103 |         )
104 |         prompt_template_text = """Extract at most 10 keywords that could be used as features in a search index from this Polish product description.
105 | 
106 |         {text}
107 |         """
108 |         prompt_template_spy = mocker.spy(ChatPromptTemplate, "from_messages")
109 | 
110 |         # WHEN & THEN
111 |         for model_name, model in models.items():
112 |             model.generate(
113 |                 prompt=prompt_template_text,
114 |                 input_data=input_data,
115 |                 output_data_model_class=KeywordsOutputClass,
116 |                 system_prompt=None if model_name == "azure_mistral" else "This is a system prompt."
117 |             )
118 | 
119 |             if model_name in open_source_models:
120 |                 messages = [
121 |                     HumanMessagePromptTemplate(
122 |                         prompt=PromptTemplate(
123 |                             input_variables=["text"],
124 |                             template=prompt_template_text
125 |                         )
126 |                     )
127 |                 ]
128 |                 if model_name != "azure_mistral":
129 |                     messages = [
130 |                                    SystemMessagePromptTemplate(
131 |                                        prompt=PromptTemplate(
132 |                                            input_variables=[],
133 |                                            template="This is a system prompt."
134 |                                        )
135 |                                    )
136 |                                ] + messages
137 |                 prompt_template_spy.assert_called_with(messages)
138 |             else:
139 |                 prompt_template_spy.assert_called_with([
140 |                     SystemMessagePromptTemplate(
141 |                         prompt=PromptTemplate(
142 |                             input_variables=[],
143 |                             template="This is a system prompt."
144 |                         )
145 |                     ),
146 |                     HumanMessagePromptTemplate(
147 |                         prompt=PromptTemplate(
148 |                             input_variables=["text"],
149 |                             partial_variables={
150 |                                 'output_data_model': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"keywords": {"description": "List of keywords", "items": {"type": "string"}, "title": "Keywords", "type": "array"}}, "required": ["keywords"]}\n```'
151 |                             },
152 |                             template=f"{prompt_template_text}\n\n{{output_data_model}}"
153 |                         )
154 |                     )
155 |                 ])
156 | 
157 |     def test_gemini_version_is_passed_to_model(self):
158 |         # GIVEN
159 |         model_config = VertexAIConfiguration(
160 |             cloud_project="dummy-project-id",
161 |             cloud_location="us-central1",
162 |             gemini_model_name="gemini-1.0-pro-001"
163 |         )
164 | 
165 |         # WHEN
166 |         gemini_model = VertexAIGeminiModel(config=model_config)
167 | 
168 |         # THEN
169 |         assert gemini_model._llm.model_name == "gemini-1.0-pro-001"
170 | 
171 |     def test_model_times_out(
172 |             self,
173 |             models
174 |     ):
175 |         # GIVEN
176 |         with respx.mock:
177 |             respx.post(re.compile(f"^https:\/\/dummy-endpoint.*$")).mock(
178 |                 side_effect=httpx.TimeoutException("Request timed out")
179 |             )
180 | 
181 |             # WHEN
182 |             responses = models["azure_open_ai"].generate("Some prompt")
183 | 
184 |             # THEN
185 |             assert responses[0].response is None
186 |             assert "Request timed out" in responses[0].error
187 |     def test_gemini_specific_args_are_passed_to_model(self):
188 |         gemini_model_name = "gemini-1.5-pro-001"
189 |         gemini_safety_settings = {
190 |             HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE,
191 |             HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
192 |             HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
193 |             HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
194 |             HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
195 |         }
196 |         model_config = VertexAIConfiguration(
197 |             cloud_project="dummy-project-id",
198 |             cloud_location="us-central1",
199 |             gemini_model_name=gemini_model_name,
200 |             gemini_safety_settings=gemini_safety_settings
201 |         )
202 |         # WHEN
203 |         gemini_model = VertexAIGeminiModel(config=model_config)
204 | 
205 |         # THEN
206 |         assert gemini_model._llm.model_name == gemini_model_name
207 |         assert gemini_model._llm.safety_settings == gemini_safety_settings
208 | 
209 |     @pytest.mark.parametrize(
210 |         "model_name", [
211 |             "gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash","gemini-1.0-pro-001", "gemini-1.0-pro-002",
212 |             "gemini-1.5-pro-001", "gemini-1.5-flash-001", "gemini-1.5-pro-preview-0514"
213 |         ]
214 |     )
215 |     def test_correct_gemini_model_name_work(self, model_name):
216 |         # GIVEN
217 |         model_config = VertexAIConfiguration(
218 |             cloud_project="dummy-project-id",
219 |             cloud_location="us-central1",
220 |             gemini_model_name=model_name,
221 |         )
222 | 
223 |         # WHEN & THEN
224 |         VertexAIGeminiModel(config=model_config)
225 | 
226 |     @pytest.mark.parametrize(
227 |         "model_name", [
228 |             "gemini-2.0-flash-lite", "gemini-2.0-flash", "ggemini-2.5-pro-exp-03-25","gemini-x"
229 |         ]
230 |     )
231 |     def test_default_tokenizer_fallback(self, caplog, model_name):
232 |         # GIVEN
233 |         model_config = VertexAIConfiguration(
234 |             cloud_project="dummy-project-id",
235 |             cloud_location="us-central1",
236 |             gemini_model_name=model_name,
237 |         )
238 | 
239 |         # WHEN
240 |         with caplog.at_level(logging.INFO):
241 |             VertexAIGeminiModel(config=model_config)
242 | 
243 |         # THEN
244 |         assert (
245 |                 f"Model {model_name} is not supported for tokenization, using default tokenizer:"
246 |                 f" {GeminiModelDefaults.GCP_MODEL_NAME}"
247 |                 in caplog.text
248 |         )
249 | 
250 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/allms/models/abstract.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import logging
  3 | import re
  4 | import typing
  5 | import urllib
  6 | from abc import ABC, abstractmethod
  7 | from functools import partial
  8 | from urllib.error import URLError
  9 | 
 10 | import google
 11 | import openai
 12 | from google.api_core.exceptions import InvalidArgument
 13 | from langchain.chains import LLMChain
 14 | from langchain.chat_models.base import BaseChatModel
 15 | from langchain.output_parsers import PydanticOutputParser
 16 | from langchain.prompts import ChatPromptTemplate
 17 | from langchain_core.language_models.llms import create_base_retry_decorator
 18 | from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate
 19 | from langchain_core.prompts.prompt import PromptTemplate
 20 | from pydantic import BaseModel
 21 | 
 22 | import allms.exceptions.validation_input_data_exceptions as input_exception_message
 23 | import allms.models as models
 24 | from allms.chains.long_text_processing_chain import (
 25 |     LongTextProcessingChain,
 26 |     load_long_text_processing_chain
 27 | )
 28 | from allms.constants.input_data import IODataConstants
 29 | from allms.constants.prompt import PromptConstants
 30 | from allms.defaults.general_defaults import GeneralDefaults
 31 | from allms.defaults.long_text_chain import LongTextChainDefaults
 32 | from allms.domain.enumerables import AggregationLogicForLongInputData, LanguageModelTask
 33 | from allms.domain.input_data import InputData
 34 | from allms.domain.prompt_dto import SummaryOutputClass, KeywordsOutputClass
 35 | from allms.domain.response import ResponseData
 36 | from allms.models.vertexai_base import GCPInvalidRequestError
 37 | from allms.utils.long_text_processing_utils import get_max_allowed_number_of_tokens
 38 | from allms.utils.response_parsing_utils import ResponseParser
 39 | 
 40 | logger = logging.getLogger(__name__)
 41 | 
 42 | 
 43 | class AbstractModel(ABC):
 44 |     def __init__(
 45 |             self,
 46 |             temperature: float,
 47 |             max_output_tokens: int,
 48 |             model_total_max_tokens: int,
 49 |             event_loop: typing.Optional[asyncio.AbstractEventLoop] = None,
 50 |             max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY,
 51 |             max_retries: int = GeneralDefaults.MAX_RETRIES
 52 |     ):
 53 |         self._model_total_max_tokens = model_total_max_tokens
 54 |         self._max_output_tokens = max_output_tokens
 55 |         self._temperature = temperature
 56 |         self._semaphore = asyncio.Semaphore(max_concurrency)
 57 | 
 58 |         # TODO: To be changed after implementing support for long sequences
 59 |         self._task = LanguageModelTask.KEYWORDS
 60 |         self._is_long_text_bypass_enabled: bool = False  # Should be false till we fully implement support for long sequences in our package
 61 |         self._aggregation_strategy: AggregationLogicForLongInputData = AggregationLogicForLongInputData.SIMPLE_CONCATENATION
 62 |         self._parser: typing.Optional[PydanticOutputParser] = None
 63 |         self._json_pattern = re.compile(r"{.*?}", re.DOTALL)
 64 |         self._is_json_format_injected_into_prompt: bool = True
 65 | 
 66 |         if max_output_tokens >= model_total_max_tokens:
 67 |             raise ValueError("max_output_tokens has to be lower than model_total_max_tokens")
 68 | 
 69 |         self._llm = self._create_llm()
 70 | 
 71 |         if not event_loop:
 72 |             try:
 73 |                 event_loop = asyncio.get_running_loop()
 74 |             except RuntimeError as error:
 75 |                 event_loop = asyncio.new_event_loop()
 76 |                 asyncio.set_event_loop(event_loop)
 77 |         self._event_loop = event_loop
 78 | 
 79 |         self._predict_example = create_base_retry_decorator(
 80 |             error_types=[
 81 |                 openai.RateLimitError, openai.APIError, openai.Timeout,
 82 |                 openai.APIConnectionError, openai.InternalServerError,
 83 |                 google.api_core.exceptions.ResourceExhausted, urllib.error.HTTPError
 84 |             ],
 85 |             max_retries=max_retries,
 86 |         )(self._predict_example)
 87 | 
 88 |     @abstractmethod
 89 |     def _create_llm(self) -> BaseChatModel:
 90 |         ...
 91 | 
 92 |     def _get_prompt_tokens_number(self, prompt: ChatPromptTemplate, input_data: InputData) -> int:
 93 |         return self._llm.get_num_tokens(prompt.format_prompt(**input_data.input_mappings).to_string())
 94 | 
 95 |     def _get_model_response_tokens_number(self, model_response: typing.Optional[str]) -> int:
 96 |         if model_response:
 97 |             return self._llm.get_num_tokens(model_response)
 98 |         return 0
 99 | 
100 |     def generate(
101 |             self,
102 |             prompt: str,
103 |             input_data: typing.Optional[typing.List[InputData]] = None,
104 |             output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None,
105 |             system_prompt: typing.Optional[str] = None
106 |     ) -> typing.List[ResponseData]:
107 |         model_responses = self._event_loop.run_until_complete(
108 |             self._generate(
109 |                 prompt=prompt,
110 |                 input_data=input_data,
111 |                 output_data_model_class=output_data_model_class,
112 |                 system_prompt=system_prompt
113 |             )
114 |         )
115 | 
116 |         if output_data_model_class:
117 |             return ResponseParser(self._parser).parse_model_output(model_responses)
118 |         return model_responses
119 | 
120 |     async def _generate(
121 |             self,
122 |             prompt: str,
123 |             input_data: typing.Optional[typing.List[InputData]] = None,
124 |             output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None,
125 |             system_prompt: typing.Optional[str] = None
126 |     ) -> typing.List[ResponseData]:
127 |         self._validate_system_prompt(system_prompt=system_prompt)
128 |         self._validate_input(prompt=prompt, input_data=input_data)
129 |         if input_data is None:
130 |             # Prompt without symbolic variables is passed - create input_data accordingly
131 |             input_data = [InputData(input_mappings={}, id=IODataConstants.DEFAULT_ID)]
132 | 
133 |         prompt_template_args = {
134 |             PromptConstants.TEMPLATE_STR: prompt,
135 |             PromptConstants.INPUT_VARIABLES_STR: list(input_data[0].get_input_keys())
136 |         }
137 | 
138 |         if output_data_model_class:
139 |             self._parser = PydanticOutputParser(pydantic_object=output_data_model_class)
140 | 
141 |             if self._is_json_format_injected_into_prompt:
142 |                 prompt_template_args[PromptConstants.PARTIAL_VARIABLES_STR] = {
143 |                     PromptConstants.OUTPUT_DATA_MODEL: self._parser.get_format_instructions(),
144 |                 }
145 |                 prompt_template_args[PromptConstants.TEMPLATE_STR] = self._add_output_data_format(prompt=prompt)
146 | 
147 |         chat_prompts = await self._build_chat_prompts(prompt_template_args, system_prompt)
148 | 
149 |         prompt_template = ChatPromptTemplate.from_messages(chat_prompts)
150 | 
151 |         chain = self._get_chain(prompt_template)
152 |         long_chain = self._get_chain_for_long_text(prompt_template)
153 | 
154 |         predict_example_any_length_partial = partial(
155 |             self._predict_example_of_any_length,
156 |             prompt_template=prompt_template,
157 |             standard_chain=chain,
158 |             long_chain=long_chain
159 |         )
160 | 
161 |         logger.info("Generating responses...")
162 |         results = list(map(lambda data: predict_example_any_length_partial(input_data=data), input_data))
163 | 
164 |         responses = await asyncio.gather(*results)
165 | 
166 |         return responses
167 | 
168 |     async def _build_chat_prompts(
169 |             self,
170 |             prompt_template_args: dict,
171 |             system_prompt: SystemMessagePromptTemplate
172 |     ) -> typing.List[typing.Union[SystemMessagePromptTemplate, HumanMessagePromptTemplate]]:
173 |         human_message = HumanMessagePromptTemplate(prompt=PromptTemplate(**prompt_template_args))
174 |         if not system_prompt:
175 |             return [human_message]
176 |         system_message_template = SystemMessagePromptTemplate.from_template(system_prompt)
177 | 
178 |         return [system_message_template, human_message]
179 | 
180 |     @staticmethod
181 |     def _add_output_data_format(prompt: str) -> str:
182 |         return f"{prompt}{PromptConstants.OUTPUT_DATA_MODEL_CLASS_SEPARATOR}{{{PromptConstants.OUTPUT_DATA_MODEL}}}"
183 | 
184 |     def _validate_input_data_len(
185 |             self,
186 |             input_data: InputData,
187 |             number_of_prompt_tokens: int
188 |     ):
189 |         if number_of_prompt_tokens > self._model_total_max_tokens:
190 |             raise ValueError(
191 |                 f"Prompt is too long. Entire prompt has {number_of_prompt_tokens} tokens, where the max allowed number "
192 |                 f"of tokens of the model is {self._model_total_max_tokens}. This leaves no space for the model to "
193 |                 f"generate a response and will lead to errors. Example id: {input_data.id}"
194 |             )
195 |         elif number_of_prompt_tokens + self._max_output_tokens > self._model_total_max_tokens:
196 |             logger.warning(
197 |                 f"Number of prompt tokens plus generated tokens may exceed the the max allowed number of tokens of the "
198 |                 f"model. Entire prompt has {number_of_prompt_tokens} tokens, the max number of tokens to generate is "
199 |                 f"{self._max_output_tokens} and the max allowed number of tokens of the model is "
200 |                 f"{self._model_total_max_tokens}. Consider lowering the max_output_tokens param or truncating the "
201 |                 f"input, because otherwise it may lead to unexpected errors. Example id: {input_data.id}"
202 |             )
203 | 
204 |     def _predict_example_of_any_length(
205 |             self,
206 |             input_data: InputData,
207 |             prompt_template: ChatPromptTemplate,
208 |             standard_chain: LLMChain,
209 |             long_chain: LLMChain
210 |     ) -> ResponseData:
211 |         number_of_prompt_tokens = self._get_prompt_tokens_number(
212 |             prompt=prompt_template,
213 |             input_data=input_data
214 |         )
215 |         max_token_limit = get_max_allowed_number_of_tokens(self._model_total_max_tokens, self._max_output_tokens)
216 |         is_example_too_long = number_of_prompt_tokens > max_token_limit
217 | 
218 |         predict_example_partial = partial(
219 |             self._predict_example,
220 |             input_data=input_data,
221 |             prompt_tokens_number=number_of_prompt_tokens
222 |         )
223 |         if is_example_too_long and self._is_long_text_bypass_enabled:
224 |             return predict_example_partial(chain=long_chain)
225 |         return predict_example_partial(chain=standard_chain)
226 | 
227 |     async def _predict_example(
228 |             self,
229 |             chain: LLMChain,
230 |             input_data: InputData,
231 |             prompt_tokens_number: int
232 |     ) -> ResponseData:
233 |         error_message: typing.Optional[str] = None
234 |         number_of_input_mappings = len(input_data.input_mappings)
235 | 
236 |         try:
237 |             self._validate_input_data_len(input_data=input_data, number_of_prompt_tokens=prompt_tokens_number)
238 |         except ValueError as value_error:
239 |             logger.info(f"Error for id {input_data.id} has occurred. Message: {value_error} ")
240 |             error_message = f"{IODataConstants.VALUE_ERROR_MESSAGE}: {value_error}"
241 |             return ResponseData(
242 |                 input_data=None if number_of_input_mappings == 0 else input_data,
243 |                 response=None,
244 |                 number_of_prompt_tokens=prompt_tokens_number,
245 |                 number_of_generated_tokens=0,
246 |                 error=error_message
247 |             )
248 | 
249 |         try:
250 |             async with self._semaphore:
251 |                 if number_of_input_mappings == 0:
252 |                     # Workaround when prompt without symbolic variables is passed - arun() can't be called without any arg
253 |                     model_response = chain.run({}) if hasattr(chain.llm, "api_transport") and chain.llm.api_transport == "rest" else await chain.arun({})
254 |                 else:
255 |                     model_response = chain.run(
256 |                         **input_data.input_mappings) if hasattr(chain.llm, "api_transport") and chain.llm.api_transport == "rest" else await chain.arun(
257 |                         **input_data.input_mappings)
258 |         except openai.InternalServerError as invalid_request_error:
259 |             logger.info(f"Error for id {input_data.id} has occurred. Message: {invalid_request_error} ")
260 |             if invalid_request_error.code == "content_filter":
261 |                 model_response = None
262 |                 error_message = f"{IODataConstants.CONTENT_FILTER_MESSAGE}: {invalid_request_error}"
263 |             else:
264 |                 model_response = None
265 |                 error_message = f"{IODataConstants.ERROR_MESSAGE_STR}: {invalid_request_error}"
266 | 
267 |         except (InvalidArgument, ValueError, TimeoutError, openai.APIError, GCPInvalidRequestError,
268 |                 openai.APITimeoutError) as other_error:
269 |             model_response = None
270 |             logger.info(f"Error for id {input_data.id} has occurred. Message: {other_error} ")
271 |             error_message = f"{type(other_error).__name__}: {other_error}"
272 | 
273 |         return ResponseData(
274 |             input_data=None if number_of_input_mappings == 0 else input_data,
275 |             response=model_response,
276 |             number_of_prompt_tokens=prompt_tokens_number,
277 |             number_of_generated_tokens=self._get_model_response_tokens_number(model_response),
278 |             error=error_message
279 |         )
280 | 
281 |     def _get_number_of_tokens_in_prompt(self, prompt: PromptTemplate, input_data: InputData) -> int:
282 |         return self._llm.get_num_tokens(prompt.format_prompt(**input_data.input_mappings).to_string())
283 | 
284 |     def _get_chain(self, prompt: PromptTemplate) -> LLMChain:
285 |         return LLMChain(
286 |             llm=self._llm,
287 |             prompt=prompt,
288 |         )
289 | 
290 |     # TODO: When adding support for long documents, we'll need to rethink how output_data_model will be passed to the
291 |     # TODO: aggregation prompt
292 |     def _get_chain_for_long_text(
293 |             self,
294 |             prompt_template: PromptTemplate,
295 |     ) -> LongTextProcessingChain:
296 |         parser = PydanticOutputParser(
297 |             pydantic_object=SummaryOutputClass if self._task == LanguageModelTask.SUMMARY else KeywordsOutputClass)
298 |         reduce_prompt_template = PromptTemplate(
299 |             template=LongTextChainDefaults.AGGREGATION_PROMPT,
300 |             input_variables=["text"],
301 |             partial_variables={PromptConstants.OUTPUT_DATA_MODEL: parser.get_format_instructions()}
302 |         )
303 | 
304 |         return load_long_text_processing_chain(
305 |             task=self._task,
306 |             llm=self._llm,
307 |             model_total_max_tokens=self._model_total_max_tokens,
308 |             max_output_tokens=self._max_output_tokens,
309 |             map_prompt=prompt_template,
310 |             reduce_prompt=reduce_prompt_template,
311 |             aggregation_strategy=self._aggregation_strategy
312 |         )
313 | 
314 |     def _validate_input(self, prompt: str, input_data: typing.Optional[typing.List[InputData]] = None) -> None:
315 |         # Extracts text inside the {} but escapes the text inside {{}}
316 |         # This behaviour allows to pass JSON-like strings to the prompt
317 |         # reference: https://github.com/langchain-ai/langchain/issues/1660#issuecomment-1469320129
318 |         prompt_input_variables_set = AbstractModel._extract_input_variables_from_prompt(prompt)
319 |         if PromptConstants.OUTPUT_DATA_MODEL in prompt_input_variables_set:
320 |             prompt_input_variables_set.remove(PromptConstants.OUTPUT_DATA_MODEL)
321 | 
322 |         if input_data:
323 |             for data in input_data:
324 |                 self._validate_input_data(prompt_input_variables_set, data)
325 |         elif len(prompt_input_variables_set) > 0:
326 |             raise ValueError(
327 |                 input_exception_message.get_prompt_contains_input_key_when_missing_input_data())
328 | 
329 |     def _validate_system_prompt(self, system_prompt: typing.Optional[str] = None) -> None:
330 |         if isinstance(self, models.AzureMistralModel) and system_prompt is not None:
331 |             raise ValueError(input_exception_message.get_system_prompt_is_not_supported_by_model())
332 |         elif system_prompt:
333 |             prompt_input_variables_set = AbstractModel._extract_input_variables_from_prompt(system_prompt)
334 |             if prompt_input_variables_set:
335 |                 raise ValueError(input_exception_message.get_system_prompt_contains_input_variables())
336 | 
337 |     @staticmethod
338 |     def _extract_input_variables_from_prompt(prompt: str) -> typing.Set[str]:
339 |         input_variables_pattern = r'(?<!\{)\{([^{}]+)\}(?!\})'
340 |         input_variables_set = set(re.findall(input_variables_pattern, prompt))
341 |         return input_variables_set
342 | 
343 |     @staticmethod
344 |     def _validate_input_data(
345 |             prompt_input_variables: typing.Set[str],
346 |             input_data: typing.Optional[InputData] = None
347 |     ) -> None:
348 |         if len(input_data.input_mappings.keys()) > 0 and len(prompt_input_variables) == 0:
349 |             raise ValueError(input_exception_message.get_missing_input_data_in_prompt_message(input_data.id))
350 | 
351 |         if len(input_data.input_mappings.keys()) == 0 and len(prompt_input_variables) > 0:
352 |             raise ValueError(input_exception_message.get_missing_input_data_in_input_data_message(input_data.id))
353 | 
354 |         if len(input_data.input_mappings.keys()) != len(prompt_input_variables):
355 |             raise ValueError(input_exception_message.get_different_number_of_inputs_message(input_data.id))
356 | 
357 |         if not prompt_input_variables == set(input_data.get_input_keys()):
358 |             raise ValueError(input_exception_message.get_different_input_keys_message(input_data.id))
359 | 


--------------------------------------------------------------------------------
/examples/introduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "source": [
  6 |     "# Introduction\n",
  7 |     "\n",
  8 |     "Follow this tutorial to get to know the most important features of allms!\n",
  9 |     "\n"
 10 |    ],
 11 |    "metadata": {
 12 |     "collapsed": false
 13 |    },
 14 |    "id": "d6cb6b8c8fdca3cd"
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "id": "7bcb1d86-2487-4ca1-9d03-19bd3ad1a097",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "# Import and utils"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "id": "6e0a9b56-8099-4b2e-a881-01af966ed59d",
 28 |    "metadata": {
 29 |     "ExecuteTime": {
 30 |      "end_time": "2024-01-04T16:03:35.407204Z",
 31 |      "start_time": "2024-01-04T16:03:35.401117Z"
 32 |     }
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "# This allows to run asynchronous code in a Jupyter notebook\n",
 37 |     "import nest_asyncio\n",
 38 |     "\n",
 39 |     "nest_asyncio.apply()"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "id": "3b81480b-06ad-4f7e-9fe6-731baf9c80ef",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "## Setting up your LLM\n",
 48 |     "\n",
 49 |     "To start working with `allms` you need to import one of the supported models and configure it. Make sure to have access to an Azure OpenAI endpoint and dispose of the needed information. In this tutorial we are going to use a GPT model."
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "id": "9c0fba84-c906-4c40-9fcb-15f7fefd2b82",
 55 |    "metadata": {},
 56 |    "source": []
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 3,
 61 |    "id": "cecb6d45-52bb-4530-bfd9-99848b40e106",
 62 |    "metadata": {
 63 |     "ExecuteTime": {
 64 |      "end_time": "2024-01-04T16:03:39.700051Z",
 65 |      "start_time": "2024-01-04T16:03:35.414123Z"
 66 |     }
 67 |    },
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "from allms.models import AzureOpenAIModel\n",
 71 |     "from allms.domain.configuration import AzureOpenAIConfiguration\n",
 72 |     "\n",
 73 |     "configuration = AzureOpenAIConfiguration(\n",
 74 |     "    api_key=\"your-secret-api-key\",\n",
 75 |     "    base_url=\"https://endpoint.openai.azure.com/\",\n",
 76 |     "    api_version=\"2023-03-15-preview\",\n",
 77 |     "    deployment=\"gpt-35-turbo\",\n",
 78 |     "    model_name=\"gpt-3.5-turbo\"\n",
 79 |     ")\n",
 80 |     "\n",
 81 |     "model = AzureOpenAIModel(config=configuration)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "id": "d4afb572-c2a8-4e00-95a7-d7f7bdf2dc84",
 87 |    "metadata": {},
 88 |    "source": [
 89 |     "## Basic usage"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "markdown",
 94 |    "id": "4278435d-a259-408c-85dc-329b38e617d5",
 95 |    "metadata": {},
 96 |    "source": [
 97 |     "The model has a `generate()` method that is responsible for running the generations. In the most basic case, you can simply provide it with a prompt and it’ll return generated content. "
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": 5,
103 |    "id": "c2c243b4-51af-4bfd-a0a0-d9787f4d19e5",
104 |    "metadata": {
105 |     "ExecuteTime": {
106 |      "end_time": "2024-01-04T16:03:47.961341Z",
107 |      "start_time": "2024-01-04T16:03:47.030222Z"
108 |     }
109 |    },
110 |    "outputs": [
111 |     {
112 |      "data": {
113 |       "text/plain": "[ResponseData(response='The capital of Poland is Warsaw.', input_data=None, number_of_prompt_tokens=7, number_of_generated_tokens=7, error=None)]"
114 |      },
115 |      "execution_count": 5,
116 |      "metadata": {},
117 |      "output_type": "execute_result"
118 |     }
119 |    ],
120 |    "source": [
121 |     "model.generate(\"What is the capital of Poland?\")"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "id": "e3997f4f-c26f-4af2-b1b9-3c6f35217aa5",
127 |    "metadata": {},
128 |    "source": [
129 |     "This was an example of the most basic usage. But what if you wanted to run a single prompt multiple times, but with slightly changed data? For example, you have a dataset of reviews and you want to classify each of them as positive or negative. You can use batch mode to do this."
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "markdown",
134 |    "id": "c6ab1163-bc77-4234-8095-31f0592af3bc",
135 |    "metadata": {},
136 |    "source": [
137 |     "## Batch mode"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "id": "a3d3beb8-0b94-40fd-bca7-2b213362ef50",
143 |    "metadata": {},
144 |    "source": [
145 |     "Let's say you have a dataset with 3 reviews and you want to classify each of them as positive or negative. To do so:\n",
146 |     "- create a `prompt` and inside it use symbolic variable `{review}`, which will later be replaced by actual reviews coming from the dataset.\n",
147 |     "- create `input_data`. `input_data` is simply a list of `InputData`, where each `InputData` is a single example and it's a dataclass with two fields:\n",
148 |     "  - `input_mappings` - a dictionary mapping symbolic variables used in the prompt to the actual review.\n",
149 |     "  - `id` - is needed because requests are made asynchronously, so the output order will not always be the same as the input order.\n",
150 |     "- run the generation by calling the `generate()` method with the `prompt` and `input_data` as arguments. \n",
151 |     "\n",
152 |     "This will automatically run the generation in async mode, so it'll be much faster than a normal, sequential calling. Additionally, it'll automatically retry requests in case of failure. "
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 6,
158 |    "id": "eeb8c703-8203-46fb-b3c2-c0836ad2c349",
159 |    "metadata": {
160 |     "ExecuteTime": {
161 |      "end_time": "2024-01-04T16:03:48.720192Z",
162 |      "start_time": "2024-01-04T16:03:48.234700Z"
163 |     }
164 |    },
165 |    "outputs": [
166 |     {
167 |      "data": {
168 |       "text/plain": "{'review_id=0': 'The review is positive.',\n 'review_id=1': 'The review is positive.',\n 'review_id=2': 'The review is negative.'}"
169 |      },
170 |      "execution_count": 6,
171 |      "metadata": {},
172 |      "output_type": "execute_result"
173 |     }
174 |    ],
175 |    "source": [
176 |     "from allms.domain.input_data import InputData\n",
177 |     "\n",
178 |     "\n",
179 |     "positive_review_0 = \"Very good coffee, lightly roasted, with good aroma and taste. The taste of sourness is barely noticeable (which is good because I don't like sour coffees). After grinding, the aroma spreads throughout the room. I recommend it to all those who do not like strongly roasted and pitch-black coffees. A very good solution is to close the package with string, which allows you to preserve the aroma and freshness.\"\n",
180 |     "positive_review_1 = \"Delicious coffee!! Delicate, just the way I like it, and the smell after opening is amazing. It smells freshly roasted. Faithful to Lavazza coffee for years, I decided to look for other flavors. Based on the reviews, I blindly bought it and it was a 10-shot, it outperformed Lavazze in taste. For me the best.\"\n",
181 |     "negative_review = \"Marketing is doing its job and I was tempted too, but this coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend.\"\n",
182 |     "\n",
183 |     "prompt = \"You'll be provided with a review of a coffe. Decide if the review is positive or negative. Review: {review}\"\n",
184 |     "input_data = [\n",
185 |     "    InputData(input_mappings={\"review\": positive_review_0}, id=\"0\"),\n",
186 |     "    InputData(input_mappings={\"review\": positive_review_1}, id=\"1\"),\n",
187 |     "    InputData(input_mappings={\"review\": negative_review}, id=\"2\")\n",
188 |     "]\n",
189 |     "\n",
190 |     "responses = model.generate(prompt=prompt, input_data=input_data)\n",
191 |     "\n",
192 |     "{f\"review_id={response.input_data.id}\": response.response for response in responses}"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "id": "52cc821b-1d30-4220-be2e-7d4464c3d605",
198 |    "metadata": {},
199 |    "source": [
200 |     "### Multiple symbolic variables"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "markdown",
205 |    "id": "82d20f2a-cbf3-4b15-a4d0-07c7a6cbf771",
206 |    "metadata": {},
207 |    "source": [
208 |     "The example above showed a prompt with only one symbolic variable used in it. But you can use as many of them as you want.\n",
209 |     "\n",
210 |     "Let’s say you have two reviews: one positive and one negative, and you want the model to tell which one of them is positive. To do so:\n",
211 |     "- create a prompt as shown in the cell below. Two symbolic variables are used inside it: `{first_review}` and `{second_review}`.\n",
212 |     "- create `input_data`. It looks similar to the example above - it's a list of `InputData`, but here the `input_mappings` fields have two entries, one per single symbolic variable used in the prompt.\n",
213 |     "- same as above, generation is ran by calling the `generate()` method."
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 7,
219 |    "id": "c4ddfdcd-21c1-43d5-9f88-97868da710cb",
220 |    "metadata": {
221 |     "ExecuteTime": {
222 |      "end_time": "2024-01-04T16:03:50.249349Z",
223 |      "start_time": "2024-01-04T16:03:49.587683Z"
224 |     }
225 |    },
226 |    "outputs": [
227 |     {
228 |      "data": {
229 |       "text/plain": "{'example_id=0': 'The first review is positive.',\n 'example_id=1': 'The second review is positive.'}"
230 |      },
231 |      "execution_count": 7,
232 |      "metadata": {},
233 |      "output_type": "execute_result"
234 |     }
235 |    ],
236 |    "source": [
237 |     "prompt = \"\"\"You'll be provided with two reviews of a coffee. Decide which one is positive.\n",
238 |     "\n",
239 |     "First review: {first_review}\n",
240 |     "Second review: {second_review}\"\"\"\n",
241 |     "input_data = [\n",
242 |     "    InputData(input_mappings={\"first_review\": positive_review_0, \"second_review\": negative_review}, id=\"0\"),\n",
243 |     "    InputData(input_mappings={\"first_review\": negative_review, \"second_review\": positive_review_1}, id=\"1\"),\n",
244 |     "]\n",
245 |     "\n",
246 |     "responses = model.generate(prompt=prompt, input_data=input_data)\n",
247 |     "{f\"example_id={response.input_data.id}\": response.response for response in responses}"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "markdown",
252 |    "id": "1a64a6a8-28c8-4ce3-9c7c-bd1ffc56ed24",
253 |    "metadata": {},
254 |    "source": [
255 |     "## Forcing model response format"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "id": "c16211a8-c937-4f9e-8c61-286b1278f004",
261 |    "metadata": {},
262 |    "source": [
263 |     "This is one of the most interesting features of our library. In a production setup, it's often the case that we want the model to return generated content in a format that will later be easy to ingest by the rest of our pipeline - for example, json with some predefined fields. With our library it’s really easy to achieve."
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "markdown",
268 |    "id": "85da7d66-7b05-4b78-838f-82ab1c8968c6",
269 |    "metadata": {},
270 |    "source": [
271 |     "Let’s say that again you have a review of a coffee, and you want the model to generate information that might be interesting for you, and additionally you want it to return them in the format provided by you. To do so, first you have to create a dataclass that defines the output format and the information you want the model to generate. Each field of this dataclass must have a type defined and also a description provided that describes what given field means. The better the description, the better the model will understand what it should generate for a given field."
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": 8,
277 |    "id": "62fcaf9d-b5c5-4e6c-a8b6-077a8bc9288f",
278 |    "metadata": {
279 |     "ExecuteTime": {
280 |      "end_time": "2024-01-04T16:03:52.193625Z",
281 |      "start_time": "2024-01-04T16:03:52.187122Z"
282 |     }
283 |    },
284 |    "outputs": [],
285 |    "source": [
286 |     "import typing\n",
287 |     "    \n",
288 |     "from pydantic import BaseModel, Field\n",
289 |     "    \n",
290 |     "class ReviewOutputDataModel(BaseModel):\n",
291 |     "    summary: str = Field(description=\"Summary of a product description\")\n",
292 |     "    should_buy: bool = Field(description=\"Recommendation whether I should buy the product or not\")\n",
293 |     "    brand_name: str = Field(description=\"Brand of the coffee\")\n",
294 |     "    aroma:str = Field(description=\"Description of the coffee aroma\")\n",
295 |     "    cons: typing.List[str] = Field(description=\"List of cons of the coffee\")"
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "id": "19c9635d-93aa-4549-87db-366ef914acb6",
301 |    "metadata": {},
302 |    "source": [
303 |     "The next thing is to create a prompt, which can be pretty simple as shown in the cell below, and the `input_data` for the model. To force the model to generate a response in a given format, you have to call the `generate()` method with `prompt`, `input_data` and with one additional argument called `output_data_model_class`. The `ReviewOutputDataModel` class defined above should be provided to this argument. This automatically tells the model to output predictions in the format defined by this dataclass."
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "code",
308 |    "execution_count": 9,
309 |    "id": "dd633a75-ce33-4bad-a298-61ed6c4e8de4",
310 |    "metadata": {
311 |     "ExecuteTime": {
312 |      "end_time": "2024-01-04T16:03:54.571172Z",
313 |      "start_time": "2024-01-04T16:03:53.648911Z"
314 |     }
315 |    },
316 |    "outputs": [],
317 |    "source": [
318 |     "review = \"Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend.\"\n",
319 |     "    \n",
320 |     "prompt = \"Summarize review of the coffee. Review: {review}\"\n",
321 |     "input_data = [\n",
322 |     "    InputData(input_mappings={\"review\": review}, id=\"0\")\n",
323 |     "]\n",
324 |     "\n",
325 |     "responses = model.generate(\n",
326 |     "    prompt=prompt, \n",
327 |     "    input_data=input_data,\n",
328 |     "    output_data_model_class=ReviewOutputDataModel\n",
329 |     ")\n",
330 |     "response = responses[0].response"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "markdown",
335 |    "id": "3a322c89-c2ca-462f-ba77-8120b5e0945a",
336 |    "metadata": {},
337 |    "source": [
338 |     "The results below show that the predictions are indeed returned in the format defined above. "
339 |    ]
340 |   },
341 |   {
342 |    "cell_type": "code",
343 |    "execution_count": 10,
344 |    "id": "9f9996ec-141f-45a6-a900-71efd5fe3a96",
345 |    "metadata": {
346 |     "ExecuteTime": {
347 |      "end_time": "2024-01-04T16:03:55.095035Z",
348 |      "start_time": "2024-01-04T16:03:55.078664Z"
349 |     }
350 |    },
351 |    "outputs": [
352 |     {
353 |      "data": {
354 |       "text/plain": "__main__.ReviewOutputDataModel"
355 |      },
356 |      "execution_count": 10,
357 |      "metadata": {},
358 |      "output_type": "execute_result"
359 |     }
360 |    ],
361 |    "source": [
362 |     "type(response)"
363 |    ]
364 |   },
365 |   {
366 |    "cell_type": "code",
367 |    "execution_count": 11,
368 |    "id": "01bb6d5c-80bc-43e7-97ac-252b98b45262",
369 |    "metadata": {
370 |     "ExecuteTime": {
371 |      "end_time": "2024-01-04T16:03:55.867763Z",
372 |      "start_time": "2024-01-04T16:03:55.854264Z"
373 |     }
374 |    },
375 |    "outputs": [
376 |     {
377 |      "data": {
378 |       "text/plain": "{'summary': 'The Blue Orca coffee is nothing above the level of coffees from the supermarket. It is weak in terms of strength and taste.',\n 'should_buy': False,\n 'brand_name': 'Blue Orca',\n 'aroma': 'Not mentioned in the review',\n 'cons': ['Weak in terms of strength', 'Weak in terms of taste']}"
379 |      },
380 |      "execution_count": 11,
381 |      "metadata": {},
382 |      "output_type": "execute_result"
383 |     }
384 |    ],
385 |    "source": [
386 |     "response.dict()"
387 |    ]
388 |   },
389 |   {
390 |    "cell_type": "code",
391 |    "execution_count": 12,
392 |    "id": "767ff534-85a6-4d2c-b71b-c869f9343623",
393 |    "metadata": {
394 |     "ExecuteTime": {
395 |      "end_time": "2024-01-04T16:03:56.968171Z",
396 |      "start_time": "2024-01-04T16:03:56.958045Z"
397 |     }
398 |    },
399 |    "outputs": [
400 |     {
401 |      "data": {
402 |       "text/plain": "ReviewOutputDataModel(summary='The Blue Orca coffee is nothing above the level of coffees from the supermarket. It is weak in terms of strength and taste.', should_buy=False, brand_name='Blue Orca', aroma='Not mentioned in the review', cons=['Weak in terms of strength', 'Weak in terms of taste'])"
403 |      },
404 |      "execution_count": 12,
405 |      "metadata": {},
406 |      "output_type": "execute_result"
407 |     }
408 |    ],
409 |    "source": [
410 |     "response"
411 |    ]
412 |   },
413 |   {
414 |    "cell_type": "markdown",
415 |    "id": "64fef78d-1ae4-4477-8096-4b52fa4e8d16",
416 |    "metadata": {},
417 |    "source": [
418 |     "This is really interesting feature, because it gives the possibility to do several tasks at once. In the above example, there was summarization, classification, entity extraction and so on. To add another one, simply add a new field to the dataclass. For example, if you'd like to know the pros of the coffee, you just need to add one additional field `pros` to the dataclass, describe it properly, re-run everything and you'll get the results. So as you can see, it significantly reduces the need to do extensive prompt engineering. You just define it in the code as an additional field and you’re done."
419 |    ]
420 |   }
421 |  ],
422 |  "metadata": {
423 |   "kernelspec": {
424 |    "display_name": "Python 3 (ipykernel)",
425 |    "language": "python",
426 |    "name": "python3"
427 |   },
428 |   "language_info": {
429 |    "codemirror_mode": {
430 |     "name": "ipython",
431 |     "version": 3
432 |    },
433 |    "file_extension": ".py",
434 |    "mimetype": "text/x-python",
435 |    "name": "python",
436 |    "nbconvert_exporter": "python",
437 |    "pygments_lexer": "ipython3",
438 |    "version": "3.10.13"
439 |   }
440 |  },
441 |  "nbformat": 4,
442 |  "nbformat_minor": 5
443 | }
444 | 


--------------------------------------------------------------------------------