├── allms ├── __init__.py ├── utils │ ├── __init__.py │ ├── logger_utils.py │ ├── io_utils.py │ ├── response_parsing_utils.py │ └── long_text_processing_utils.py ├── chains │ ├── __init__.py │ └── long_text_processing_chain.py ├── constants │ ├── __init__.py │ ├── prompt.py │ ├── vertex_ai.py │ ├── azure.py │ └── input_data.py ├── defaults │ ├── __init__.py │ ├── general_defaults.py │ ├── long_text_chain.py │ ├── azure_defaults.py │ └── vertex_ai.py ├── domain │ ├── __init__.py │ ├── input_data.py │ ├── prompt_dto.py │ ├── response.py │ ├── enumerables.py │ └── configuration.py ├── exceptions │ ├── __init__.py │ └── validation_input_data_exceptions.py └── models │ ├── azure_base.py │ ├── __init__.py │ ├── azure_openai.py │ ├── vertexai_palm.py │ ├── vertexai_gemma.py │ ├── azure_mistral.py │ ├── azure_llama2.py │ ├── vertexai_base.py │ ├── vertexai_gemini.py │ └── abstract.py ├── .gitignore ├── docs ├── assets │ └── images │ │ └── logo.png ├── faq.md ├── index.md ├── usage │ ├── basic.md │ ├── deploy_open_source_models.md │ ├── error_handling.md │ ├── forcing_response_format.md │ └── advanced.md ├── api │ ├── input_output_dataclasses.md │ └── models │ │ ├── azure_mistral_model.md │ │ ├── azure_llama2_model.md │ │ ├── vertexai_palm_model.md │ │ ├── vertexai_gemma.md │ │ ├── azure_openai_model.md │ │ └── vertexai_gemini_model.md └── installation_and_quick_start.md ├── resources └── images │ └── map_reduce_like_summary.png ├── .github ├── pull_request_template.md ├── dependabot.yaml └── workflows │ ├── release.yml │ ├── docs.yml │ └── build.yaml ├── tests ├── test_available_models_added_to_all.py ├── test_utf_characters_data.py ├── conftest.py ├── resources │ ├── test_input_data.csv │ └── test_end_to_end_expected_output.csv ├── test_output_parser.py ├── test_model_behavior_for_different_input_data.py └── test_end_to_end.py ├── Makefile ├── mkdocs.yml ├── pyproject.toml ├── README.md ├── LICENSE └── examples └── introduction.ipynb /allms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allms/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allms/chains/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allms/constants/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allms/defaults/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allms/domain/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /allms/exceptions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | __pycache__ 3 | credentials 4 | .DS_Store -------------------------------------------------------------------------------- 
/docs/assets/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allegro/allms/HEAD/docs/assets/images/logo.png -------------------------------------------------------------------------------- /resources/images/map_reduce_like_summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allegro/allms/HEAD/resources/images/map_reduce_like_summary.png -------------------------------------------------------------------------------- /allms/defaults/general_defaults.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class GeneralDefaults: 6 | MAX_RETRIES = 8 7 | MAX_CONCURRENCY = 1000 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Feature Description 2 | 3 | Few words on what the task is about... 4 | 5 | #### Added 6 | 7 | #### Changed 8 | 9 | #### Fixed 10 | 11 | #### Removed 12 | -------------------------------------------------------------------------------- /tests/test_available_models_added_to_all.py: -------------------------------------------------------------------------------- 1 | from allms import models 2 | 3 | 4 | class TestAvailableModelsAddedToAll: 5 | 6 | def test_available_models_added_to_all(self): 7 | for model in models.get_available_models().values(): 8 | assert model.__name__ in models.__all__ 9 | -------------------------------------------------------------------------------- /allms/constants/prompt.py: -------------------------------------------------------------------------------- 1 | class PromptConstants: 2 | OUTPUT_DATA_MODEL = "output_data_model" 3 | TEMPLATE_STR = "template" 4 | INPUT_VARIABLES_STR = "input_variables" 5 | PARTIAL_VARIABLES_STR = "partial_variables" 6 | TEXT_STR = "text" 7 | OUTPUT_DATA_MODEL_CLASS_SEPARATOR = "\n\n" 8 | -------------------------------------------------------------------------------- /allms/domain/input_data.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class InputData(BaseModel): 7 | input_mappings: typing.Dict[str, str] 8 | id: str 9 | 10 | def get_input_keys(self) -> typing.List[str]: 11 | return list(self.input_mappings.keys()) 12 | -------------------------------------------------------------------------------- /allms/constants/vertex_ai.py: -------------------------------------------------------------------------------- 1 | class VertexModelConstants: 2 | RESPONSE_SEPARATOR = "" 3 | RESPONSE_BLOCKED_STR = "" 4 | 5 | GCP_PROJECT_ID_STR_NAME = "GCP_PROJECT_ID" 6 | GCP_LLM_REGION_STR_NAME = "GCP_LLM_REGION" 7 | GCP_MODEL_NAME_STR_NAME = "GCP_MODEL_NAME" 8 | -------------------------------------------------------------------------------- /allms/utils/logger_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | 5 | def setup_logger() -> None: 6 | log_format = '[%(levelname)s] %(asctime)s %(filename)s (%(lineno)d)\t- %(message)s' 7 | log_dateformat = '%Y-%m-%d %H:%M:%S' 8 | logging.basicConfig(format=log_format, datefmt=log_dateformat, stream=sys.stdout, level=logging.INFO) 9 | 
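To make the pieces above more concrete, here is a minimal sketch (not part of the repository) showing how `setup_logger` and `InputData` might be used together when preparing a single example for a prompt; the variable name `text` and its value are made up for illustration:

```python
from allms.domain.input_data import InputData
from allms.utils.logger_utils import setup_logger

# Configure the package-wide logging format defined in logger_utils.py.
setup_logger()

# InputData maps the symbolic variables used in a prompt (e.g. "{text}")
# to the concrete values for one example; `id` identifies the example,
# since responses are returned asynchronously.
example = InputData(
    input_mappings={"text": "A waterproof jacket with taped seams."},
    id="example-1",
)

# get_input_keys() lists the variable names the prompt is expected to contain.
assert example.get_input_keys() == ["text"]
```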
-------------------------------------------------------------------------------- /.github/dependabot.yaml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/free-pro-team@latest/github/administering-a-repository/keeping-your-dependencies-updated-automatically 2 | version: 2 3 | registries: 4 | updates: 5 | - package-ecosystem: "github-actions" 6 | directory: "/" 7 | schedule: 8 | interval: "weekly" 9 | - package-ecosystem: "pip" 10 | directory: "/" 11 | schedule: 12 | interval: "weekly" -------------------------------------------------------------------------------- /allms/domain/prompt_dto.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel, Field 2 | from typing import List, Union 3 | 4 | 5 | class SummaryOutputClass(BaseModel): 6 | summary: str = Field(description="Summary of a product description") 7 | 8 | 9 | class KeywordsOutputClass(BaseModel): 10 | keywords: List[str] = Field(description="List of keywords") 11 | 12 | 13 | class AggregateOutputClass(BaseModel): 14 | summaries: List[Union[SummaryOutputClass, KeywordsOutputClass]] = Field(description="List of aggregated outputs") 15 | -------------------------------------------------------------------------------- /allms/defaults/long_text_chain.py: -------------------------------------------------------------------------------- 1 | class LongTextChainDefaults: 2 | OVERLAP_SIZE = 50 3 | AGGREGATOR_DEFAULT_STR_SEPARATOR = ", " 4 | 5 | # TODO Refactor along with adding support for LongDocument processing 6 | AGGREGATION_PROMPT = """You're an AI agent that combines product summaries. Write a summary of the provided summaries. Keep the most important information and discard 7 | redundant information. The input is a JSON object. 8 | 9 | {output_data_model} 10 | 11 | {text} 12 | """ 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export TWINE_USERNAME?=__token__ 2 | export TWINE_PASSWORD?=your_token 3 | 4 | install-poetry:: 5 | python3 -m pip install pip --upgrade 6 | python3 -m pip install poetry==1.5.0 7 | 8 | install-env:: 9 | poetry install --all-extras --ansi --no-root 10 | 11 | build:: 12 | poetry run python -m build --sdist --wheel . 
13 | 14 | linter:: 15 | poetry run pylint allms --reports=no --output-format=colorized --fail-under=8.0 16 | 17 | tests:: 18 | poetry run python -m pytest -s --verbose 19 | 20 | publish:: 21 | poetry run python -m twine upload --verbose dist/* 22 | 23 | docs:: 24 | poetry run mkdocs build 25 | -------------------------------------------------------------------------------- /allms/constants/azure.py: -------------------------------------------------------------------------------- 1 | class AzureOpenAIConstants: 2 | OPENAI_API_TYPE_STR_NAME: str = "OPENAI_API_TYPE" 3 | OPENAI_API_BASE_STR_NAME: str = "OPENAI_API_BASE" 4 | OPENAI_API_VERSION_STR_NAME: str = "OPENAI_API_VERSION" 5 | OPENAI_DEPLOYMENT_NAME_STR_NAME: str = "OPENAI_DEPLOYMENT_NAME" 6 | OPENAI_API_KEY_STR_NAME: str = "OPENAI_API_KEY" 7 | OPENAI_MODEL_NAME_STR_NAME: str = "OPENAI_MODEL_NAME" 8 | 9 | 10 | class AzureMLOnlineEndpointConstants: 11 | AZURE_API_KEY_STR_NAME = "AZURE_API_KEY" 12 | AZURE_ENDPOINT_URL_STR_NAME = "AZURE_ENDPOINT_URL" 13 | AZURE_DEPLOYMENT_NAME_STR_NAME = "AZURE_DEPLOYMENT_NAME" 14 | -------------------------------------------------------------------------------- /allms/defaults/azure_defaults.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class AzureGptTurboDefaults: 6 | OPENAI_API_TYPE: str = "azure" 7 | OPENAI_MODEL_NAME: str = "gpt-3.5-turbo" 8 | 9 | # These values were chosen based on the default values used by the LLM provider 10 | MODEL_TOTAL_MAX_TOKENS: int = 4096 11 | TEMPERATURE = 0.0 12 | MAX_OUTPUT_TOKENS: int = 512 13 | REQUEST_TIMEOUT_S = 60 14 | 15 | 16 | @dataclass 17 | class AzureLlama2Defaults: 18 | MODEL_TOTAL_MAX_TOKENS: int = 4096 19 | MAX_OUTPUT_TOKENS: int = 512 20 | TEMPERATURE = 0.0 21 | TOP_P = 1.0 22 | 23 | 24 | @dataclass 25 | class AzureMistralAIDefaults: 26 | MODEL_TOTAL_MAX_TOKENS: int = 8192 27 | MAX_OUTPUT_TOKENS: int = 1024 28 | TEMPERATURE = 0.0 29 | TOP_P = 1.0 30 | -------------------------------------------------------------------------------- /allms/defaults/vertex_ai.py: -------------------------------------------------------------------------------- 1 | class PalmModelDefaults: 2 | # These values were chosen based on the default values used by the LLM provider 3 | GCP_MODEL_NAME = "text-bison@001" 4 | MODEL_TOTAL_MAX_TOKENS = 8192 5 | MAX_OUTPUT_TOKENS = 1024 6 | TEMPERATURE = 0.0 7 | TOP_P = 0.95 8 | TOP_K = 40 9 | VERBOSE = True 10 | 11 | 12 | class GeminiModelDefaults: 13 | GCP_MODEL_NAME = "gemini-1.5-flash-001" 14 | MODEL_TOTAL_MAX_TOKENS = 30720 15 | MAX_OUTPUT_TOKENS = 2048 16 | TEMPERATURE = 0.0 17 | TOP_P = 0.95 18 | TOP_K = 40 19 | VERBOSE = True 20 | 21 | 22 | class GemmaModelDefaults: 23 | GCP_MODEL_NAME = "gemma" 24 | MODEL_TOTAL_MAX_TOKENS = 8192 25 | MAX_OUTPUT_TOKENS = 1024 26 | TEMPERATURE = 0.0 27 | TOP_P = 0.95 28 | TOP_K = 40 29 | VERBOSE = True 30 | -------------------------------------------------------------------------------- /allms/domain/response.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | from pydantic import BaseModel 4 | 5 | from allms.domain.input_data import InputData 6 | 7 | 8 | class ResponseParsingOutput(BaseModel): 9 | response: typing.Optional[typing.Any] 10 | error_message: typing.Optional[str] 11 | 12 | 13 | class ResponseData(BaseModel): 14 | response: typing.Optional[typing.Any] = None 15 | input_data: typing.Optional[InputData] = None 16 | 17 | 
number_of_prompt_tokens: typing.Optional[int] = None 18 | number_of_generated_tokens: typing.Optional[int] = None 19 | error: typing.Optional[str] = None 20 | 21 | # Without this, only classes inheriting from the pydantic BaseModel are allowed as field types. Exception isn't 22 | # such a class and that's why we need it. 23 | class Config: 24 | arbitrary_types_allowed = True 25 | -------------------------------------------------------------------------------- /allms/domain/enumerables.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import List 3 | 4 | 5 | class ListConvertableEnum(Enum): 6 | @classmethod 7 | def get_values(cls) -> List[str]: 8 | return list(map(lambda field: field.value, cls)) 9 | 10 | 11 | class AggregationLogicForLongInputData(str, ListConvertableEnum): 12 | SIMPLE_CONCATENATION = "SIMPLE_CONCATENATION" 13 | REDUCE_BY_LLM_PROMPTING = "REDUCE_BY_LLM_PROMPTING" 14 | 15 | 16 | class AvailableModels(str, ListConvertableEnum): 17 | AZURE_OPENAI_MODEL = "azure_openai" 18 | AZURE_LLAMA2_MODEL = "azure_llama2" 19 | AZURE_MISTRAL_MODEL = "azure_mistral" 20 | VERTEXAI_PALM2_MODEL = "vertexai_palm2" 21 | VERTEXAI_GEMINI_MODEL = "vertexai_gemini" 22 | VERTEXAI_GEMMA_MODEL = "vertexai_gemma" 23 | 24 | 25 | class LanguageModelTask(str, ListConvertableEnum): 26 | SUMMARY = "SUMMARY" 27 | KEYWORDS = "KEYWORDS" 28 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: allms 2 | theme: readthedocs 3 | nav: 4 | - Introduction: index.md 5 | - Installation & Quickstart: installation_and_quick_start.md 6 | - F.A.Q: faq.md 7 | - User guide: 8 | - Basic: usage/basic.md 9 | - Advanced: usage/advanced.md 10 | - Forcing Structured Output Format: usage/forcing_response_format.md 11 | - How to Deploy Open-source Models on Azure and GCP?: usage/deploy_open_source_models.md 12 | - API: 13 | - Domain: api/input_output_dataclasses.md 14 | - Models: 15 | - Azure Llama2: api/models/azure_llama2_model.md 16 | - Azure Mistral: api/models/azure_mistral_model.md 17 | - Azure OpenAI GPT: api/models/azure_openai_model.md 18 | - VertexAI PaLM2: api/models/vertexai_palm_model.md 19 | - VertexAI Gemini: api/models/vertexai_gemini_model.md 20 | - VertexAI Gemma: api/models/vertexai_gemma.md -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: [created] 5 | workflow_dispatch: 6 | jobs: 7 | release: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - name: Set up Python 3.10.x 12 | uses: actions/setup-python@v2 13 | with: 14 | python-version: "3.10" 15 | env: 16 | AGENT_TOOLSDIRECTORY: /opt/hostedtoolcache 17 | RUNNER_TOOL_CACHE: /opt/hostedtoolcache 18 | - run: python -m pip install build 19 | - name: Install poetry 20 | run: make install-poetry 21 | - name: Install dependencies 22 | run: make install-env 23 | - name: Build allms package 24 | run: make build 25 | - name: Publish allms package to PyPI 26 | env: 27 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 28 | run: | 29 | make publish 30 | 31 | -------------------------------------------------------------------------------- /tests/test_utf_characters_data.py: 
-------------------------------------------------------------------------------- 1 | import html.entities 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | 7 | class TestModelBehaviorForSpecialCharacters: 8 | @patch("langchain.chains.base.Chain.arun") 9 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 10 | @pytest.mark.parametrize("input_character", list(html.entities.entitydefs.values())) 11 | def test_model_is_not_broken_by_special_characters(self, tokens_mock, arun_mock, input_character, models): 12 | # GIVEN 13 | arun_mock.return_value = f"{input_character}" 14 | tokens_mock.return_value = 1 15 | 16 | # WHEN & THEN 17 | for model in models.values(): 18 | response = model.generate( 19 | f"This is prompt with broken sign {input_character} and the model should work.") 20 | assert response[0].error is None 21 | assert response[0].response == input_character 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "allms" 3 | version = "1.0.13" 4 | description = "" 5 | authors = ["Allegro Opensource "] 6 | readme = "README.md" 7 | packages = [{include = "allms"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.9.0,<4.0" 11 | fsspec = "^2025.3.0" 12 | google-cloud-aiplatform = "1.85.0" 13 | pydash = "^8.0.5" 14 | transformers = "^4.49.0" 15 | pydantic = "^2.10.6" 16 | langchain = "0.3.21" 17 | tiktoken = "^0.9.0" 18 | openai = "1.68.0" 19 | pytest-mock = "^3.14.0" 20 | respx = "^0.22.0" 21 | langchain-community = "^0.3.20" 22 | langchain-google-vertexai = "^2.0.15" 23 | sentencepiece = "^0.2.0" 24 | langchain-openai = "^0.3.9" 25 | 26 | [tool.poetry.group.dev.dependencies] 27 | pytest = "^8.3.5" 28 | pylint = "^3.3.6" 29 | mkdocs = "^1.6.1" 30 | build = "^1.2.2.post1" 31 | twine = "^6.1.0" 32 | 33 | [build-system] 34 | requires = ["poetry-core"] 35 | build-backend = "poetry.core.masonry.api" 36 | -------------------------------------------------------------------------------- /allms/constants/input_data.py: -------------------------------------------------------------------------------- 1 | import typing 2 | 3 | 4 | class IODataConstants: 5 | TEXT = "text" 6 | ID = "id" 7 | 8 | PROMPT_TOKENS_NUMBER = "number_of_prompt_tokens" 9 | GENERATED_TOKENS_NUMBER = "number_of_generated_tokens" 10 | 11 | RESPONSE_STR_NAME = "response" 12 | 13 | ERROR_MESSAGE_STR = "Response error" 14 | VALUE_ERROR_MESSAGE = "Value Error has occurred" 15 | INVALID_ARGUMENT_MESSAGE = "Invalid Argument Exception" 16 | CONTENT_FILTER_MESSAGE = "Content Filter Message" 17 | TIMEOUT_ERROR_MESSAGE = "Timeout Error" 18 | 19 | SUPPORTED_INPUT_DATA_FORMAT = "csv" 20 | DEFAULT_ID = "DEFAULT_ID" 21 | 22 | @staticmethod 23 | def get_columns_for_df_with_responses(input_keys: typing.List[str]) -> typing.List[str]: 24 | return input_keys + [ 25 | IODataConstants.ID, 26 | IODataConstants.RESPONSE_STR_NAME, 27 | IODataConstants.PROMPT_TOKENS_NUMBER, 28 | IODataConstants.GENERATED_TOKENS_NUMBER 29 | ] 30 | -------------------------------------------------------------------------------- /allms/models/azure_base.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import typing 3 | from concurrent.futures import ThreadPoolExecutor 4 | 5 | from langchain.callbacks.manager import AsyncCallbackManagerForLLMRun 6 | from langchain_community.chat_models.azureml_endpoint import 
AzureMLChatOnlineEndpoint 7 | 8 | 9 | class AzureMLOnlineEndpointAsync(AzureMLChatOnlineEndpoint): 10 | 11 | async def _acall( 12 | self, 13 | prompt: str, 14 | stop: typing.Optional[typing.List[str]] = None, 15 | run_manager: typing.Optional[AsyncCallbackManagerForLLMRun] = None, 16 | **kwargs: typing.Any, 17 | ) -> str: 18 | # Under the hood, langchain uses urllib.request to query the Azure ML Endpoint. urllib.request is not compatible 19 | # with asyncio, and that's why we had to implement the function this way 20 | task_executor = ThreadPoolExecutor() 21 | return await asyncio.wrap_future( 22 | task_executor.submit(self._call, prompt, stop, run_manager, **kwargs) 23 | ) 24 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Deploy docs 2 | on: 3 | push: 4 | branches: ["main"] 5 | paths: 6 | - 'docs/**' 7 | - 'mkdocs.yml' 8 | - 'Pipfile' 9 | workflow_dispatch: 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | concurrency: 15 | group: "pages" 16 | cancel-in-progress: false 17 | jobs: 18 | deploy: 19 | environment: 20 | name: github-pages 21 | url: ${{ steps.deployment.outputs.page_url }} 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - uses: actions/configure-pages@v4 26 | - uses: actions/setup-python@v4 27 | with: 28 | python-version: '3.10' 29 | - run: python -m pip install build 30 | - run: make install-poetry 31 | - run: make install-env 32 | - run: make docs 33 | - uses: actions/upload-pages-artifact@v3 34 | with: 35 | path: 'site' 36 | - name: Deploy to GitHub Pages 37 | id: deployment 38 | uses: actions/deploy-pages@v4 39 | 40 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | 2 | # Frequently Asked Questions 3 | 4 | ### 1. How to use the allms in a python notebook? 5 | When using the `allms` library, which utilizes asynchronous programming under the hood, you must install the `nest-asyncio` library to use it in a Jupyter notebook environment. 6 | 7 | To ensure proper functionality, execute the following code at the beginning of your notebook: 8 | ```jupyterpython 9 | !pip install nest-asyncio 10 | import nest_asyncio 11 | nest_asyncio.apply() 12 | ``` 13 | 14 | 15 | 16 | ### 2. How can I estimate the cost of my queries? 17 | 18 | The model provides information for each record about the count of tokens in the prompt and the count of generated tokens. 19 | In most cases, pricing for Language Models (LLMs) is determined based on the total number of tokens processed, which encompasses both prompt tokens and generated tokens. It is essential to familiarize yourself with the pricing details offered by your service provider to understand the associated costs. An example pricing for AzureOpenAI can be found [here](https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/). 
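As a rough illustration of such an estimate, the token counts returned in `ResponseData` can be aggregated into a cost figure. The sketch below assumes `model` is an already configured `allms` model, and the per-1K-token prices are placeholders rather than real Azure OpenAI rates:

```python
# Placeholder prices per 1K tokens -- always check your provider's current pricing.
PROMPT_PRICE_PER_1K_TOKENS = 0.0005
COMPLETION_PRICE_PER_1K_TOKENS = 0.0015

responses = model.generate("What is the capital of Poland?")

# Sum prompt-token and generated-token costs over all returned records.
estimated_cost = sum(
    (response.number_of_prompt_tokens or 0) / 1000 * PROMPT_PRICE_PER_1K_TOKENS
    + (response.number_of_generated_tokens or 0) / 1000 * COMPLETION_PRICE_PER_1K_TOKENS
    for response in responses
)
print(f"Estimated cost: ${estimated_cost:.6f}")
```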
20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - main 7 | permissions: 8 | contents: write 9 | pull-requests: write 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v3 16 | - name: Setup node 17 | uses: actions/setup-python@v3 18 | with: 19 | python-version: "3.10.10" 20 | - name: Install dependencies 21 | run: | 22 | make install-poetry 23 | make install-env 24 | - name: Tests 25 | run: | 26 | make tests 27 | dependabot: 28 | runs-on: ubuntu-latest 29 | if: ${{ github.event_name == 'pull_request' && github.actor == 'dependabot[bot]' }} 30 | needs: build 31 | steps: 32 | - name: Dependabot metadata 33 | id: metadata 34 | uses: dependabot/fetch-metadata@v1.6.0 35 | with: 36 | github-token: "${{ secrets.GITHUB_TOKEN }}" 37 | - name: Enable auto-merge for Dependabot PRs 38 | run: gh pr merge --auto --merge "$PR_URL" 39 | env: 40 | PR_URL: ${{github.event.pull_request.html_url}} 41 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 42 | -------------------------------------------------------------------------------- /allms/models/__init__.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Type 2 | 3 | from allms.domain.configuration import HarmBlockThreshold, HarmCategory 4 | from allms.domain.enumerables import AvailableModels 5 | from allms.models.abstract import AbstractModel 6 | from allms.models.azure_llama2 import AzureLlama2Model 7 | from allms.models.azure_mistral import AzureMistralModel 8 | from allms.models.azure_openai import AzureOpenAIModel 9 | from allms.models.vertexai_gemini import VertexAIGeminiModel 10 | from allms.models.vertexai_palm import VertexAIPalmModel 11 | from allms.models.vertexai_gemma import VertexAIGemmaModel 12 | 13 | __all__ = [ 14 | "AzureOpenAIModel", 15 | "AzureLlama2Model", 16 | "AzureMistralModel", 17 | "VertexAIPalmModel", 18 | "VertexAIGeminiModel", 19 | "VertexAIGemmaModel", 20 | "HarmCategory", 21 | "HarmBlockThreshold", 22 | "get_available_models" 23 | ] 24 | 25 | 26 | def get_available_models() -> Dict[str, Type[AbstractModel]]: 27 | return { 28 | AvailableModels.AZURE_OPENAI_MODEL: AzureOpenAIModel, 29 | AvailableModels.AZURE_LLAMA2_MODEL: AzureLlama2Model, 30 | AvailableModels.AZURE_MISTRAL_MODEL: AzureMistralModel, 31 | AvailableModels.VERTEXAI_PALM2_MODEL: VertexAIPalmModel, 32 | AvailableModels.VERTEXAI_GEMINI_MODEL: VertexAIGeminiModel, 33 | AvailableModels.VERTEXAI_GEMMA_MODEL: VertexAIGemmaModel, 34 | } 35 | 36 | -------------------------------------------------------------------------------- /allms/utils/io_utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import logging 3 | from pathlib import Path 4 | from typing import Any, Dict, List, Optional, Union, OrderedDict 5 | 6 | import fsspec 7 | 8 | from allms.constants.input_data import IODataConstants 9 | from allms.domain.input_data import InputData 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def load_csv( 15 | path: str, 16 | limit: Optional[int] = None 17 | ) -> List[OrderedDict[Any, Any]]: 18 | logger.info(f"Loading test data from {path}") 19 | with open(path, mode='r') as csv_file: 20 | csv_reader = csv.DictReader(csv_file) 21 | data = list(csv_reader) 22 | 
return data[:limit] if limit else data 23 | 24 | 25 | def load_csv_to_input_data(path: str, limit: Optional[int] = None) -> List[InputData]: 26 | csv_data = load_csv(path, limit=limit) 27 | return list( 28 | map( 29 | lambda row: InputData(input_mappings=drop_dict_key(row, IODataConstants.ID), 30 | id=str(row[IODataConstants.ID])), 31 | csv_data 32 | ) 33 | ) 34 | 35 | 36 | def drop_dict_key(dictionary: Dict[Any, Any], key: Any) -> Dict[Any, Any]: 37 | dict_copy = dictionary.copy() 38 | dict_copy.pop(key) 39 | return dict_copy 40 | 41 | 42 | def load_credentials(path: Union[str, Path]) -> str: 43 | with fsspec.open(path, "r") as credentials_file: 44 | return credentials_file.readline() 45 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 |

2 | aLLMs Logo 3 |

4 | 5 | # Introduction 6 | 7 | `allms` is a versatile and powerful library designed to streamline the process of querying large language models, offering a user-friendly experience. The `allms` module is designed to simplify interactions with the underlying model by providing the following features: 8 | 9 | - **Simple and User-Friendly Interface**: The module offers an intuitive and easy-to-use interface, making it straightforward to work with the model. 10 | 11 | - **Asynchronous Querying (Default)**: Requests to the model are processed asynchronously by default, ensuring efficient and non-blocking interactions. 12 | 13 | - **Automatic Retrying Mechanism**: The module includes an automatic retrying mechanism, which helps handle transient errors and ensures that queries to the model are robust. 14 | 15 | - **Error Handling and Management**: Errors that may occur during interactions with the model are handled and managed gracefully, providing informative error messages and potential recovery options. 16 | 17 | - **Simple Output Parsing**: The module simplifies the process of parsing and working with the model's output, allowing you to easily extract the information you need. 18 | 19 | 20 | 21 | ### Supported Models 22 | 23 | Currently, the library supports: 24 | 25 | * OpenAI models hosted on Microsoft Azure (`gpt-3.5-turbo`, `gpt4`, `gpt4-turbo`); 26 | * Google Cloud Platform VertexAI models (`PaLM2`, `Gemini`); 27 | * Open-source models `Llama2` and `Mistral` self-deployed on Azure and `Gemma` self-deployed on GCP 28 | 29 | -------------------------------------------------------------------------------- /allms/exceptions/validation_input_data_exceptions.py: -------------------------------------------------------------------------------- 1 | def get_missing_input_data_in_prompt_message(example_id: str) -> str: 2 | return f"Missing input_keys in the prompt. Error occurred for id={example_id}" 3 | 4 | 5 | def get_missing_input_data_in_input_data_message(example_id: str) -> str: 6 | return f"Missing input_keys in the input data. Error occurred for id={example_id}" 7 | 8 | 9 | def get_different_number_of_inputs_message(example_id: str) -> str: 10 | return (f"Number of input keys in input_data and prompt are different." 11 | f"If your intention is to instruct the model to output a JSON, make sure you are using double curly brackets." 12 | f" Please make sure the input_keys are consistent." 13 | f" Error has occurred for id={example_id}") 14 | 15 | 16 | def get_different_input_keys_message(example_id: str) -> str: 17 | return (f"Input variables in the prompt and in the input_data are different. Please make sure" 18 | f"the input_keys are consistent. " 19 | f"If your intention is to instruct the model to output a JSON, make sure you are using double curly brackets." 20 | f"Error has occurred for id={example_id}") 21 | 22 | 23 | def get_prompt_contains_input_key_when_missing_input_data() -> str: 24 | return f"When no input_data is provided prompt cannot contain any input_key." 25 | 26 | 27 | def get_system_prompt_contains_input_variables() -> str: 28 | return "System prompt cannot contain any input variables. Please fix your system message and try again." 29 | 30 | 31 | def get_system_prompt_is_not_supported_by_model() -> str: 32 | return "Mistral-based models don't support `system_prompt` parameter." 
33 | -------------------------------------------------------------------------------- /docs/usage/basic.md: -------------------------------------------------------------------------------- 1 | # Basic Usage 2 | 3 | ## Single Query 4 | 5 | In the simplest approach you just need to pass a prompt, and the model will provide a response for it. 6 | 7 | ```python 8 | from allms.models import AzureOpenAIModel 9 | from allms.domain.configuration import AzureOpenAIConfiguration 10 | from allms.domain.response import ResponseData 11 | 12 | configuration = AzureOpenAIConfiguration( 13 | api_key="", 14 | base_url="", 15 | api_version="", 16 | deployment="", 17 | model_name="" 18 | ) 19 | 20 | model = AzureOpenAIModel(config=configuration) 21 | 22 | response = model.generate("What is the capital of Poland?") 23 | print(response) 24 | 25 | # [ResponseData(response='The capital of Poland is Warsaw.', input_data=None, number_of_prompt_tokens=7, number_of_generated_tokens=7, error=None)] 26 | ``` 27 | 28 | As a response you'll get `List[ResponseData]`, where the first element will contain the response from the model in the 29 | `ResponseData.response` field, along with information about the number of prompt and generated tokens. If any error occurred, 30 | the `ResponseData.error` field will also be filled with the actual exception. 31 | 32 | ## Single Query with System Prompt 33 | 34 | A system prompt can be passed along with a standard prompt. Please note that adding a system prompt will increase the 35 | prompt token count for your query, increasing costs and latency. 36 | 37 | ```python 38 | response = model.generate( 39 | system_prompt="You are an AI agent answering questions like a student during an exam. Answer the question in Polish.", 40 | prompt="What is the capital of Poland?" 41 | ) 42 | print(response) 43 | # Stolica Polski to Warszawa. 44 | ``` 45 | 46 | -------------------------------------------------------------------------------- /docs/api/input_output_dataclasses.md: -------------------------------------------------------------------------------- 1 | ## `class allms.domain.input_data.InputData` dataclass 2 | ```python 3 | @dataclass 4 | class InputData: 5 | input_mappings: Dict[str, str] 6 | id: str 7 | ``` 8 | #### Fields 9 | - `input_mappings` (`Dict[str, str]`): Contains the mapping from the symbolic variables used in the prompt to the actual data 10 | that will be injected in place of these variables. You have to provide a mapping for each symbolic variable used 11 | in the prompt. 12 | - `id` (`str`): Unique identifier. Requests are processed asynchronously, so the order of the responses may differ from 13 | the order of the input data; this field can be used to match responses to inputs. 14 | 15 | ## `class allms.domain.response.ResponseData` dataclass 16 | ```python 17 | @dataclass 18 | class ResponseData: 19 | response: Union[str, BaseModel] 20 | input_data: Optional[InputData] = None 21 | 22 | number_of_prompt_tokens: Optional[int] = None 23 | number_of_generated_tokens: Optional[int] = None 24 | error: Optional[str] = None 25 | ``` 26 | #### Fields 27 | - `response` (`Union[str, BaseModel]`): Contains the response of the model. If the `output_data_model_class` param was provided 28 | to the `generate()` method, it'll contain the response parsed to the provided class. If `output_data_model_class` wasn't 29 | provided, it'll contain the raw string returned from the model. 30 | - `input_data` (`Optional[InputData]`): If `input_data` was provided to the `generate()` method, it'll be copied into 31 | this field.
32 | - `number_of_prompt_tokens` (`int`): Number of tokens used in the prompt. 33 | - `number_of_generated_tokens` (`str`): Number of tokens generated by the model. 34 | - `error` (`str`): If any error that prevented from completing the generation pipeline fully occurred, it'll be listed 35 | here. 36 | 37 | -------------------------------------------------------------------------------- /allms/domain/configuration.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Dict, Optional, Sequence 3 | 4 | import google.oauth2.credentials 5 | from langchain_google_vertexai import HarmBlockThreshold, HarmCategory 6 | 7 | from allms.defaults.vertex_ai import GeminiModelDefaults, PalmModelDefaults 8 | 9 | 10 | @dataclass 11 | class AzureOpenAIConfiguration: 12 | base_url: str 13 | deployment: str 14 | model_name: str 15 | api_version: str 16 | api_key: Optional[str] = None 17 | azure_ad_token: Optional[str] = None 18 | 19 | 20 | @dataclass 21 | class AzureSelfDeployedConfiguration: 22 | api_key: str 23 | deployment: str 24 | endpoint_url: str 25 | 26 | 27 | @dataclass 28 | class VertexAIConfiguration: 29 | cloud_project: str 30 | cloud_location: str 31 | palm_model_name: Optional[str] = PalmModelDefaults.GCP_MODEL_NAME 32 | gemini_model_name: Optional[str] = GeminiModelDefaults.GCP_MODEL_NAME 33 | gemini_safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None 34 | api_endpoint: Optional[str] = None 35 | endpoint_version: Optional[str] = "v1beta1" # the same as in _VertexAIBase 36 | api_transport: Optional[str] = None 37 | extra_headers: Optional[Sequence[tuple[str, str]]] = field(default_factory=list) 38 | credentials: Optional[google.oauth2.credentials.Credentials] = None 39 | 40 | 41 | class VertexAIModelGardenConfiguration(VertexAIConfiguration): 42 | def __init__( 43 | self, 44 | cloud_project: str, 45 | cloud_location: str, 46 | endpoint_id: str 47 | ): 48 | super().__init__( 49 | cloud_project=cloud_project, 50 | cloud_location=cloud_location, 51 | palm_model_name=None, 52 | gemini_model_name=None 53 | ) 54 | self.endpoint_id = endpoint_id 55 | -------------------------------------------------------------------------------- /docs/usage/deploy_open_source_models.md: -------------------------------------------------------------------------------- 1 | # How to Deploy Open-source LLMs on Azure and GCP? 2 | 3 | ## Azure 4 | 5 | To use Open-source models like Llama or Mistral with allms, first you have to deploy it on your own on Azure as a ML Online Endpoint. 6 | Here's how to do it: 7 | 8 | 1. Go to [ml.azure.com](https://ml.azure.com/) and use a subscription with a workspace that has access to the 9 | `Model catalog`. 10 | 2. On the left click `Model catalog`, then under `Introducing Llama 2` click `View models`. 11 | 3. Click the model you want to deploy. 12 | 4. Click `Deploy -> Real-time endpoint`. 13 | 5. Select `Skip Azure AI Content Safety` and click `Proceed`. 14 | 6. Select a virtual machine and click `Deploy`. You must have sufficient quota to deploy the models. 15 | 7. In the menu on the left, click `Endpoints` and select the endpoint you've just created. 16 | 8. After the deployment is complete, you'll see `Consume` tab where the endpoint URL and authentication key will be 17 | provided. 18 | 9. 
Now you can start using the model by configuring it as in the example below: 19 | 20 | ```python 21 | from allms.models import AzureLlama2Model 22 | from allms.domain.configuration import AzureSelfDeployedConfiguration 23 | 24 | configuration = AzureSelfDeployedConfiguration( 25 | api_key="", 26 | endpoint_url="", 27 | deployment="" 28 | ) 29 | 30 | llama_model = AzureLlama2Model(config=configuration) 31 | llama_response = llama_model.generate("2+2 is?") 32 | ``` 33 | 34 | 35 | In case of any problems with deployment, you can review this guide on the Azure blog: 36 | [Introducing Llama 2 on Azure](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/introducing-llama-2-on-azure/ba-p/3881233) 37 | 38 | ## GCP 39 | [Follow the following guide](https://cloud.google.com/vertex-ai/docs/start/explore-models#deploy-a-model) to deploy a model on the GCP VertexAI Model Garden. -------------------------------------------------------------------------------- /allms/models/azure_openai.py: -------------------------------------------------------------------------------- 1 | from asyncio import AbstractEventLoop 2 | from typing import Optional 3 | 4 | from langchain_openai import AzureChatOpenAI 5 | 6 | from allms.defaults.azure_defaults import AzureGptTurboDefaults 7 | from allms.defaults.general_defaults import GeneralDefaults 8 | from allms.domain.configuration import AzureOpenAIConfiguration 9 | from allms.models.abstract import AbstractModel 10 | 11 | 12 | class AzureOpenAIModel(AbstractModel): 13 | def __init__( 14 | self, 15 | config: AzureOpenAIConfiguration, 16 | temperature: float = AzureGptTurboDefaults.TEMPERATURE, 17 | max_output_tokens: int = AzureGptTurboDefaults.MAX_OUTPUT_TOKENS, 18 | request_timeout_s: int = AzureGptTurboDefaults.REQUEST_TIMEOUT_S, 19 | model_total_max_tokens: int = AzureGptTurboDefaults.MODEL_TOTAL_MAX_TOKENS, 20 | max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY, 21 | max_retries: int = GeneralDefaults.MAX_RETRIES, 22 | event_loop: Optional[AbstractEventLoop] = None 23 | ) -> None: 24 | self._request_timeout_s = request_timeout_s 25 | self._config = config 26 | 27 | super().__init__( 28 | temperature=temperature, 29 | model_total_max_tokens=model_total_max_tokens, 30 | max_output_tokens=max_output_tokens, 31 | max_concurrency=max_concurrency, 32 | max_retries=max_retries, 33 | event_loop=event_loop 34 | ) 35 | 36 | def _create_llm(self) -> AzureChatOpenAI: 37 | return AzureChatOpenAI( 38 | deployment_name=self._config.deployment, 39 | api_version=self._config.api_version, 40 | model_name=self._config.model_name, 41 | azure_endpoint=self._config.base_url, 42 | api_key=self._config.api_key, 43 | azure_ad_token=self._config.azure_ad_token, 44 | temperature=self._temperature, 45 | max_tokens=self._max_output_tokens, 46 | request_timeout=self._request_timeout_s 47 | ) 48 | -------------------------------------------------------------------------------- /allms/models/vertexai_palm.py: -------------------------------------------------------------------------------- 1 | from asyncio import AbstractEventLoop 2 | from langchain_google_vertexai import VertexAI 3 | from typing import Optional 4 | 5 | from allms.defaults.general_defaults import GeneralDefaults 6 | from allms.defaults.vertex_ai import PalmModelDefaults 7 | from allms.domain.configuration import VertexAIConfiguration 8 | from allms.models.vertexai_base import CustomVertexAI 9 | from allms.models.abstract import AbstractModel 10 | 11 | 12 | class VertexAIPalmModel(AbstractModel): 13 | def __init__( 
14 | self, 15 | config: VertexAIConfiguration, 16 | temperature: float = PalmModelDefaults.TEMPERATURE, 17 | top_k: int = PalmModelDefaults.TOP_K, 18 | top_p: float = PalmModelDefaults.TOP_P, 19 | max_output_tokens: int = PalmModelDefaults.MAX_OUTPUT_TOKENS, 20 | model_total_max_tokens: int = PalmModelDefaults.MODEL_TOTAL_MAX_TOKENS, 21 | max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY, 22 | max_retries: int = GeneralDefaults.MAX_RETRIES, 23 | verbose: bool = PalmModelDefaults.VERBOSE, 24 | event_loop: Optional[AbstractEventLoop] = None 25 | ) -> None: 26 | self._top_p = top_p 27 | self._top_k = top_k 28 | self._verbose = verbose 29 | self._config = config 30 | 31 | super().__init__( 32 | temperature=temperature, 33 | model_total_max_tokens=model_total_max_tokens, 34 | max_output_tokens=max_output_tokens, 35 | max_concurrency=max_concurrency, 36 | max_retries=max_retries, 37 | event_loop=event_loop 38 | ) 39 | 40 | def _create_llm(self) -> VertexAI: 41 | return CustomVertexAI( 42 | model_name=self._config.palm_model_name, 43 | max_output_tokens=self._max_output_tokens, 44 | temperature=self._temperature, 45 | top_p=self._top_p, 46 | top_k=self._top_k, 47 | verbose=self._verbose, 48 | project=self._config.cloud_project, 49 | location=self._config.cloud_location 50 | ) -------------------------------------------------------------------------------- /allms/models/vertexai_gemma.py: -------------------------------------------------------------------------------- 1 | from asyncio import AbstractEventLoop 2 | 3 | from langchain_google_vertexai import VertexAIModelGarden 4 | from typing import Optional 5 | 6 | from allms.defaults.general_defaults import GeneralDefaults 7 | from allms.defaults.vertex_ai import GemmaModelDefaults 8 | from allms.domain.configuration import VertexAIModelGardenConfiguration 9 | from allms.models.vertexai_base import VertexAIModelGardenWrapper 10 | from allms.models.abstract import AbstractModel 11 | 12 | 13 | class VertexAIGemmaModel(AbstractModel): 14 | def __init__( 15 | self, 16 | config: VertexAIModelGardenConfiguration, 17 | temperature: float = GemmaModelDefaults.TEMPERATURE, 18 | top_k: int = GemmaModelDefaults.TOP_K, 19 | top_p: float = GemmaModelDefaults.TOP_P, 20 | max_output_tokens: int = GemmaModelDefaults.MAX_OUTPUT_TOKENS, 21 | model_total_max_tokens: int = GemmaModelDefaults.MODEL_TOTAL_MAX_TOKENS, 22 | max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY, 23 | max_retries: int = GeneralDefaults.MAX_RETRIES, 24 | verbose: bool = GemmaModelDefaults.VERBOSE, 25 | event_loop: Optional[AbstractEventLoop] = None 26 | ) -> None: 27 | self._top_p = top_p 28 | self._top_k = top_k 29 | self._verbose = verbose 30 | self._config = config 31 | 32 | super().__init__( 33 | temperature=temperature, 34 | model_total_max_tokens=model_total_max_tokens, 35 | max_output_tokens=max_output_tokens, 36 | max_concurrency=max_concurrency, 37 | max_retries=max_retries, 38 | event_loop=event_loop 39 | ) 40 | 41 | self._is_json_format_injected_into_prompt = False 42 | 43 | def _create_llm(self) -> VertexAIModelGarden: 44 | return VertexAIModelGardenWrapper( 45 | model_name=GemmaModelDefaults.GCP_MODEL_NAME, 46 | max_tokens=self._max_output_tokens, 47 | temperature=self._temperature, 48 | top_p=self._top_p, 49 | top_k=self._top_k, 50 | verbose=self._verbose, 51 | project=self._config.cloud_project, 52 | location=self._config.cloud_location, 53 | endpoint_id=self._config.endpoint_id 54 | ) 55 | -------------------------------------------------------------------------------- 
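For reference, here is a short usage sketch of the model defined above, assuming a Gemma endpoint has already been deployed in the VertexAI Model Garden; the project, location and endpoint ID values below are placeholders:

```python
from allms.domain.configuration import VertexAIModelGardenConfiguration
from allms.models import VertexAIGemmaModel

# Placeholder values -- replace with your own GCP project, region and endpoint ID.
configuration = VertexAIModelGardenConfiguration(
    cloud_project="my-gcp-project",
    cloud_location="us-central1",
    endpoint_id="1234567890",
)

gemma_model = VertexAIGemmaModel(config=configuration)
gemma_response = gemma_model.generate("2+2 is?")
```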
/allms/models/azure_mistral.py: -------------------------------------------------------------------------------- 1 | import typing 2 | from asyncio import AbstractEventLoop 3 | 4 | from langchain_community.chat_models.azureml_endpoint import LlamaChatContentFormatter 5 | 6 | from allms.defaults.azure_defaults import AzureMistralAIDefaults 7 | from allms.defaults.general_defaults import GeneralDefaults 8 | from allms.domain.configuration import AzureSelfDeployedConfiguration 9 | from allms.models.abstract import AbstractModel 10 | from allms.models.azure_base import AzureMLOnlineEndpointAsync 11 | 12 | 13 | class AzureMistralModel(AbstractModel): 14 | 15 | def __init__( 16 | self, 17 | config: AzureSelfDeployedConfiguration, 18 | temperature: float = AzureMistralAIDefaults.TEMPERATURE, 19 | top_p: float = AzureMistralAIDefaults.TOP_P, 20 | max_output_tokens: int = AzureMistralAIDefaults.MAX_OUTPUT_TOKENS, 21 | model_total_max_tokens: int = AzureMistralAIDefaults.MODEL_TOTAL_MAX_TOKENS, 22 | max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY, 23 | max_retries: int = GeneralDefaults.MAX_RETRIES, 24 | event_loop: typing.Optional[AbstractEventLoop] = None 25 | ) -> None: 26 | self._top_p = top_p 27 | self._config = config 28 | 29 | super().__init__( 30 | temperature=temperature, 31 | model_total_max_tokens=model_total_max_tokens, 32 | max_output_tokens=max_output_tokens, 33 | max_concurrency=max_concurrency, 34 | max_retries=max_retries, 35 | event_loop=event_loop 36 | ) 37 | 38 | self._is_json_format_injected_into_prompt = False 39 | 40 | def _create_llm(self) -> AzureMLOnlineEndpointAsync: 41 | model_kwargs = { 42 | "max_new_tokens": self._max_output_tokens, "top_p": self._top_p, "do_sample": False, 43 | "return_full_text": False 44 | } 45 | if self._temperature > 0: 46 | model_kwargs["temperature"] = self._temperature 47 | model_kwargs["do_sample"] = True 48 | 49 | return AzureMLOnlineEndpointAsync( 50 | endpoint_api_key=self._config.api_key, 51 | endpoint_url=self._config.endpoint_url, 52 | model_kwargs=model_kwargs, 53 | content_formatter=LlamaChatContentFormatter(), 54 | deployment_name=self._config.deployment 55 | ) 56 | -------------------------------------------------------------------------------- /allms/models/azure_llama2.py: -------------------------------------------------------------------------------- 1 | import typing 2 | from asyncio import AbstractEventLoop 3 | from typing import List, Type 4 | 5 | from langchain_community.chat_models.azureml_endpoint import LlamaChatContentFormatter 6 | from pydantic import BaseModel 7 | 8 | from allms.defaults.azure_defaults import AzureLlama2Defaults 9 | from allms.defaults.general_defaults import GeneralDefaults 10 | from allms.domain.configuration import AzureSelfDeployedConfiguration 11 | from allms.domain.input_data import InputData 12 | from allms.domain.response import ResponseData 13 | from allms.models.abstract import AbstractModel 14 | from allms.models.azure_base import AzureMLOnlineEndpointAsync 15 | 16 | 17 | class AzureLlama2Model(AbstractModel): 18 | 19 | def __init__( 20 | self, 21 | config: AzureSelfDeployedConfiguration, 22 | temperature: float = AzureLlama2Defaults.TEMPERATURE, 23 | top_p: float = AzureLlama2Defaults.TOP_P, 24 | max_output_tokens: int = AzureLlama2Defaults.MAX_OUTPUT_TOKENS, 25 | model_total_max_tokens: int = AzureLlama2Defaults.MODEL_TOTAL_MAX_TOKENS, 26 | max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY, 27 | max_retries: int = GeneralDefaults.MAX_RETRIES, 28 | event_loop: 
typing.Optional[AbstractEventLoop] = None 29 | ) -> None: 30 | self._top_p = top_p 31 | self._config = config 32 | 33 | super().__init__( 34 | temperature=temperature, 35 | model_total_max_tokens=model_total_max_tokens, 36 | max_output_tokens=max_output_tokens, 37 | max_concurrency=max_concurrency, 38 | max_retries=max_retries, 39 | event_loop=event_loop 40 | ) 41 | 42 | self._is_json_format_injected_into_prompt = False 43 | 44 | def _create_llm(self) -> AzureMLOnlineEndpointAsync: 45 | model_kwargs = {"max_new_tokens": self._max_output_tokens, "top_p": self._top_p, "do_sample": False} 46 | if self._temperature > 0: 47 | model_kwargs["temperature"] = self._temperature 48 | model_kwargs["do_sample"] = True 49 | 50 | return AzureMLOnlineEndpointAsync( 51 | endpoint_api_key=self._config.api_key, 52 | endpoint_url=self._config.endpoint_url, 53 | model_kwargs=model_kwargs, 54 | content_formatter=LlamaChatContentFormatter(), 55 | deployment_name=self._config.deployment 56 | ) 57 | -------------------------------------------------------------------------------- /docs/usage/error_handling.md: -------------------------------------------------------------------------------- 1 | # Error Handling 2 | 3 | ## Too long prompt 4 | Each LLM has a defined context size: the maximum number of input plus output tokens that the model is able 5 | to consume. Before sending a request to the model, `allms` automatically checks whether your input data will fit into 6 | the model's context size, and if not it'll either: 7 | - raise a `ValueError` saying that your prompt is too long, if the prompt alone already has more tokens than the allowed 8 | maximum context size of the model 9 | - log a warning saying that the number of prompt tokens plus generated tokens may exceed the maximum allowed number of tokens of 10 | the model, if the number of tokens in the prompt plus the `max_output_tokens` you set for the model is larger than the 11 | allowed maximum context size of the model 12 | 13 | In the first case, the only solution is to truncate the input data so that it fits into the context size of the 14 | model. 15 | 16 | The second case is just a warning, because the model will be able to start the generation, but it may fail randomly 17 | if the number of generated tokens is large enough to exceed the model's maximum context size. In this case you have 18 | two options: you can either truncate the input data or lower `max_output_tokens` so that, added together, they won't 19 | exceed the maximum context size. 20 | 21 | In future releases, we plan to add automatic handling of long sequences. The package will then be able to automatically 22 | split the whole input into shorter chunks, process them separately and combine the outputs. But it's not there yet. 23 | 24 | 25 | ## Output parsing errors 26 | If you use the [Forcing model response format](forcing_response_format.md) functionality, the model can sometimes 27 | generate a response that doesn't conform to the provided output data schema. In this case, `allms` won't 28 | be able to parse the output to the provided output data model class. As a response you'll get a `ResponseData` where 29 | `ResponseData.response` will be the raw, unparsed response from the model, and `ResponseData.error` will be an 30 | `OutputParserException`. 31 | 32 | 33 | ## API errors 34 | `allms` automatically retries failed requests.
But even with this feature, the model can fail to return a response 35 | more times than the maximum number of retries (which is currently set to 8) or some other unexpected errors may occur. 36 | In all of these cases, `ResponseData.error` will contain the exception that occurred. So a good rule of thumb is to 37 | first check the `ResponseData.error` and only if it's empty move to processing the response of the model. -------------------------------------------------------------------------------- /allms/utils/response_parsing_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import typing 3 | 4 | from langchain.output_parsers import PydanticOutputParser 5 | from langchain.schema import OutputParserException 6 | from pydantic import ValidationError 7 | 8 | from allms.domain.response import ResponseData, ResponseParsingOutput 9 | 10 | 11 | class ResponseParser: 12 | def __init__(self, parser: PydanticOutputParser) -> None: 13 | self._json_pattern = re.compile(r"{.*?}", re.DOTALL) 14 | self._parser = parser 15 | 16 | def _clean_extracted_json(self, extracted_json: str) -> str: 17 | json_without_newlines = extracted_json.replace("\\n", "") 18 | json_without_backslashes = json_without_newlines.replace("\\", "") 19 | 20 | return json_without_backslashes 21 | 22 | def _extract_json_from_response(self, model_response_data: ResponseData) -> str: 23 | search_results = self._json_pattern.findall(model_response_data.response) 24 | 25 | if len(search_results) == 0: 26 | return model_response_data.response 27 | 28 | return self._clean_extracted_json(search_results[0]) 29 | 30 | def _parse_response( 31 | self, 32 | model_response_data: ResponseData 33 | ) -> ResponseParsingOutput: 34 | raw_response = self._extract_json_from_response(model_response_data) 35 | 36 | try: 37 | return ResponseParsingOutput( 38 | response=self._parser.parse(raw_response), 39 | error_message=None 40 | ) 41 | except OutputParserException as output_parser_exception: 42 | return ResponseParsingOutput( 43 | response=None, 44 | error_message=f""" 45 | An OutputParserException has occurred for the model response: {raw_response} 46 | The exception message: {output_parser_exception} 47 | """ 48 | ) 49 | except ValidationError as validation_error: 50 | return ResponseParsingOutput( 51 | response=None, 52 | error_message=f""" 53 | A ValidationError has occurred for the model response: {model_response_data.response} 54 | The exception message: {validation_error} 55 | """ 56 | ) 57 | 58 | 59 | def parse_model_output( 60 | self, 61 | model_responses_data: typing.List[ResponseData] 62 | ) -> typing.List[ResponseData]: 63 | parsed_responses = [] 64 | 65 | for model_response_data in model_responses_data: 66 | if not model_response_data.error: 67 | response_with_error = self._parse_response(model_response_data) 68 | 69 | parsed_responses.append(ResponseData( 70 | input_data=model_response_data.input_data, 71 | response=response_with_error.response, 72 | error=response_with_error.error_message, 73 | number_of_prompt_tokens=model_response_data.number_of_prompt_tokens, 74 | number_of_generated_tokens=model_response_data.number_of_generated_tokens 75 | 76 | )) 77 | else: 78 | parsed_responses.append(model_response_data) 79 | 80 | return parsed_responses -------------------------------------------------------------------------------- /docs/api/models/azure_mistral_model.md: -------------------------------------------------------------------------------- 1 | ## `class 
allms.models.AzureMistralModel` API 2 | ### Methods 3 | ```python 4 | __init__( 5 | config: AzureSelfDeployedConfiguration, 6 | temperature: float = 0.0, 7 | top_p: float = 1.0, 8 | max_output_tokens: int = 1024, 9 | model_total_max_tokens: int = 8192, 10 | max_concurrency: int = 1000, 11 | max_retries: int = 8 12 | ) 13 | ``` 14 | #### Parameters 15 | - `config` (`AzureSelfDeployedConfiguration`): an instance of `AzureSelfDeployedConfiguration` class 16 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more 17 | random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. 18 | - `top_p` (`float`): Default: `1.0`. 19 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 20 | and generated tokens is limited by the model's context length. Default: `1024`. 21 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. 22 | Default: `8192`. 23 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. 24 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. 25 | 26 | --- 27 | 28 | ```python 29 | generate( 30 | prompt: str, 31 | input_data: typing.Optional[typing.List[InputData]] = None, 32 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None 33 | ) -> typing.List[ResponseData]: 34 | ``` 35 | #### Parameters 36 | - `prompt` (`str`): Prompt to use to query the model. 37 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to 38 | generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided 39 | in the `input_mappings` of `InputData`. 40 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): Generated response is automatically parsed to this class. WARNING: You need to manually provide the JSON format instructions in the prompt, they are not injected for this model. 41 | 42 | Note that Mistral-based models currently don't support system prompts. 43 | 44 | #### Returns 45 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` 46 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 47 | 48 | --- 49 | 50 | ## `class allms.domain.configuration.AzureSelfDeployedConfiguration` API 51 | ```python 52 | AzureSelfDeployedConfiguration( 53 | api_key: str, 54 | endpoint_url: str, 55 | deployment: str 56 | ) 57 | ``` 58 | #### Parameters 59 | - `api_key` (`str`): Authentication key for the endpoint. 60 | - `endpoint_url` (`str`): URL of pre-existing endpoint. 61 | - `deployment` (`str`): The name under which the model was deployed. 
62 | 63 | --- 64 | 65 | ### Example usage 66 | 67 | ```python 68 | from allms.models import AzureMistralModel 69 | from allms.domain.configuration import AzureSelfDeployedConfiguration 70 | 71 | configuration = AzureSelfDeployedConfiguration( 72 | api_key="", 73 | endpoint_url="", 74 | deployment="" 75 | ) 76 | 77 | mistral_model = AzureMistralModel(config=configuration) 78 | mistral_response = mistral_model.generate("2+2 is?") 79 | ``` -------------------------------------------------------------------------------- /docs/api/models/azure_llama2_model.md: -------------------------------------------------------------------------------- 1 | ## `class allms.models.AzureLlama2Model` API 2 | ### Methods 3 | ```python 4 | __init__( 5 | config: AzureSelfDeployedConfiguration, 6 | temperature: float = 0.0, 7 | top_p: float = 1.0, 8 | max_output_tokens: int = 512, 9 | model_total_max_tokens: int = 4096, 10 | max_concurrency: int = 1000, 11 | max_retries: int = 8 12 | ) 13 | ``` 14 | #### Parameters 15 | - `config` (`AzureSelfDeployedConfiguration`): an instance of `AzureSelfDeployedConfiguration` class 16 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more 17 | random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. 18 | - `top_p` (`float`): Default: `1.0`. 19 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 20 | and generated tokens is limited by the model's context length. Default: `512`. 21 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. 22 | Default: `4096`. 23 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. 24 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. 25 | 26 | --- 27 | 28 | ```python 29 | generate( 30 | prompt: str, 31 | system_prompt: Optional[str] = None, 32 | input_data: typing.Optional[typing.List[InputData]] = None, 33 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None 34 | ) -> typing.List[ResponseData]: 35 | ``` 36 | #### Parameters 37 | - `prompt` (`str`): Prompt to use to query the model. 38 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model. 39 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to 40 | generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided 41 | in the `input_mappings` of `InputData`. 42 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): Generated response is automatically parsed to this class. WARNING: You need to manually provide the JSON format instructions in the prompt, they are not injected for this model. 43 | 44 | #### Returns 45 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` 46 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 47 | 48 | --- 49 | 50 | ## `class allms.domain.configuration.AzureSelfDeployedConfiguration` API 51 | ```python 52 | AzureSelfDeployedConfiguration( 53 | api_key: str, 54 | endpoint_url: str, 55 | deployment: str 56 | ) 57 | ``` 58 | #### Parameters 59 | - `api_key` (`str`): Authentication key for the endpoint. 60 | - `endpoint_url` (`str`): URL of pre-existing endpoint. 
61 | - `deployment` (`str`): The name under which the model was deployed. 62 | 63 | --- 64 | 65 | ### Example usage 66 | 67 | ```python 68 | from allms.models import AzureLlama2Model 69 | from allms.domain.configuration import AzureSelfDeployedConfiguration 70 | 71 | configuration = AzureSelfDeployedConfiguration( 72 | api_key="", 73 | endpoint_url="", 74 | deployment="" 75 | ) 76 | 77 | llama_model = AzureLlama2Model(config=configuration) 78 | llama_response = llama_model.generate("2+2 is?") 79 | ``` -------------------------------------------------------------------------------- /docs/api/models/vertexai_palm_model.md: -------------------------------------------------------------------------------- 1 | ## `class allms.models.VertexAIPalmModel` API 2 | ### Methods 3 | ```python 4 | __init__( 5 | config: VertexAIConfiguration, 6 | temperature: float = 0.0, 7 | top_k: int = 40, 8 | top_p: float = 0.95, 9 | max_output_tokens: int = 1024, 10 | model_total_max_tokens: int = 8192, 11 | max_concurrency: int = 1000, 12 | max_retries: int = 8, 13 | verbose: bool = True 14 | ) 15 | ``` 16 | #### Parameters 17 | - `config` (`VertexAIConfiguration`): An instance of `VertexAIConfiguration` class 18 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more 19 | random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. 20 | - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected 21 | from among the 3 most probable tokens. Default: `40`. 22 | - `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to 23 | least until the sum of their probabilities equals the top_p value. Default: `0.95`. 24 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 25 | and generated tokens is limited by the model's context length. Default: `1024`. 26 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `8192`. 27 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. 28 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. 29 | - `verbose` (`bool`): Default: `True`. 30 | 31 | --- 32 | 33 | ```python 34 | generate( 35 | prompt: str, 36 | system_prompt: Optional[str] = None, 37 | input_data: typing.Optional[typing.List[InputData]] = None, 38 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None 39 | ) -> typing.List[ResponseData]: 40 | ``` 41 | #### Parameters 42 | - `prompt` (`str`): Prompt to use to query the model. 43 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model. 44 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to 45 | generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided 46 | in the `input_mappings` of `InputData`. 47 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the 48 | format defined by the passed class. Generated response is automatically parsed to this class. 49 | 50 | #### Returns 51 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. 
If `input_data` 52 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 53 | 54 | --- 55 | 56 | ## `class allms.domain.configuration.VertexAIConfiguration` API 57 | ```python 58 | VertexAIConfiguration( 59 | cloud_project: str, 60 | cloud_location: str, 61 | palm_model_name: str 62 | ) 63 | ``` 64 | #### Parameters 65 | - `cloud_project` (`str`): The GCP project to use when making Vertex API calls. 66 | - `cloud_location` (`str`): The region to use when making API calls. 67 | - `palm_model_name` (`str`): The specific Palm version you want to use. Default value: `text-bison@001`. 68 | 69 | --- 70 | 71 | ### Example usage 72 | 73 | ```python 74 | from allms.models import VertexAIPalmModel 75 | from allms.domain.configuration import VertexAIConfiguration 76 | 77 | configuration = VertexAIConfiguration( 78 | cloud_project="", 79 | cloud_location="", 80 | palm_model_name="" 81 | ) 82 | 83 | vertex_model = VertexAIPalmModel(config=configuration) 84 | vertex_response = vertex_model.generate("2+2 is?") 85 | ``` -------------------------------------------------------------------------------- /docs/api/models/vertexai_gemma.md: -------------------------------------------------------------------------------- 1 | ## `class allms.models.VertexAIGemmaModel` API 2 | ### Methods 3 | ```python 4 | __init__( 5 | config: VertexAIModelGardenConfiguration, 6 | temperature: float = 0.0, 7 | top_k: int = 40, 8 | top_p: float = 0.95, 9 | max_output_tokens: int = 1024, 10 | model_total_max_tokens: int = 8192, 11 | max_concurrency: int = 1000, 12 | max_retries: int = 8, 13 | verbose: bool = True 14 | ) 15 | ``` 16 | #### Parameters 17 | - `config` (`VertexAIModelGardenConfiguration`): An instance of `VertexAIModelGardenConfiguration` class 18 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more 19 | random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. 20 | - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected 21 | from among the 3 most probable tokens. Default: `40`. 22 | - `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to 23 | least until the sum of their probabilities equals the top_p value. Default: `0.95`. 24 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 25 | and generated tokens is limited by the model's context length. Default: `1024`. 26 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `8192`. 27 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. 28 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. 29 | - `verbose` (`bool`): Default: `True`. 30 | 31 | --- 32 | 33 | ```python 34 | generate( 35 | prompt: str, 36 | system_prompt: Optional[str] = None, 37 | input_data: typing.Optional[typing.List[InputData]] = None, 38 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None 39 | ) -> typing.List[ResponseData]: 40 | ``` 41 | #### Parameters 42 | - `prompt` (`str`): Prompt to use to query the model. 43 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model. 
44 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to 45 | generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided 46 | in the `input_mappings` of `InputData`. 47 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): Generated response is automatically parsed to this class. WARNING: You need to manually provide the JSON format instructions in the prompt, they are not injected for this model. 48 | 49 | #### Returns 50 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` 51 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 52 | 53 | --- 54 | 55 | ## `class allms.domain.configuration.VertexAIModelGardenConfiguration` API 56 | ```python 57 | VertexAIModelGardenConfiguration( 58 | cloud_project: str, 59 | cloud_location: str, 60 | endpoint_id: str 61 | ) 62 | ``` 63 | #### Parameters 64 | - `cloud_project` (`str`): The GCP project to use when making Vertex API calls. 65 | - `cloud_location` (`str`): The region to use when making API calls. 66 | - `endpoint_id` (`str`): ID of an endpoint where the model has been deployed. 67 | 68 | --- 69 | 70 | ### Example usage 71 | 72 | ```python 73 | from allms.models import VertexAIGemmaModel 74 | from allms.domain.configuration import VertexAIModelGardenConfiguration 75 | 76 | configuration = VertexAIModelGardenConfiguration( 77 | cloud_project="", 78 | cloud_location="", 79 | endpoint_id="" 80 | ) 81 | 82 | vertex_model = VertexAIGemmaModel(config=configuration) 83 | vertex_response = vertex_model.generate("2+2 is?") 84 | ``` -------------------------------------------------------------------------------- /docs/api/models/azure_openai_model.md: -------------------------------------------------------------------------------- 1 | ## `class allms.models.AzureOpenAIModel` API 2 | ### Methods 3 | ```python 4 | __init__( 5 | config: AzureOpenAIConfiguration, 6 | temperature: float = 0.0, 7 | max_output_tokens: int = 512, 8 | request_timeout_s: int = 60, 9 | model_total_max_tokens: int = 4096, 10 | max_concurrency: int = 1000, 11 | max_retries: int = 8 12 | ) 13 | ``` 14 | #### Parameters 15 | - `config` (`AzureOpenAIConfiguration`): An instance of `AzureOpenAIConfiguration` class 16 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more 17 | random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. 18 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 19 | and generated tokens is limited by the model's context length. Default: `512`. 20 | - `request_timeout_s` (`int`): Timeout for requests to the model. Default: `60`. 21 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. 22 | Default: `4096`. 23 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. 24 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. 
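
For instance, the defaults above can be overridden when the model is constructed. The snippet below is a minimal sketch; the configuration values are placeholders and the chosen parameter values are only an example.

```python
from allms.models import AzureOpenAIModel
from allms.domain.configuration import AzureOpenAIConfiguration

configuration = AzureOpenAIConfiguration(
    api_key="",
    base_url="",
    api_version="",
    deployment="",
    model_name=""
)

# A slightly more creative model with a bigger output budget and a lower
# cap on the number of concurrent requests.
gpt_model = AzureOpenAIModel(
    config=configuration,
    temperature=0.3,
    max_output_tokens=1024,
    max_concurrency=10
)
```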
25 | 26 | --- 27 | 28 | ```python 29 | generate( 30 | prompt: str, 31 | system_prompt: Optional[str] = None, 32 | input_data: typing.Optional[typing.List[InputData]] = None, 33 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None 34 | ) -> typing.List[ResponseData]: 35 | ``` 36 | #### Parameters 37 | - `prompt` (`str`): Prompt to use to query the model. 38 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model. 39 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to 40 | generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided 41 | in the `input_mappings` of `InputData`. 42 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the 43 | format defined by the passed class. Generated response is automatically parsed to this class. 44 | 45 | #### Returns 46 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` 47 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 48 | 49 | --- 50 | 51 | ## `class allms.domain.configuration.AzureOpenAIConfiguration` API 52 | ```python 53 | AzureOpenAIConfiguration( 54 | api_key: str, 55 | base_url: str, 56 | api_version: str, 57 | deployment: str, 58 | model_name: str 59 | ) 60 | ``` 61 | Sets up the environment for the `AzureOpenAIModel` model. 62 | #### Parameters 63 | - `api_key` (`str`): The API key for your Azure OpenAI resource. You can find this in the Azure portal under 64 | your Azure OpenAI resource. 65 | - `base_url` (`str`): The base URL for your Azure OpenAI resource. You can find this in the Azure portal under 66 | your Azure OpenAI resource. 67 | - `api_version` (`str`): The API version (for example: `2023-03-15-preview`) 68 | - `deployment` (`str`): The name under which the model was deployed. 69 | - `model_name` (`str`): Model name to use (for example: `{gpt-3.5-turbo, gpt-4}`) 70 | 71 | --- 72 | 73 | ### Example usage 74 | 75 | ```python 76 | from allms.models import AzureOpenAIModel 77 | from allms.domain.configuration import AzureOpenAIConfiguration 78 | 79 | configuration = AzureOpenAIConfiguration( 80 | api_key="", 81 | base_url="", 82 | api_version="", 83 | deployment="", 84 | model_name="" 85 | ) 86 | 87 | gpt_model = AzureOpenAIModel(config=configuration) 88 | gpt_response = gpt_model.generate("2+2 is?") 89 | ``` 90 | -------------------------------------------------------------------------------- /docs/api/models/vertexai_gemini_model.md: -------------------------------------------------------------------------------- 1 | ## `class allms.models.VertexAIGeminiModel` API 2 | ### Methods 3 | ```python 4 | __init__( 5 | config: VertexAIConfiguration, 6 | temperature: float = 0.0, 7 | top_k: int = 40, 8 | top_p: float = 0.95, 9 | max_output_tokens: int = 2048, 10 | model_total_max_tokens: int = 30720, 11 | max_concurrency: int = 1000, 12 | max_retries: int = 8, 13 | verbose: bool = True 14 | ) 15 | ``` 16 | #### Parameters 17 | - `config` (`VertexAIConfiguration`): An instance of `VertexAIConfiguration` class 18 | - `temperature` (`float`): The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more 19 | random, while lower values like 0.2 will make it more focused and deterministic. Default: `0.0`. 
20 | - `top_k` (`int`): Changes how the model selects tokens for output. A top-k of 3 means that the next token is selected 21 | from among the 3 most probable tokens. Default: `40`. 22 | - `top_p` (`float`): Top-p changes how the model selects tokens for output. Tokens are selected from most probable to 23 | least until the sum of their probabilities equals the top_p value. Default: `0.95`. 24 | - `max_output_tokens` (`int`): The maximum number of tokens to generate by the model. The total length of input tokens 25 | and generated tokens is limited by the model's context length. Default: `2048`. 26 | - `model_total_max_tokens` (`int`): Context length of the model - maximum number of input plus generated tokens. Default: `30720`. 27 | - `max_concurrency` (`int`): Maximum number of concurrent requests. Default: `1000`. 28 | - `max_retries` (`int`): Maximum number of retries if a request fails. Default: `8`. 29 | - `verbose` (`bool`): Default: `True`. 30 | 31 | --- 32 | 33 | ```python 34 | generate( 35 | prompt: str, 36 | system_prompt: Optional[str] = None, 37 | input_data: typing.Optional[typing.List[InputData]] = None, 38 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None 39 | ) -> typing.List[ResponseData]: 40 | ``` 41 | #### Parameters 42 | - `prompt` (`str`): Prompt to use to query the model. 43 | - `system_prompt` (`Optional[str]`): System prompt that will be used by the model. 44 | - `input_data` (`Optional[List[InputData]]`): If prompt contains symbolic variables you can use this parameter to 45 | generate model responses for batch of examples. Each symbolic variable from the prompt should have mapping provided 46 | in the `input_mappings` of `InputData`. 47 | - `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the 48 | format defined by the passed class. Generated response is automatically parsed to this class. 49 | 50 | #### Returns 51 | `List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data` 52 | is not provided, the length of this list is equal 1, and the first element is the response for the raw prompt. 53 | 54 | --- 55 | 56 | ## `class allms.domain.configuration.VertexAIConfiguration` API 57 | ```python 58 | VertexAIConfiguration( 59 | cloud_project: str, 60 | cloud_location: str, 61 | gemini_model_name: str 62 | ) 63 | ``` 64 | #### Parameters 65 | - `cloud_project` (`str`): The GCP project to use when making Vertex API calls. 66 | - `cloud_location` (`str`): The region to use when making API calls. 67 | - `gemini_model_name` (`str`): The specific Gemini version you want to use. Default value: `gemini-pro` (i.e. Gemini 1.0) 68 | For an updated list of supported models, please refer to the official [Gemini API documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). 
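
For example, a specific Gemini version can be requested through `gemini_model_name`. The snippet below is a minimal sketch; the project and location values are placeholders, and `gemini-1.5-flash` is just one possible choice from the list of supported models.

```python
from allms.models import VertexAIGeminiModel
from allms.domain.configuration import VertexAIConfiguration

configuration = VertexAIConfiguration(
    cloud_project="",
    cloud_location="",
    gemini_model_name="gemini-1.5-flash"  # instead of the default "gemini-pro"
)

gemini_model = VertexAIGeminiModel(config=configuration)
```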
69 | 70 | --- 71 | 72 | ### Example usage 73 | 74 | ```python 75 | from allms.models import VertexAIGeminiModel 76 | from allms.domain.configuration import VertexAIConfiguration 77 | 78 | configuration = VertexAIConfiguration( 79 | cloud_project="", 80 | cloud_location="", 81 | gemini_model_name="" 82 | ) 83 | 84 | vertex_model = VertexAIGeminiModel(config=configuration) 85 | vertex_response = vertex_model.generate("2+2 is?") 86 | ``` -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import typing 3 | from contextlib import ExitStack 4 | from dataclasses import dataclass 5 | from unittest.mock import patch, Mock 6 | 7 | import pytest 8 | from langchain_community.llms.fake import FakeListLLM 9 | 10 | from allms.domain.configuration import ( 11 | AzureOpenAIConfiguration, AzureSelfDeployedConfiguration, VertexAIConfiguration, VertexAIModelGardenConfiguration) 12 | from allms.models import AzureOpenAIModel, VertexAIPalmModel, AzureLlama2Model 13 | from allms.models.azure_mistral import AzureMistralModel 14 | from allms.models.vertexai_gemini import VertexAIGeminiModel 15 | from allms.models.vertexai_gemma import VertexAIGemmaModel 16 | 17 | 18 | class AzureOpenAIEnv: 19 | OPENAI_API_BASE: str = "https://dummy-endpoint.openai.azure.com/" 20 | OPENAI_API_VERSION: str = "dummy-api-version" 21 | OPENAI_DEPLOYMENT_NAME: str = "dummy-deployment-name" 22 | 23 | 24 | @dataclass 25 | class GenerativeModels: 26 | azure_gpt: typing.Optional[AzureOpenAIModel] = None 27 | vertex_palm: typing.Optional[VertexAIPalmModel] = None 28 | 29 | @dataclass 30 | class Client: 31 | client: typing.Any 32 | 33 | 34 | class ModelWithoutAsyncRequestsMock(FakeListLLM, Client): 35 | def __init__(self, *args, **kwargs): 36 | super().__init__(responses=["{}"], client=Mock()) 37 | 38 | 39 | @pytest.fixture(scope="function") 40 | def models(): 41 | event_loop = asyncio.new_event_loop() 42 | 43 | with ExitStack() as stack: 44 | stack.enter_context(patch("allms.models.vertexai_palm.CustomVertexAI", ModelWithoutAsyncRequestsMock)) 45 | stack.enter_context(patch("allms.models.vertexai_gemini.CustomVertexAI", ModelWithoutAsyncRequestsMock)) 46 | stack.enter_context(patch("allms.models.vertexai_gemma.VertexAIModelGardenWrapper", ModelWithoutAsyncRequestsMock)) 47 | stack.enter_context(patch("allms.models.azure_llama2.AzureMLOnlineEndpointAsync", ModelWithoutAsyncRequestsMock)) 48 | stack.enter_context(patch("allms.models.azure_mistral.AzureMLOnlineEndpointAsync", ModelWithoutAsyncRequestsMock)) 49 | 50 | return { 51 | "azure_open_ai": AzureOpenAIModel( 52 | config=AzureOpenAIConfiguration( 53 | api_key="dummy_api_key", 54 | base_url=AzureOpenAIEnv.OPENAI_API_BASE, 55 | api_version=AzureOpenAIEnv.OPENAI_API_VERSION, 56 | deployment=AzureOpenAIEnv.OPENAI_DEPLOYMENT_NAME, 57 | model_name="gpt-4" 58 | ), 59 | event_loop=event_loop 60 | ), 61 | "vertex_palm": VertexAIPalmModel( 62 | config=VertexAIConfiguration( 63 | cloud_project="dummy-project-id", 64 | cloud_location="us-central1" 65 | ), 66 | event_loop=event_loop 67 | ), 68 | "vertex_gemini": VertexAIGeminiModel( 69 | config=VertexAIConfiguration( 70 | cloud_project="dummy-project-id", 71 | cloud_location="us-central1" 72 | ), 73 | event_loop=event_loop 74 | ), 75 | "vertex_gemma": VertexAIGemmaModel( 76 | config=VertexAIModelGardenConfiguration( 77 | cloud_project="dummy-project-id", 78 | cloud_location="us-central1", 79 | 
endpoint_id="dummy-endpoint-id" 80 | ), 81 | event_loop=event_loop 82 | ), 83 | "azure_llama2": AzureLlama2Model( 84 | config=AzureSelfDeployedConfiguration( 85 | api_key="dummy_api_key", 86 | endpoint_url="https://dummy-endpoint.dummy-region.inference.ml.azure.com/score", 87 | deployment="dummy_deployment_name" 88 | ), 89 | event_loop=event_loop 90 | ), 91 | "azure_mistral": AzureMistralModel( 92 | config=AzureSelfDeployedConfiguration( 93 | api_key="dummy_api_key", 94 | endpoint_url="https://dummy-endpoint.dummy-region.inference.ml.azure.com/score", 95 | deployment="dummy_deployment_name" 96 | ), 97 | event_loop=event_loop 98 | ) 99 | } 100 | -------------------------------------------------------------------------------- /allms/models/vertexai_base.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Any, Dict 2 | 3 | from google.cloud.aiplatform.models import Prediction 4 | from langchain_core.callbacks import AsyncCallbackManagerForLLMRun 5 | from langchain_core.outputs import LLMResult, Generation 6 | from langchain_google_vertexai import VertexAI, VertexAIModelGarden 7 | from pydash import chain 8 | 9 | from allms.constants.vertex_ai import VertexModelConstants 10 | 11 | 12 | class GCPInvalidRequestError(Exception): 13 | pass 14 | 15 | 16 | class CustomVertexAI(VertexAI): 17 | async def _agenerate( 18 | self, 19 | prompts: List[str], 20 | stop: Optional[List[str]] = None, 21 | run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, 22 | **kwargs: Any, 23 | ) -> LLMResult: 24 | def was_response_blocked(generation: Generation) -> bool: 25 | return ( 26 | generation.generation_info is not None 27 | and 'is_blocked' in generation.generation_info 28 | and generation.generation_info['is_blocked'] 29 | ) 30 | 31 | result = await super()._agenerate( 32 | prompts=prompts, 33 | stop=stop, 34 | run_manager=run_manager, 35 | **kwargs 36 | ) 37 | 38 | if not all(result.generations): 39 | raise GCPInvalidRequestError("The response is empty. 
It may have been blocked due to content filtering.") 40 | 41 | return LLMResult( 42 | generations=( 43 | chain(result.generations) 44 | .map(lambda generation_candidates: ( 45 | chain(generation_candidates) 46 | .map( 47 | lambda single_candidate: Generation( 48 | text=VertexModelConstants.RESPONSE_BLOCKED_STR 49 | ) if was_response_blocked(single_candidate) else single_candidate 50 | ) 51 | .value() 52 | )) 53 | .value() 54 | ), 55 | llm_output=result.llm_output, 56 | run=result.run 57 | ) 58 | 59 | 60 | class VertexAIModelGardenWrapper(VertexAIModelGarden): 61 | temperature: float = 0.0 62 | max_tokens: int = 128 63 | top_p: float = 0.95 64 | top_k: int = 40 65 | n: int = 1 66 | 67 | def __init__(self, **kwargs: Any) -> None: 68 | super().__init__(**kwargs) 69 | self.allowed_model_args = list(self._default_params.keys()) 70 | 71 | @property 72 | def _default_params(self) -> Dict[str, Any]: 73 | return { 74 | "temperature": self.temperature, 75 | "max_tokens": self.max_tokens, 76 | "top_k": self.top_k, 77 | "top_p": self.top_p, 78 | "n": self.n 79 | } 80 | 81 | def _parse_response(self, predictions: "Prediction", prompts: List[str]) -> LLMResult: 82 | generations: List[List[Generation]] = [] 83 | for result, prompt in zip(predictions.predictions, prompts): 84 | if isinstance(result, str): 85 | generations.append([Generation(text=self._parse_prediction(result, prompt))]) 86 | else: 87 | generations.append( 88 | [ 89 | Generation(text=self._parse_prediction(prediction, prompt)) 90 | for prediction in result 91 | ] 92 | ) 93 | return LLMResult(generations=generations) 94 | 95 | def _parse_prediction(self, prediction: Any, prompt: str) -> str: 96 | parsed_prediction = super()._parse_prediction(prediction) 97 | try: 98 | text_to_remove = f"Prompt:\n{prompt}\nOutput:\n" 99 | return parsed_prediction.rsplit(text_to_remove, maxsplit=1)[1] 100 | except Exception: 101 | raise ValueError(f"Output returned from the model doesn't follow the expected format.") 102 | 103 | async def _agenerate( 104 | self, 105 | prompts: List[str], 106 | stop: Optional[List[str]] = None, 107 | run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, 108 | **kwargs: Any, 109 | ) -> LLMResult: 110 | kwargs = {**kwargs, **self._default_params} 111 | instances = self._prepare_request(prompts, **kwargs) 112 | response = await self.async_client.predict( 113 | endpoint=self.endpoint_path, instances=instances 114 | ) 115 | return self._parse_response(response, prompts) 116 | 117 | -------------------------------------------------------------------------------- /docs/installation_and_quick_start.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | Install the package via pip: 3 | 4 | ```bash 5 | pip install allms 6 | ``` 7 | 8 | # Quick Start 9 | 10 | To use our package, you must have access to the credentials of the endpoint with the deployed model. 11 | Each of the supported models have a different set of credentials 12 | that must be passed in the corresponding configuration object. Below is a brief overview of how to use each of these models. 
13 | 
14 | ## Simple usage
15 | 
16 | ### Azure GPT
17 | 
18 | ```python
19 | from allms.models import AzureOpenAIModel
20 | from allms.domain.configuration import AzureOpenAIConfiguration
21 | 
22 | configuration = AzureOpenAIConfiguration(
23 |     api_key="",
24 |     base_url="",
25 |     api_version="",
26 |     deployment="",
27 |     model_name=""
28 | )
29 | 
30 | gpt_model = AzureOpenAIModel(config=configuration)
31 | gpt_response = gpt_model.generate("2+2 is?")
32 | ```
33 | 
34 | * ``: The API key for your Azure OpenAI resource. You can find this in the Azure portal under your
35 | Azure OpenAI resource.
36 | * ``: The base URL for your Azure OpenAI resource. You can find this in the Azure portal under your
37 | Azure OpenAI resource.
38 | * ``: The API version.
39 | * ``: The name under which the model was deployed.
40 | * ``: The underlying model's name.
41 | 
42 | ### VertexAI PaLM
43 | 
44 | ```python
45 | from allms.models import VertexAIPalmModel
46 | from allms.domain.configuration import VertexAIConfiguration
47 | 
48 | configuration = VertexAIConfiguration(
49 |     cloud_project="",
50 |     cloud_location=""
51 | )
52 | 
53 | palm_model = VertexAIPalmModel(config=configuration)
54 | palm_response = palm_model.generate("2+2 is?")
55 | ```
56 | 
57 | * ``: The GCP project in which you have access to the PaLM model.
58 | * ``: The region where the model is deployed.
59 | 
60 | ### VertexAI Gemini
61 | 
62 | ```python
63 | from allms.models import VertexAIGeminiModel
64 | from allms.domain.configuration import VertexAIConfiguration
65 | 
66 | configuration = VertexAIConfiguration(
67 |     cloud_project="",
68 |     cloud_location=""
69 | )
70 | 
71 | gemini_model = VertexAIGeminiModel(config=configuration)
72 | gemini_response = gemini_model.generate("2+2 is?")
73 | ```
74 | 
75 | * ``: The GCP project in which you have access to the Gemini model.
76 | * ``: The region where the model is deployed.
77 | 
78 | ### VertexAI Gemma
79 | 
80 | ```python
81 | from allms.models import VertexAIGemmaModel
82 | from allms.domain.configuration import VertexAIModelGardenConfiguration
83 | 
84 | configuration = VertexAIModelGardenConfiguration(
85 |     cloud_project="",
86 |     cloud_location="",
87 |     endpoint_id=""
88 | )
89 | 
90 | gemma_model = VertexAIGemmaModel(config=configuration)
91 | gemma_response = gemma_model.generate("2+2 is?")
92 | ```
93 | 
94 | * ``: The GCP project in which you have access to the Gemma model.
95 | * ``: The region where the model is deployed.
96 | * ``: ID of an endpoint where the model has been deployed.
97 | 
98 | ### Azure LLaMA 2
99 | 
100 | ```python
101 | from allms.models import AzureLlama2Model
102 | from allms.domain.configuration import AzureSelfDeployedConfiguration
103 | 
104 | configuration = AzureSelfDeployedConfiguration(
105 |     api_key="",
106 |     endpoint_url="",
107 |     deployment=""
108 | )
109 | 
110 | llama_model = AzureLlama2Model(config=configuration)
111 | llama_response = llama_model.generate("2+2 is?")
112 | ```
113 | 
114 | * ``: Authentication key for the endpoint.
115 | * ``: URL of pre-existing endpoint.
116 | * ``: The name under which the model was deployed.
117 | 118 | ### Azure Mistral 119 | 120 | ```python 121 | from allms.models.azure_mistral import AzureMistralModel 122 | from allms.domain.configuration import AzureSelfDeployedConfiguration 123 | 124 | configuration = AzureSelfDeployedConfiguration( 125 | api_key="", 126 | endpoint_url="", 127 | deployment="" 128 | ) 129 | 130 | mistral_model = AzureMistralModel(config=configuration) 131 | mistral_response = mistral_model.generate("2+2 is?") 132 | ``` 133 | 134 | * ``: Authentication key for the endpoint. 135 | * ``: URL of pre-existing endpoint. 136 | * ``: The name under which the model was deployed. 137 | -------------------------------------------------------------------------------- /docs/usage/forcing_response_format.md: -------------------------------------------------------------------------------- 1 | # Forcing Structured Response Format 2 | 3 | ## Pydantic BaseModels Integration 4 | 5 | If you want to force the model to output the response in a given JSON schema, `allms` provides an easy way to do 6 | it. You just need to provide a data model that describes the desired output format and the package does all the rest. 7 | As an output you get string already parsed to a provided data model class. 8 | 9 | Here's how to use this functionality step by step: 10 | 11 | 1. Define the desired output data model class. It needs to inherit from pydantic `BaseModel`. Each field should have 12 | type defined and a description provided in `Field()` which should describe what given field means. By providing accurate 13 | description, you make it easier for the model to generate proper response. 14 | 15 | ```python 16 | import typing 17 | 18 | from pydantic import BaseModel, Field 19 | 20 | class ReviewOutputDataModel(BaseModel): 21 | summary: str = Field(description="Summary of a product description") 22 | should_buy: bool = Field(description="Recommendation whether I should buy the product or not") 23 | brand_name: str = Field(description="Brand of the coffee") 24 | aroma:str = Field(description="Description of the coffee aroma") 25 | cons: typing.List[str] = Field(description="List of cons of the coffee") 26 | ``` 27 | 28 | 2. Provide the data model class together with prompt and input data to the `.generate()` method. `allms` will 29 | automatically force the model to output the data in the provided format and will parse the string returned from the 30 | model to the provided data model class. 31 | 32 | ```python 33 | review = "Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend." 34 | 35 | prompt = "Summarize review of the coffee. Review: {review}" 36 | input_data = [ 37 | InputData(input_mappings={"review": review}, id="0") 38 | ] 39 | 40 | responses = model.generate( 41 | prompt=prompt, 42 | input_data=input_data, 43 | output_data_model_class=ReviewOutputDataModel 44 | ) 45 | response = responses[0].response 46 | ``` 47 | 48 | Now we can check the response: 49 | 50 | ```python 51 | >>> type(response) 52 | ReviewOutputDataModel 53 | 54 | >>> response.should_buy 55 | False 56 | 57 | >>> response.brand_name 58 | "Blue Orca" 59 | 60 | >>> response.aroma 61 | "Not mentioned in the review" 62 | 63 | >>> response.cons 64 | ['Weak in terms of strength', 'Weak in terms of taste'] 65 | ``` 66 | 67 | ## What to do when output formatting doesn't work? 
68 | 
69 | The feature described above works only with advanced proprietary models like GPT and PaLM/Gemini. Less capable models like Llama2 or Mistral
70 | are unable to follow the formatting instructions generated from `output_data_model_class`.
71 | 
72 | For these less capable models, you need to manually specify in the prompt what the response should look like. You can then pass the `output_data_model_class` to try parsing the output. Using few-shot learning techniques is also advisable. In the case of JSON-like output, use double curly brackets instead of single ones, e.g. `{{"key": "value"}}` instead of `{"key": "value"}`.
73 | 
74 | ## How does forcing the response format work under the hood?
75 | To force the model to provide output in a desired format, under the hood `allms` automatically adds a description
76 | of the desired output format. For example, for the `ReviewOutputDataModel` the description looks like this:
77 | 
78 | ````text
79 | The output should be formatted as a JSON instance that conforms to the JSON schema below.
80 | 
81 | As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
82 | the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
83 | 
84 | Here is the output schema:
85 | ```
86 | {"properties": {"summary": {"title": "Summary", "description": "Summary of a product description", "type": "string"}, "should_buy": {"title": "Should Buy", "description": "Recommendation whether I should buy the product or not", "type": "boolean"}, "brand_name": {"title": "Brand Name", "description": "Brand of the coffee", "type": "string"}, "aroma": {"title": "Aroma", "description": "Description of the coffee aroma", "type": "string"}, "cons": {"title": "Cons", "description": "List of cons of the coffee", "type": "array", "items": {"type": "string"}}}, "required": ["summary", "should_buy", "brand_name", "aroma", "cons"]}
87 | ```
88 | ````
89 | 
90 | This feature is really helpful, but keep in mind that using it increases the number of prompt tokens,
91 | which makes the requests more costly (if you're using a model with per-token pricing).
92 | 
93 | If the model returns an output that doesn't conform to the defined data model, the raw model response will be returned
94 | in `ResponseData.response` and `ResponseData.error` will contain an `OutputParserException`.
-------------------------------------------------------------------------------- /allms/utils/long_text_processing_utils.py: --------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | from langchain import BasePromptTemplate
4 | from langchain.base_language import BaseLanguageModel
5 | from langchain.schema import Document
6 | 
7 | from allms.defaults.long_text_chain import LongTextChainDefaults
8 | 
9 | 
10 | def truncate_text_to_max_size(
11 |         llm: BaseLanguageModel,
12 |         prompt_template: BasePromptTemplate,
13 |         text: str,
14 |         model_total_max_tokens: int,
15 |         max_output_tokens: int,
16 | ) -> str:
17 |     """
18 |     This function truncates the input so that it fits the maximum context size of a model. The problem is that
19 |     the max context size is expressed in tokens, while in our code we operate on raw, un-tokenized strings. We can only calculate
20 |     how many tokens a given string has.
So to find the point at which we should truncate, this function calculates, in
21 |     tokens, how many times the current `text` is longer than the allowed limit. Then it assumes that this ratio also
22 |     holds when reasoning on words instead of tokens, and based on this the `split_point_index` is calculated.
23 |     This is only an approximation (the ratio calculated on tokens is only similar to the ratio calculated on words; in
24 |     most cases it won't be exactly the same). That's why this function is used recursively: it calculates the
25 |     split point, truncates the text and checks again whether the total prompt length is lower than the max context size of
26 |     the model. If it isn't, it reruns itself; if it is, it returns the truncated text.
27 | 
28 |     Another possibility would be to tokenize the text, truncate it at the token level and detokenize it back to a
29 |     string. But for this solution we'd first need a list
30 |     of tokenizers used by every model we'd like to support (currently this is provided inside langchain), and second, the
31 |     tokenization and de-tokenization steps could change the input prompt by introducing artifacts.
32 |     """
33 |     max_token_limit = get_max_allowed_number_of_tokens(model_total_max_tokens, max_output_tokens)
34 |     num_tokens = int(llm.get_num_tokens(prompt_template.format(text=text)))
35 | 
36 |     if num_tokens <= max_token_limit:
37 |         return text
38 | 
39 |     # We add `text="text"` rather than an empty string, because an empty string may be tokenized together with the whitespaces
40 |     # that are around it in the prompt, whereas joining the actual `{text}` with the prompt instructions adds one
41 |     # additional token
42 |     num_tokens_prompt_wo_text = int(llm.get_num_tokens(prompt_template.format(text="text")))
43 |     num_tokens_text = int(llm.get_num_tokens(text))
44 |     num_tokens_left_for_text = max_token_limit - num_tokens_prompt_wo_text
45 |     if num_tokens_left_for_text <= 0:
46 |         raise ValueError("Prompt instruction (without the actual text) is longer than the allowed model input length")
47 | 
48 |     # How many times the current text is longer than the allowed length
49 |     current_to_allowed_length_ratio = num_tokens_text / num_tokens_left_for_text
50 |     words = text.split()
51 |     split_point_index = int(len(words) / current_to_allowed_length_ratio)
52 | 
53 |     text_truncated = " ".join(words[:split_point_index])
54 | 
55 |     return truncate_text_to_max_size(
56 |         llm=llm,
57 |         prompt_template=prompt_template,
58 |         text=text_truncated,
59 |         model_total_max_tokens=model_total_max_tokens,
60 |         max_output_tokens=max_output_tokens
61 |     )
62 | 
63 | 
64 | def split_text_to_max_size(
65 |         llm: BaseLanguageModel,
66 |         prompt_template: BasePromptTemplate,
67 |         text: str,
68 |         model_total_max_tokens: int,
69 |         max_output_tokens: int,
70 |         overlap_size: int = LongTextChainDefaults.OVERLAP_SIZE
71 | ) -> List[Document]:
72 |     max_token_limit = get_max_allowed_number_of_tokens(model_total_max_tokens, max_output_tokens)
73 |     if int(llm.get_num_tokens(prompt_template.format(text=text))) < max_token_limit:
74 |         return [Document(page_content=text)]
75 | 
76 |     words = text.split()
77 |     middle_word_index = len(words) // 2
78 | 
79 |     overlap_left = overlap_size // 2
80 |     overlap_right = overlap_size - overlap_left
81 |     data_left_half = " ".join(words[:middle_word_index + overlap_left])
82 |     data_right_half = " ".join(words[middle_word_index - overlap_right:])
83 | 
84 |     return (
85 |         split_text_to_max_size(llm=llm,
prompt_template=prompt_template, text=data_left_half, 86 | model_total_max_tokens=model_total_max_tokens, max_output_tokens=max_output_tokens) 87 | + split_text_to_max_size(llm=llm, prompt_template=prompt_template, text=data_right_half, 88 | model_total_max_tokens=model_total_max_tokens, max_output_tokens=max_output_tokens) 89 | ) 90 | 91 | 92 | def get_max_allowed_number_of_tokens(model_total_max_tokens: int, max_output_tokens: int) -> int: 93 | buffer = 50 # for things like BOS, EOS and other unexpected things 94 | return model_total_max_tokens - max_output_tokens - buffer 95 | -------------------------------------------------------------------------------- /allms/chains/long_text_processing_chain.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from functools import reduce 3 | from typing import List, Any, Tuple, Optional, Union 4 | 5 | from langchain import LLMChain, BasePromptTemplate 6 | from langchain.base_language import BaseLanguageModel 7 | from langchain.chains.combine_documents.base import BaseCombineDocumentsChain 8 | from langchain.schema import Document 9 | 10 | from allms.domain.enumerables import AggregationLogicForLongInputData, LanguageModelTask 11 | from allms.domain.input_data import InputData 12 | from allms.domain.prompt_dto import (AggregateOutputClass, KeywordsOutputClass, SummaryOutputClass) 13 | from allms.utils.long_text_processing_utils import split_text_to_max_size 14 | 15 | 16 | class LongTextProcessingChain(BaseCombineDocumentsChain): 17 | task: LanguageModelTask 18 | model_total_max_tokens: int 19 | max_output_tokens: int 20 | map_llm_chain: LLMChain 21 | reduce_llm_chain: LLMChain 22 | input_data_variable_name: str 23 | aggregation_strategy: AggregationLogicForLongInputData 24 | 25 | @property 26 | def _chain_type(self) -> str: 27 | return "long_description_chain" 28 | 29 | async def combine_docs(self, input_data: Document, **kwargs: Any) -> Tuple[str, dict]: 30 | chunked_input: List[Document] = split_text_to_max_size( 31 | llm=self.map_llm_chain.llm, 32 | prompt_template=self.map_llm_chain.prompt, 33 | text=input_data, 34 | model_total_max_tokens=self.model_total_max_tokens, 35 | max_output_tokens=self.max_output_tokens 36 | ) 37 | 38 | chunk_responses = await self._map_step(chunked_input) 39 | aggregated_response = self._reduce_step(chunk_responses) 40 | 41 | return aggregated_response, {} 42 | 43 | async def acombine_docs(self, input_data: List[Document], **kwargs: Any) -> Tuple[str, dict]: 44 | return await self.combine_docs(input_data) 45 | 46 | async def _map_step(self, chunked_document: List[Document]) -> List[str]: 47 | results = list(map(lambda document: self.map_llm_chain.arun(document), chunked_document)) 48 | return await asyncio.gather(*results) 49 | 50 | def _reduce_step(self, chunk_responses: List[InputData]) -> str: 51 | if self.aggregation_strategy == AggregationLogicForLongInputData.REDUCE_BY_LLM_PROMPTING: 52 | return self._construct_input_from_list_and_run_reduce_chain(chunk_responses) 53 | elif self.aggregation_strategy == AggregationLogicForLongInputData.SIMPLE_CONCATENATION: 54 | if self.task == LanguageModelTask.SUMMARY: 55 | return self._aggregate_results_for_summary(chunk_responses).json() 56 | elif self.task == LanguageModelTask.KEYWORDS: 57 | return self._aggregate_results_for_keywords(chunk_responses).json() 58 | 59 | def _deserialize_response(self, response: str) -> Union[SummaryOutputClass, KeywordsOutputClass]: 60 | if self.task == LanguageModelTask.SUMMARY: 61 | 
return SummaryOutputClass.parse_raw(response) 62 | elif self.task == LanguageModelTask.KEYWORDS: 63 | return KeywordsOutputClass.parse_raw(response) 64 | 65 | def _construct_input_from_list_and_run_reduce_chain(self, response_list: List[InputData]) -> str: 66 | aggregate_input = Document( 67 | page_content=AggregateOutputClass(summaries=[ 68 | self._deserialize_response(response) for response in response_list] 69 | ).json() 70 | ) 71 | 72 | return self.reduce_llm_chain.run(aggregate_input.text) 73 | 74 | @staticmethod 75 | def _aggregate_results_for_summary(chunk_responses: List[Document]) -> SummaryOutputClass: 76 | return SummaryOutputClass(summary=" ".join([ 77 | SummaryOutputClass.parse_raw(response_json).summary for response_json in chunk_responses 78 | ])) 79 | 80 | @staticmethod 81 | def _aggregate_results_for_keywords(chunk_responses: List[str]) -> KeywordsOutputClass: 82 | return KeywordsOutputClass(keywords=list(reduce( 83 | lambda x, y: x + y, 84 | [KeywordsOutputClass.parse_raw(response_json).keywords for response_json in chunk_responses], 85 | [] 86 | ))) 87 | 88 | 89 | def load_long_text_processing_chain( 90 | task: LanguageModelTask, 91 | llm: BaseLanguageModel, 92 | model_total_max_tokens: int, 93 | max_output_tokens: int, 94 | map_prompt: BasePromptTemplate, 95 | reduce_prompt: BasePromptTemplate, 96 | aggregation_strategy: AggregationLogicForLongInputData, 97 | input_data_variable_name: str = "text", 98 | verbose: Optional[bool] = None 99 | ) -> LongTextProcessingChain: 100 | map_chain = LLMChain(llm=llm, prompt=map_prompt, verbose=verbose) 101 | reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt, verbose=verbose) 102 | 103 | return LongTextProcessingChain( 104 | task=task, 105 | model_total_max_tokens=model_total_max_tokens, 106 | max_output_tokens=max_output_tokens, 107 | map_llm_chain=map_chain, 108 | reduce_llm_chain=reduce_chain, 109 | input_data_variable_name=input_data_variable_name, 110 | aggregation_strategy=aggregation_strategy, 111 | verbose=verbose 112 | ) 113 | -------------------------------------------------------------------------------- /docs/usage/advanced.md: -------------------------------------------------------------------------------- 1 | # Advanced Usage 2 | 3 | ## Symbolic Variables and Batch Mode 4 | 5 | If you want to generate responses for a batch of examples, you can achieve this by preparing a prompt with symbolic 6 | variables and providing input data that will be injected into this prompt. `allms` will automatically make these 7 | requests in an async mode and retry them in case of any API error. 8 | 9 | Let's say we want to classify reviews of coffee as positive or negative. Here's how to do it: 10 | 11 | ```python 12 | from allms.models import AzureOpenAIModel 13 | from allms.domain.configuration import AzureOpenAIConfiguration 14 | from allms.domain.input_data import InputData 15 | 16 | configuration = AzureOpenAIConfiguration( 17 | api_key="", 18 | base_url="", 19 | api_version="", 20 | deployment="", 21 | model_name="" 22 | ) 23 | 24 | model = AzureOpenAIModel(config=configuration) 25 | 26 | positive_review_0 = "Very good coffee, lightly roasted, with good aroma and taste. The taste of sourness is barely noticeable (which is good because I don't like sour coffees). After grinding, the aroma spreads throughout the room. I recommend it to all those who do not like strongly roasted and pitch-black coffees. A very good solution is to close the package with string, which allows you to preserve the aroma and freshness." 
27 | positive_review_1 = "Delicious coffee!! Delicate, just the way I like it, and the smell after opening is amazing. It smells freshly roasted. Faithful to Lavazza coffee for years, I decided to look for other flavors. Based on the reviews, I blindly bought it and it was a 10-shot, it outperformed Lavazze in taste. For me the best."
28 | negative_review = "Marketing is doing its job and I was tempted too, but this coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend."
29 | 
30 | prompt = "You'll be provided with a review of a coffee. Decide if the review is positive or negative. Review: {review}"
31 | input_data = [
32 |     InputData(input_mappings={"review": positive_review_0}, id="0"),
33 |     InputData(input_mappings={"review": positive_review_1}, id="1"),
34 |     InputData(input_mappings={"review": negative_review}, id="2")
35 | ]
36 | 
37 | responses = model.generate(prompt=prompt, input_data=input_data)
38 | ```
39 | 
40 | As an output we'll get `List[ResponseData]` where each `ResponseData` will contain the response for a single example from
41 | `input_data`. The requests are performed in an async mode, so remember that the order of the `responses` is not guaranteed
42 | to match the order of the `input_data`. That's why, together with the response, we also pass the `ResponseData.input_data` to
43 | the output.
44 | 
45 | So let's see the responses:
46 | ```python
47 | >>> {f"review_id={response.input_data.id}": response.response for response in responses}
48 | {
49 |     'review_id=0': 'The review is positive.',
50 |     'review_id=1': 'The review is positive.',
51 |     'review_id=2': 'The review is negative.'
52 | }
53 | 
54 | ```
55 | 
56 | ## Multiple symbolic variables
57 | You can also define a prompt with multiple symbolic variables. The rule is that each symbolic variable from the prompt
58 | should have a mapping provided in the `input_mappings` of `InputData`. Let's say we want to provide two reviews in one
59 | prompt and let the model decide which one of them is positive. Here's how to do it:
60 | 
61 | ```python
62 | prompt = """You'll be provided with two reviews of a coffee. Decide which one is positive.
63 | 
64 | First review: {first_review}
65 | Second review: {second_review}"""
66 | input_data = [
67 |     InputData(input_mappings={"first_review": positive_review_0, "second_review": negative_review}, id="0"),
68 |     InputData(input_mappings={"first_review": negative_review, "second_review": positive_review_1}, id="1"),
69 | ]
70 | 
71 | responses = model.generate(prompt=prompt, input_data=input_data)
72 | ```
73 | 
74 | And the results:
75 | ```python
76 | >>> {f"example_id={response.input_data.id}": response.response for response in responses}
77 | {
78 |     'example_id=0': 'The first review is positive.',
79 |     'example_id=1': 'The second review is positive.'
80 | }
81 | ```
82 | 
83 | ## Controlling the Number of Concurrent Requests
84 | As mentioned above, `allms` automatically makes requests in an async mode. By default, the maximum number of
85 | concurrent requests is set to 1000. You can control this value by setting the `max_concurrency` parameter when
86 | initializing the model. Set it to a value that is appropriate for your model endpoint.
87 | 
88 | ## Using a common asyncio event loop
89 | By default, each model instance has its own event loop for handling the execution of async tasks.
If you want to use 90 | a common loop for multiple models or to have a custom loop, it's possible to specify it in the model constructor: 91 | 92 | ```python 93 | import asyncio 94 | 95 | from allms.models import AzureOpenAIModel 96 | from allms.domain.configuration import AzureOpenAIConfiguration 97 | 98 | custom_event_loop = asyncio.new_event_loop() 99 | 100 | configuration = AzureOpenAIConfiguration( 101 | api_key="", 102 | base_url="", 103 | api_version="", 104 | deployment="", 105 | model_name="" 106 | ) 107 | 108 | model = AzureOpenAIModel( 109 | config=configuration, 110 | event_loop=custom_event_loop 111 | ) 112 | ``` -------------------------------------------------------------------------------- /allms/models/vertexai_gemini.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing 3 | from asyncio import AbstractEventLoop 4 | from typing import Optional 5 | 6 | from langchain_core.prompts import ChatPromptTemplate 7 | from vertexai.preview import tokenization 8 | from vertexai.tokenization._tokenizers import Tokenizer 9 | 10 | from allms.defaults.general_defaults import GeneralDefaults 11 | from allms.defaults.vertex_ai import GeminiModelDefaults 12 | from allms.domain.configuration import VertexAIConfiguration 13 | from allms.domain.input_data import InputData 14 | from allms.models.abstract import AbstractModel 15 | from allms.models.vertexai_base import CustomVertexAI 16 | from allms.utils.logger_utils import setup_logger 17 | 18 | 19 | logger = logging.getLogger(__name__) 20 | setup_logger() 21 | 22 | BASE_GEMINI_MODEL_NAMES = [ 23 | "gemini-1.0-pro", 24 | "gemini-1.5-pro", 25 | "gemini-1.5-flash", 26 | # TODO: add `gemini-2.0-flash` when available 27 | ] 28 | 29 | 30 | class VertexAIGeminiModel(AbstractModel): 31 | def __init__( 32 | self, 33 | config: VertexAIConfiguration, 34 | temperature: float = GeminiModelDefaults.TEMPERATURE, 35 | top_k: int = GeminiModelDefaults.TOP_K, 36 | top_p: float = GeminiModelDefaults.TOP_P, 37 | max_output_tokens: int = GeminiModelDefaults.MAX_OUTPUT_TOKENS, 38 | model_total_max_tokens: int = GeminiModelDefaults.MODEL_TOTAL_MAX_TOKENS, 39 | max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY, 40 | max_retries: int = GeneralDefaults.MAX_RETRIES, 41 | verbose: bool = GeminiModelDefaults.VERBOSE, 42 | event_loop: Optional[AbstractEventLoop] = None 43 | ) -> None: 44 | self._top_p = top_p 45 | self._top_k = top_k 46 | self._verbose = verbose 47 | self._config = config 48 | 49 | self._gcp_tokenizer = self._get_gcp_tokenizer(self._config.gemini_model_name) 50 | 51 | super().__init__( 52 | temperature=temperature, 53 | model_total_max_tokens=model_total_max_tokens, 54 | max_output_tokens=max_output_tokens, 55 | max_concurrency=max_concurrency, 56 | max_retries=max_retries, 57 | event_loop=event_loop 58 | ) 59 | 60 | def _create_llm(self) -> CustomVertexAI: 61 | llm = CustomVertexAI( 62 | model_name=self._config.gemini_model_name, 63 | max_output_tokens=self._max_output_tokens, 64 | temperature=self._temperature, 65 | top_p=self._top_p, 66 | top_k=self._top_k, 67 | safety_settings=self._config.gemini_safety_settings, 68 | verbose=self._verbose, 69 | project=self._config.cloud_project, 70 | location=self._config.cloud_location, 71 | api_endpoint=self._config.api_endpoint, 72 | api_transport=self._config.api_transport, 73 | credentials=self._config.credentials, 74 | ) 75 | # NOTE: this param is for some reason not passed, see: 
langchain_google_vertexai.llms.VertexAI.validate_environment 76 | # `endpoint_version` is not passed to the `ChatVertexAI` constructor 77 | # but in _VertexAIBase, grandparent of VertexAI (VertexAI -> _VertexAICommon -> _VertexAIBase) 78 | # it's set v1beta1 by default 79 | if self._config.endpoint_version: 80 | llm.client.endpoint_version = self._config.endpoint_version 81 | # NOTE: `ChatVertexAI` is child of _VertexAICommon and grandchild of _VertexAIBase, the same as `VertexAI`, 82 | # so they use the same validation in langchain_google_vertexai._base._VertexAIBase.validate_params_base. 83 | # In `validate_params_base` the `default_metadata` is set to `additional_headers`. 84 | # And in constructor of `ChatVertexAI` `additional_headers` is not passed. 85 | # So `default_metadata` is always set to default value. 86 | if self._config.extra_headers: 87 | llm.client.default_metadata = self._config.extra_headers 88 | return llm 89 | 90 | def _get_prompt_tokens_number(self, prompt: ChatPromptTemplate, input_data: InputData) -> int: 91 | return self._gcp_tokenizer.count_tokens( 92 | prompt.format_prompt(**input_data.input_mappings).to_string() 93 | ).total_tokens 94 | 95 | def _get_model_response_tokens_number(self, model_response: typing.Optional[str]) -> int: 96 | if model_response: 97 | return self._gcp_tokenizer.count_tokens(model_response).total_tokens 98 | return 0 99 | 100 | 101 | @staticmethod 102 | def _get_gcp_tokenizer(model_name) -> Tokenizer: 103 | try: 104 | return tokenization.get_tokenizer_for_model(model_name) 105 | except ValueError: 106 | for base_model_name in BASE_GEMINI_MODEL_NAMES: 107 | if model_name.startswith(base_model_name): 108 | return tokenization.get_tokenizer_for_model(base_model_name) 109 | else: 110 | # Currently supported models for token listing and counting 111 | # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/list-token#supported-models 112 | # `gemini-2.0` family of models is not supported yet, hence we need this workaround 113 | logger.info( 114 | f"Model %s is not supported for tokenization, using default tokenizer:" 115 | f" {GeminiModelDefaults.GCP_MODEL_NAME}", 116 | model_name 117 | ) 118 | return tokenization.get_tokenizer_for_model(GeminiModelDefaults.GCP_MODEL_NAME) 119 | raise 120 | 121 | -------------------------------------------------------------------------------- /tests/resources/test_input_data.csv: -------------------------------------------------------------------------------- 1 | text,id 2 | "

Indywidualna racja żywnościowa wojskowa S-R-9 (set nr 9)

Skład zestawu :

  • Makaron po bolońsku (300g)
  • Konserwa tyrolska (100g)
  • Suchary (90 g)
  • Koncentrat napoju herbacianego instant o smaku owoców leśnych (15g)
  • Dżem malinowy (25 g)
  • Baton zbożowo- owocowy o smaku figowym (35 g)
  • Guma do żucia (2 szt)
  • Cukierek z ekstraktem z kawy naturalnej (1 szt)
  • Cukierek z witaminą C (1 szt)
  • Sól (1 g)
  • Pieprz (0,2 g)
  • Serwetka (1 szt)
  • Chusteczka nawilżona (1 szt)
  • Papier toaletowy ( 150 cm)
  • Torebka strunowa (1 szt)
  • Łyżka jednorazowa (1 szt)
  • Rurka do napojów ( 1 szt)
  • Bezpłomieniowy podgrzewacz chemiczny (1 szt)

Racja żywnościowa to często jedyny sposób, aby przetrwać trudne chwile i doczekać momentu, w którym nadejdzie pomoc. Z takiego rozwiązania korzystają wojskowi, ale także survivalowcy. Nic też nie stoi na przeszkodzie, aby racje żywnościowe zabrać ze sobą za każdym razem, kiedy wybierasz się w długą podróż.

WAŻNOŚĆ ZESTAWU TO MINIMUM 30.11.2023r.

Posiadam również inne numery racji !

Zapraszam do zapoznania się z aukcjami .

Prosimy nie sugerować się zdjęciami . Zdjęcia są tylko poglądowe .

",12181 3 | "

Witam,

Przedmiotem mojej oferty jest:

Indywidualna racja żywnościowa żołnierza Wojska Polskiego S-R-6.

Skład racji żywnościowej:

  • bigos z kiełbasą (300 g)
  • gulasz angielski(100 g)
  • suchary (90 g)
  • dżem jagodowy(25 g)
  • koncentrat napoju herbacianego instant o smaku cytrynowym (15 g)
  • baton zbożowo- owocowy o smaku wiśniowym(1 szt )
  • guma do żucia (2 szt)
  • cukierek zawierający ekstrakt kawy naturalnej (1 szt)
  • cukierek zawierający witaminę C (1 szt)
  • sól, pieprz
  • serwetka papierowa (1 szt)
  • serwetka nawilżona (1 szt)
  • papier toaletowy ( 1 szt)
  • woreczek strunowy (1 szt)
  • łyżka jednorazowa (1 szt)
  • rurka do napojów ( 1 szt)
  • bezpłomieniowy podgrzewacz chemiczny (1 szt)

Racje żywnościowe wydawane są żołnierzom, gdy nie można zapewnić im ciepłego posiłku z kuchni polowej, na przykład na szkoleniach poligonowych. Nie oznacza to jednak, że żołnierz będzie jadł zimny posiłek. W każdej racji znajduje się podgrzewacz chemiczny, który w łatwy sposób w krótkim czasie pozwala podgrzać posiłek bez użycia ognia. Racje są często stosowane również przez harcerzy, turystów(szczególnie pieszych), himalaistów, wędkarzy, żeglarzy i grotołazów.

(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)

Ważność zestawu minimum 30.11.2023 r.

W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)

Zapraszam na inne moje oferty - posiadam inne numery!!!!

",12181 4 | "

Kross Level 8.0 szary pp 2023. Koła 29 cali, rama 20"".

Rama i widelec

  • WYKOŃCZENIE LAKIERU:POŁYSK
  • MATERIAŁ RAMY:CARBON NO.2
  • WIDELEC:ROCK SHOX RECON SILVER RL
  • SKOK WIDELCA:100MM
  • TYLNY AMORTYZATOR:BRAK
  • SKOK TYLNEGO AMORTYZATORA:BRAK

Napęd

  • PRZERZUTKA PRZÓD:BRAK
  • PRZERZUTKA TYŁ:SHIMANO DEORE M6100
  • MANETKI:SHIMANO DEORE M6100
  • KORBA:PROWHEEL MPX-CR094S-TT-B
  • KORONKI:34T/170MM
  • KASETA / WOLNOBIEG:MTB-CS-HR-1050AHS
  • ZAKRES KASETY/WOLNOBIEGU:10-50T
  • ILOŚĆ PRZEŁOŻEŃ:12
  • SUPORT:SHIMANO BSA BB52
  • ŁAŃCUCH:SHIMANO M6100

Koła

  • PIASTA PRZÓD:GL-B93F-B/CL-X15 32H
  • PIASTA TYŁ:GL-B93R-SB/CL-X12 MS 32H
  • OBRĘCZE:KROSS
  • OPONY:SCHWALBE RAPID ROB 29X2.25

Hamulce

  • HAMULEC PRZÓD:SHIMANO MT200
  • HAMULEC TYŁ:SHIMANO MT200
  • DŹWIGNIE HAMULCA:SHIMANO MT200
  • TARCZE HAMULCOWE:SHIMANO (160)
  • TARCZE HAMULCOWE TYŁ:SHIMANO (160)

Komponenty

  • KIEROWNICA:ALUMINIUM 720 MM 31.8
  • WSPORNIK KIEROWNICY:ALUMINIUM 31,8/7° (S - 60 MM, M, L - 70 MM, XL - 80 MM)
  • SIODŁO:SELLE ROYAL
  • WSPORNIK SIODŁA:ALUMINIUM 27,2X400
  • STERY:FSA ORBIT C-40-ACB NO.42
  • CHWYTY:KROSS SCALE

System e-bike

  • SILNIK:BRAK
  • BATERIA:BRAK
  • WYŚWIETLACZ:BRAK

Informacje dodatkowe

  • WAGA [KG]:S - 12,67 KG, M - 12,7 KG, L - 12,75 KG, XL - BD
",16483 5 | "

Witam,

Przedmiotem mojej oferty jest:

Indywidualna racja żywnościowa żołnierza Armii USA MRE nr 6

Oryginalna racja żywnościowa US Army MRE (Meal, Ready-to-Eat) przeznaczona dla jednej osoby. Całość zamknięta została w szczelnym opakowaniu dzięki czemu posiada długi okres przydatności do spożycia. Dodatkowo każdy z produktów wewnątrz posiada osobne opakowanie. Przyrządzanie racji odbywa się za pomocą bezpłomieniowego podgrzewacza chemicznego, dzięki czemu możemy zjeść ciepły posiłek bez konieczności rozpalania ogniska. Każde Menu posiada inne danie główne.

Skład racji żywnościowej:

Menu 6

Beef Taco

Santa Fe Style Rice and Beans

Cheddar Cheese Spread

Tortillas

Nut and Fruit Mix w/ M&Ms

Orange Beverage Base, Sugar-free

Coffee

Creamer

Sugar

Matches

Chewing Gum, Sugar Free

Towelette

Salt

Toilet Tissue

(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)

Ważność zestawu minimum 12.2023 r.

W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)

Skład zestawu może się delikatnie różnić w zależności od partii produkcji.

Zapraszam na inne moje oferty - posiadam inne numery!!!!

",19504 6 | -------------------------------------------------------------------------------- /tests/test_output_parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | from allms.domain.input_data import InputData 7 | from allms.domain.prompt_dto import SummaryOutputClass, KeywordsOutputClass 8 | 9 | 10 | class TestOutputModelParserForDifferentModelOutputs: 11 | @patch("langchain.chains.base.Chain.arun") 12 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 13 | def test_output_parser_returns_desired_format(self, tokens_mock, chain_run_mock, models): 14 | # GIVEN 15 | text_output = "This is the model output" 16 | expected_model_response = json.dumps({"summary": text_output}) 17 | chain_run_mock.return_value = expected_model_response 18 | tokens_mock.return_value = 1 19 | 20 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 21 | prompt = "Some Dummy Prompt {text}" 22 | 23 | # WHEN & THEN 24 | for model in models.values(): 25 | model_response = model.generate(prompt, input_data, SummaryOutputClass) 26 | assert type(model_response[0].response) == SummaryOutputClass 27 | assert model_response[0].response.summary == text_output 28 | 29 | @patch("langchain.chains.base.Chain.arun") 30 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 31 | def test_output_parser_returns_error_when_model_output_returns_different_field(self, tokens_mock, chain_run_mock, models): 32 | # GIVEN 33 | text_output = "This is the model output" 34 | expected_model_response = json.dumps({"other_key": text_output}) 35 | chain_run_mock.return_value = expected_model_response 36 | tokens_mock.return_value = 1 37 | 38 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 39 | prompt = "Some Dummy Prompt {text}" 40 | 41 | # WHEN & THEN 42 | for model in models.values(): 43 | model_response = model.generate(prompt, input_data, SummaryOutputClass) 44 | assert "OutputParserException" in model_response[0].error 45 | assert model_response[0].response is None 46 | 47 | @patch("langchain.chains.base.Chain.arun") 48 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 49 | @pytest.mark.parametrize("json_response", [ 50 | ("{\"summary\": \"This is the model output\"}"), 51 | ("Sure! Here's the JSON you wanted: {\"summary\": \"This is the model output\"} Have a nice day!"), 52 | ("<>\\n{\\n \"summary\": \"This is the model output\"\\n}\\n<>"), 53 | ("{\\\"summary\\\": \\\"This is the model output\\\"}\\n}") 54 | ]) 55 | def test_output_parser_extracts_json_from_response(self, tokens_mock, chain_run_mock, models, json_response): 56 | # GIVEN 57 | chain_run_mock.return_value = json_response 58 | tokens_mock.return_value = 1 59 | 60 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 61 | prompt = "Some Dummy Prompt {text}" 62 | 63 | # WHEN & THEN 64 | for model in models.values(): 65 | model_response = model.generate(prompt, input_data, SummaryOutputClass) 66 | assert model_response[0].response == SummaryOutputClass(summary="This is the model output") 67 | 68 | @patch("langchain.chains.base.Chain.arun") 69 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 70 | def test_output_parser_returns_error_when_json_is_garbled(self, tokens_mock, chain_run_mock, models): 71 | # GIVEN 72 | chain_run_mock.return_value = "Sure! 
Here's the JSON you wanted: {\"summary: \"text\"}" 73 | tokens_mock.return_value = 1 74 | 75 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 76 | prompt = "Some Dummy Prompt {text}" 77 | 78 | # WHEN & THEN 79 | for model in models.values(): 80 | model_response = model.generate(prompt, input_data, SummaryOutputClass) 81 | assert "OutputParserException" in model_response[0].error 82 | assert model_response[0].response is None 83 | 84 | @patch("langchain.chains.base.Chain.arun") 85 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 86 | def test_output_parser_returns_parsed_class_when_model_output_returns_too_many_fields(self, tokens_mock, chain_run_mock, models): 87 | # GIVEN 88 | text_output = "This is the model output" 89 | expected_model_response = json.dumps({"other_key": text_output, "summary": text_output}) 90 | chain_run_mock.return_value = expected_model_response 91 | tokens_mock.return_value = 1 92 | 93 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 94 | prompt = "Some Dummy Prompt {text}" 95 | 96 | # WHEN & THEN 97 | for model in models.values(): 98 | model_response = model.generate(prompt, input_data, SummaryOutputClass) 99 | assert type(model_response[0].response) == SummaryOutputClass 100 | assert model_response[0].response.summary == text_output 101 | 102 | @patch("langchain.chains.base.Chain.arun") 103 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 104 | def test_model_returns_output_as_python_list_correctly(self, tokens_mock, chain_run_mock, models): 105 | # GIVEN 106 | text_output = ["1", "2", "3"] 107 | expected_model_response = json.dumps({"text": text_output, "keywords": text_output}) 108 | chain_run_mock.return_value = expected_model_response 109 | tokens_mock.return_value = 1 110 | 111 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 112 | prompt = "Some Dummy Prompt {text}" 113 | 114 | # WHEN & THEN 115 | for model in models.values(): 116 | model_response = model.generate(prompt, input_data, KeywordsOutputClass) 117 | assert type(model_response[0].response) == KeywordsOutputClass 118 | assert model_response[0].response.keywords == list(map(str, text_output)) 119 | 120 | @patch("langchain.chains.base.Chain.arun") 121 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 122 | def test_model_output_when_input_data_is_empty(self, tokens_mock, chain_run_mock, models): 123 | # GIVEN 124 | expected_model_response = "2+2 is 4" 125 | chain_run_mock.return_value = expected_model_response 126 | tokens_mock.return_value = 1 127 | 128 | prompt = "2+2 is..." 129 | 130 | # WHEN & THEN 131 | for model in models.values(): 132 | model_response = model.generate(prompt, None, KeywordsOutputClass) 133 | assert model_response[0].response is None 134 | assert "OutputParserException" in model_response[0].error -------------------------------------------------------------------------------- /tests/resources/test_end_to_end_expected_output.csv: -------------------------------------------------------------------------------- 1 | text,response,id,number_of_prompt_tokens,number_of_generated_tokens 2 | "

Indywidualna racja żywnościowa wojskowa S-R-9 (set nr 9)

Skład zestawu :

  • Makaron po bolońsku (300g)
  • Konserwa tyrolska (100g)
  • Suchary (90 g)
  • Koncentrat napoju herbacianego instant o smaku owoców leśnych (15g)
  • Dżem malinowy (25 g)
  • Baton zbożowo- owocowy o smaku figowym (35 g)
  • Guma do żucia (2 szt)
  • Cukierek z ekstraktem z kawy naturalnej (1 szt)
  • Cukierek z witaminą C (1 szt)
  • Sól (1 g)
  • Pieprz (0,2 g)
  • Serwetka (1 szt)
  • Chusteczka nawilżona (1 szt)
  • Papier toaletowy ( 150 cm)
  • Torebka strunowa (1 szt)
  • Łyżka jednorazowa (1 szt)
  • Rurka do napojów ( 1 szt)
  • Bezpłomieniowy podgrzewacz chemiczny (1 szt)

Racja żywnościowa to często jedyny sposób, aby przetrwać trudne chwile i doczekać momentu, w którym nadejdzie pomoc. Z takiego rozwiązania korzystają wojskowi, ale także survivalowcy. Nic też nie stoi na przeszkodzie, aby racje żywnościowe zabrać ze sobą za każdym razem, kiedy wybierasz się w długą podróż.

WAŻNOŚĆ ZESTAWU TO MINIMUM 30.11.2023r.

Posiadam również inne numery racji !

Zapraszam do zapoznania się z aukcjami .

Prosimy nie sugerować się zdjęciami . Zdjęcia są tylko poglądowe .

","['Indywidualna racja żywnościowa', 3 | 'wojskowa', 4 | 'S-R-9', 5 | 'set nr 9', 6 | 'Makaron po bolońsku', 7 | 'Konserwa tyrolska', 8 | 'Suchary', 9 | 'Koncentrat napoju herbacianego instant o smaku owoców leśnych', 10 | 'Dżem malinowy', 11 | 'Baton zbożowo-owocowy o smaku figowym']",12181,772,105 12 | "

Witam,

Przedmiotem mojej oferty jest:

Indywidualna racja żywnościowa żołnierza Wojska Polskiego S-R-6.

Skład racji żywnościowej:

  • bigos z kiełbasą (300 g)
  • gulasz angielski(100 g)
  • suchary (90 g)
  • dżem jagodowy(25 g)
  • koncentrat napoju herbacianego instant o smaku cytrynowym (15 g)
  • baton zbożowo- owocowy o smaku wiśniowym(1 szt )
  • guma do żucia (2 szt)
  • cukierek zawierający ekstrakt kawy naturalnej (1 szt)
  • cukierek zawierający witaminę C (1 szt)
  • sól, pieprz
  • serwetka papierowa (1 szt)
  • serwetka nawilżona (1 szt)
  • papier toaletowy ( 1 szt)
  • woreczek strunowy (1 szt)
  • łyżka jednorazowa (1 szt)
  • rurka do napojów ( 1 szt)
  • bezpłomieniowy podgrzewacz chemiczny (1 szt)

Racje żywnościowe wydawane są żołnierzom, gdy nie można zapewnić im ciepłego posiłku z kuchni polowej, na przykład na szkoleniach poligonowych. Nie oznacza to jednak, że żołnierz będzie jadł zimny posiłek. W każdej racji znajduje się podgrzewacz chemiczny, który w łatwy sposób w krótkim czasie pozwala podgrzać posiłek bez użycia ognia. Racje są często stosowane również przez harcerzy, turystów(szczególnie pieszych), himalaistów, wędkarzy, żeglarzy i grotołazów.

(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)

Ważność zestawu minimum 30.11.2023 r.

W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)

Zapraszam na inne moje oferty - posiadam inne numery!!!!

","['Indywidualna racja żywnościowa', 13 | 'wojskowa', 14 | 'S-R-9', 15 | 'set nr 9', 16 | 'Makaron po bolońsku', 17 | 'Konserwa tyrolska', 18 | 'Suchary', 19 | 'Koncentrat napoju herbacianego instant o smaku owoców leśnych', 20 | 'Dżem malinowy', 21 | 'Baton zbożowo-owocowy o smaku figowym']",12181,942,105 22 | "

Kross Level 8.0 szary pp 2023. Koła 29 cali, rama 20"".

Rama i widelec

  • WYKOŃCZENIE LAKIERU:POŁYSK
  • MATERIAŁ RAMY:CARBON NO.2
  • WIDELEC:ROCK SHOX RECON SILVER RL
  • SKOK WIDELCA:100MM
  • TYLNY AMORTYZATOR:BRAK
  • SKOK TYLNEGO AMORTYZATORA:BRAK

Napęd

  • PRZERZUTKA PRZÓD:BRAK
  • PRZERZUTKA TYŁ:SHIMANO DEORE M6100
  • MANETKI:SHIMANO DEORE M6100
  • KORBA:PROWHEEL MPX-CR094S-TT-B
  • KORONKI:34T/170MM
  • KASETA / WOLNOBIEG:MTB-CS-HR-1050AHS
  • ZAKRES KASETY/WOLNOBIEGU:10-50T
  • ILOŚĆ PRZEŁOŻEŃ:12
  • SUPORT:SHIMANO BSA BB52
  • ŁAŃCUCH:SHIMANO M6100

Koła

  • PIASTA PRZÓD:GL-B93F-B/CL-X15 32H
  • PIASTA TYŁ:GL-B93R-SB/CL-X12 MS 32H
  • OBRĘCZE:KROSS
  • OPONY:SCHWALBE RAPID ROB 29X2.25

Hamulce

  • HAMULEC PRZÓD:SHIMANO MT200
  • HAMULEC TYŁ:SHIMANO MT200
  • DŹWIGNIE HAMULCA:SHIMANO MT200
  • TARCZE HAMULCOWE:SHIMANO (160)
  • TARCZE HAMULCOWE TYŁ:SHIMANO (160)

Komponenty

  • KIEROWNICA:ALUMINIUM 720 MM 31.8
  • WSPORNIK KIEROWNICY:ALUMINIUM 31,8/7° (S - 60 MM, M, L - 70 MM, XL - 80 MM)
  • SIODŁO:SELLE ROYAL
  • WSPORNIK SIODŁA:ALUMINIUM 27,2X400
  • STERY:FSA ORBIT C-40-ACB NO.42
  • CHWYTY:KROSS SCALE

System e-bike

  • SILNIK:BRAK
  • BATERIA:BRAK
  • WYŚWIETLACZ:BRAK

Informacje dodatkowe

  • WAGA [KG]:S - 12,67 KG, M - 12,7 KG, L - 12,75 KG, XL - BD
","['Indywidualna racja żywnościowa', 23 | 'wojskowa', 24 | 'S-R-9', 25 | 'set nr 9', 26 | 'Makaron po bolońsku', 27 | 'Konserwa tyrolska', 28 | 'Suchary', 29 | 'Koncentrat napoju herbacianego instant o smaku owoców leśnych', 30 | 'Dżem malinowy', 31 | 'Baton zbożowo-owocowy o smaku figowym']",16483,1152,105 32 | "

Witam,

Przedmiotem mojej oferty jest:

Indywidualna racja żywnościowa żołnierza Armii USA MRE nr 6

Oryginalna racja żywnościowa US Army MRE (Meal, Ready-to-Eat) przeznaczona dla jednej osoby. Całość zamknięta została w szczelnym opakowaniu dzięki czemu posiada długi okres przydatności do spożycia. Dodatkowo każdy z produktów wewnątrz posiada osobne opakowanie. Przyrządzanie racji odbywa się za pomocą bezpłomieniowego podgrzewacza chemicznego, dzięki czemu możemy zjeść ciepły posiłek bez konieczności rozpalania ogniska. Każde Menu posiada inne danie główne.

Skład racji żywnościowej:

Menu 6

Beef Taco

Santa Fe Style Rice and Beans

Cheddar Cheese Spread

Tortillas

Nut and Fruit Mix w/ M&Ms

Orange Beverage Base, Sugar-free

Coffee

Creamer

Sugar

Matches

Chewing Gum, Sugar Free

Towelette

Salt

Toilet Tissue

(Zdjęcia poglądowe, proszę sugerować się menu i datą podanymi w opisie)

Ważność zestawu minimum 12.2023 r.

W mojej subiektywnej ocenie termin przydatności w przypadku tego rodzaju pożywienia nie jest wskaźnikiem przydatności. Regulacje prawne nie pozwalają na wydłużone terminy przydatności dla żywności, a najczęściej podanym terminem przydatności jest produkt o najkrótszym terminie ( np. dżem, powidła etc.)

Skład zestawu może się delikatnie różnić w zależności od partii produkcji.

Zapraszam na inne moje oferty - posiadam inne numery!!!!

","['Indywidualna racja żywnościowa', 33 | 'wojskowa', 34 | 'S-R-9', 35 | 'set nr 9', 36 | 'Makaron po bolońsku', 37 | 'Konserwa tyrolska', 38 | 'Suchary', 39 | 'Koncentrat napoju herbacianego instant o smaku owoców leśnych', 40 | 'Dżem malinowy', 41 | 'Baton zbożowo-owocowy o smaku figowym']",19504,795,105 42 | -------------------------------------------------------------------------------- /tests/test_model_behavior_for_different_input_data.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import allms.exceptions.validation_input_data_exceptions as input_validation_messages 4 | from unittest.mock import patch 5 | 6 | import pytest 7 | 8 | from allms.domain.input_data import InputData 9 | from allms.domain.response import ResponseData 10 | import allms.models as llm_models 11 | 12 | 13 | class TestModelBehaviorForDifferentInput: 14 | 15 | @patch("langchain.chains.base.Chain.arun") 16 | def test_no_input_variables_provided_in_the_prompt_raise_exception(self, chain_run_mock, models): 17 | for model in models.values(): 18 | chain_run_mock.return_value = "{}" 19 | 20 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 21 | 22 | prompt = "Some Dummy Prompt without input variable" 23 | 24 | with pytest.raises(ValueError, match=input_validation_messages.get_missing_input_data_in_prompt_message( 25 | input_data[0].id)) as expected_value_exception: 26 | model.generate(prompt, input_data) 27 | 28 | @patch("langchain.chains.base.Chain.arun") 29 | def test_no_input_variables_provided_in_the_input_data_raise_exception(self, chain_run_mock, models): 30 | for model in models.values(): 31 | chain_run_mock.return_value = "{}" 32 | 33 | input_data = [InputData(input_mappings={}, id="1")] 34 | 35 | prompt = "Some Dummy Prompt without input variable :{text}" 36 | 37 | with pytest.raises(ValueError, match=input_validation_messages.get_missing_input_data_in_input_data_message( 38 | input_data[0].id)) as expected_value_exception: 39 | model.generate(prompt, input_data) 40 | 41 | @patch("langchain.chains.base.Chain.arun") 42 | def test_different_input_keys_provided_in_input_data_and_prompt(self, chain_run_mock, models): 43 | for model in models.values(): 44 | chain_run_mock.return_value = "{}" 45 | 46 | input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")] 47 | 48 | prompt = "Some Dummy Prompt without input variable {text} {text_1}" 49 | 50 | with pytest.raises(ValueError, match=input_validation_messages.get_different_input_keys_message( 51 | input_data[0].id)) as expected_value_exception: 52 | model.generate(prompt, input_data) 53 | 54 | @patch("langchain.chains.base.Chain.arun") 55 | def test_different_number_of_input_keys_provided_in_input_data_and_prompt(self, chain_run_mock, models): 56 | for model in models.values(): 57 | chain_run_mock.return_value = "{}" 58 | 59 | input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")] 60 | 61 | prompt = "Some Dummy Prompt without input variable {text} {text_1} {text_2}" 62 | 63 | with pytest.raises(ValueError, match=input_validation_messages.get_different_number_of_inputs_message( 64 | input_data[0].id)) as expected_value_exception: 65 | model.generate(prompt, input_data) 66 | 67 | @patch("langchain.chains.base.Chain.arun") 68 | def test_exception_when_input_data_is_missing_and_prompt_contains_input_key(self, chain_run_mock, models): 69 | for model in models.values(): 70 | 
chain_run_mock.return_value = "{}" 71 | 72 | prompt = "Some Dummy Prompt without input variable {text} {text_1}" 73 | 74 | with pytest.raises( 75 | ValueError, 76 | match=input_validation_messages.get_prompt_contains_input_key_when_missing_input_data() 77 | ): 78 | model.generate(prompt, None) 79 | 80 | @patch("langchain.chains.base.Chain.arun") 81 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 82 | def test_exception_when_num_prompt_tokens_larger_than_model_total_max_tokens(self, tokens_mock, chain_run_mock, models): 83 | # GIVEN 84 | chain_run_mock.return_value = "{}" 85 | 86 | input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")] 87 | 88 | prompt = "Some dummy really, really long prompt. " * 10000 + "input variables: {text} {text_2}" 89 | tokens_mock.return_value = len(prompt.split()) 90 | 91 | # WHEN & THEN 92 | for model in models.values(): 93 | response = model.generate(prompt, input_data)[0] 94 | 95 | assert isinstance(response, ResponseData) 96 | assert response.response is None 97 | assert "Value Error has occurred: Prompt is too long" in response.error 98 | 99 | @patch("langchain.chains.base.Chain.arun") 100 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 101 | def test_whether_curly_brackets_are_not_breaking_the_prompt(self, tokens_mock, chain_run_mock, models): 102 | # GIVEN 103 | chain_run_mock.return_value = "{}" 104 | 105 | input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")] 106 | 107 | prompt = "Extract parameters from this text: {text} and output them as a JSON: {{name: parameter_name, value: parameter_value}}" 108 | tokens_mock.return_value = len(prompt.split()) 109 | 110 | # WHEN & THEN 111 | for model in models.values(): 112 | response = model.generate(prompt, input_data)[0] 113 | 114 | assert isinstance(response, ResponseData) 115 | assert response.response is not None 116 | 117 | @patch("langchain.chains.base.Chain.arun") 118 | @patch("langchain_google_vertexai.llms.VertexAI.get_num_tokens") 119 | def test_warning_when_num_prompt_tokens_plus_max_output_tokens_larger_than_model_total_max_tokens( 120 | self, 121 | tokens_mock, 122 | chain_run_mock, 123 | models, 124 | caplog 125 | ): 126 | # GIVEN 127 | chain_run_mock.return_value = "{}" 128 | 129 | input_data = [InputData(input_mappings={"text": "Some dummy text", "text_2": "Another dummy text"}, id="1")] 130 | 131 | prompt = "Some dummy prompt. input variables: {text} {text_2}" 132 | tokens_mock.return_value = len(prompt.split()) 133 | 134 | # WHEN & THEN 135 | for model in models.values(): 136 | model._max_output_tokens = 100000 137 | 138 | with caplog.at_level(logging.WARNING): 139 | model.generate(prompt, input_data) 140 | 141 | log_records = caplog.records 142 | assert len(log_records) == 1 143 | assert log_records[0].levelname == "WARNING" 144 | assert "Number of prompt tokens plus generated tokens may exceed the the max allowed number of tokens of the model." 
in log_records[0].message 145 | 146 | caplog.clear() 147 | 148 | @patch("langchain.chains.base.Chain.arun") 149 | def test_model_raises_exception_when_system_prompt_is_invalid(self, chain_run_mock, models): 150 | for model in models.values(): 151 | chain_run_mock.return_value = "{}" 152 | 153 | prompt = "Some Dummy Prompt without input variable" 154 | if isinstance(model, llm_models.AzureMistralModel): 155 | with pytest.raises( 156 | ValueError, match=input_validation_messages.get_system_prompt_is_not_supported_by_model() 157 | ) as expected_value_exception: 158 | model.generate(prompt, system_prompt="This is a system prompt with {additional} field") 159 | else: 160 | with pytest.raises( 161 | ValueError, match=input_validation_messages.get_system_prompt_contains_input_variables() 162 | ) as expected_value_exception: 163 | model.generate(prompt, system_prompt="This is a system prompt with {additional} field") 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # allms 2 | 3 | ___ 4 | ## About 5 | 6 | allms is a versatile and powerful library designed to streamline the process of querying Large Language Models 7 | (LLMs) 🤖💬 8 | 9 | Developed by the Allegro engineers, allms is based on popular libraries like transformers, pydantic, and langchain. It takes care 10 | of the boring boilerplate code you write around your LLM applications, quickly enabling you to prototype ideas, and eventually helping you to scale up 11 | for production use-cases! 12 | 13 | Among the most notable features of allms, you will find: 14 | 15 | * **😊 Simple and User-Friendly Interface**: The module offers an intuitive and easy-to-use interface, making it straightforward to work with the model. 16 | 17 | * **🔀 Asynchronous Querying**: Requests to the model are processed asynchronously by default, ensuring efficient and non-blocking interactions. 18 | 19 | * **🔄 Automatic Retrying Mechanism**: The module includes an automatic retrying mechanism, which helps handle transient errors and ensures that queries to the model are robust. 20 | 21 | * **🛠️ Error Handling and Management**: Errors that may occur during interactions with the model are handled and managed gracefully, providing informative error messages and potential recovery options. 22 | 23 | * **⚙️ Output Parsing**: The module simplifies the process of defining the model's output format as well as parsing and working with it, allowing you to easily extract the information you need. 24 | 25 | ___ 26 | 27 | ## Supported Models 28 | 29 | | LLM Family | Hosting | Supported LLMs | 30 | | :---------- | :------------------ | :---------------------------------------------------------------------------------- | 31 | | GPT(s) | OpenAI endpoint | `gpt-3.5-turbo`, `gpt-4`, `gpt-4-turbo`, `gpt-4o`, `gpt-4o mini` | 32 | | Google LLMs | VertexAI deployment | `text-bison@001`, [Gemini family](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models) | 33 | | Llama2 | Azure deployment | `llama2-7b`, `llama2-13b`, `llama2-70b` | 34 | | Mistral | Azure deployment | `Mistral-7b`, `Mixtral-7bx8` | 35 | | Gemma | GCP deployment | `gemma` | 36 | 37 | * Do you already have a subscription to a Cloud Provider for any of the models above? Configure 38 | the model using your credentials and start querying! 39 | * Are you interested in knowing how to self-deploy open-source models in Azure and GCP?
40 | Consult our [guide](https://allms.allegro.tech/usage/deploy_open_source_models/) 41 | 42 | ___ 43 | 44 | ## Documentation 45 | 46 | Full documentation available at **[allms.allegro.tech](https://allms.allegro.tech/)** 47 | 48 | Get familiar with allms 🚀: [introductory jupyter notebook](https://github.com/allegro/allms/blob/main/examples/introduction.ipynb) 49 | 50 | ___ 51 | 52 | ## Quickstart 53 | 54 | ### Installation 🚧 55 | 56 | Install the package via pip: 57 | 58 | ``` 59 | pip install allms 60 | ``` 61 | 62 | ### Basic Usage ⭐ 63 | 64 | Configure endpoint credentials and start querying the model with any prompt: 65 | 66 | ```python 67 | from allms.models import AzureOpenAIModel 68 | from allms.domain.configuration import AzureOpenAIConfiguration 69 | 70 | configuration = AzureOpenAIConfiguration( 71 | api_key="your-secret-api-key", 72 | base_url="https://endpoint.openai.azure.com/", 73 | api_version="2023-03-15-preview", 74 | deployment="gpt-35-turbo", 75 | model_name="gpt-3.5-turbo" 76 | ) 77 | 78 | gpt_model = AzureOpenAIModel(config=configuration) 79 | gpt_response = gpt_model.generate(prompt="Plan me a 3-day holiday trip to Italy") 80 | ``` 81 | 82 | You can also pass a system prompt: 83 | 84 | ```python 85 | gpt_response = gpt_model.generate( 86 | system_prompt="You are an AI assistant acting as a trip planner", 87 | prompt="Plan me a 3-day holiday trip to Italy" 88 | ) 89 | ``` 90 | 91 | ### Advanced Usage 🔥 92 | 93 | ### Batch Querying and Symbolic Variables 94 | 95 | If you want to generate responses for a batch of examples, you can achieve this by preparing a prompt with symbolic 96 | variables and providing input data that will be injected into the prompt. You can use more than one symbolic variable. 97 | 98 | ```python 99 | positive_review_0 = "Very good coffee, lightly roasted, with good aroma and taste. The taste of sourness is barely noticeable (which is good because I don't like sour coffees). After grinding, the aroma spreads throughout the room. I recommend it to all those who do not like strongly roasted and pitch-black coffees. A very good solution is to close the package with string, which allows you to preserve the aroma and freshness." 100 | positive_review_1 = "Delicious coffee!! Delicate, just the way I like it, and the smell after opening is amazing. It smells freshly roasted. Faithful to Lavazza coffee for years, I decided to look for other flavors. Based on the reviews, I blindly bought it and it was a 10-shot, it outperformed Lavazze in taste. For me the best." 101 | negative_review = "Marketing is doing its job and I was tempted too, but this coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend." 102 | 103 | prompt = "You'll be provided with a review of a coffee. Decide if the review is positive or negative. Review: {review}" 104 | input_data = [ 105 | InputData(input_mappings={"review": positive_review_0}, id="0"), 106 | InputData(input_mappings={"review": positive_review_1}, id="1"), 107 | InputData(input_mappings={"review": negative_review}, id="2") 108 | ] 109 | 110 | responses = model.generate(prompt=prompt, input_data=input_data) 111 | 112 | # >>> {f"review_id={response.input_data.id}": response.response for response in responses} 113 | # { 114 | # 'review_id=0': 'The review is positive.', 115 | # 'review_id=1': 'The review is positive.', 116 | # 'review_id=2': 'The review is negative.'
117 | # } 118 | ``` 119 | 120 | ### Forcing Structured Output Format 121 | 122 | Through its pydantic integration, allms lets you pass an output dataclass and force the LLM to provide 123 | the response in a structured way. 124 | 125 | ```python 126 | from pydantic import BaseModel, Field 127 | 128 | class ReviewOutputDataModel(BaseModel): 129 | summary: str = Field(description="Summary of a product description") 130 | should_buy: bool = Field(description="Recommendation whether I should buy the product or not") 131 | brand_name: str = Field(description="Brand of the coffee") 132 | aroma: str = Field(description="Description of the coffee aroma") 133 | cons: list[str] = Field(description="List of cons of the coffee") 134 | 135 | 136 | review = "Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend." 137 | 138 | prompt = "Summarize the review of the coffee. Review: {review}" 139 | input_data = [InputData(input_mappings={"review": review}, id="0")] 140 | 141 | responses = model.generate( 142 | prompt=prompt, 143 | input_data=input_data, 144 | output_data_model_class=ReviewOutputDataModel 145 | ) 146 | response = responses[0].response 147 | 148 | # >>> type(response) 149 | # ReviewOutputDataModel 150 | # 151 | # >>> response.should_buy 152 | # False 153 | # 154 | # >>> response.brand_name 155 | # "Blue Orca" 156 | # 157 | # >>> response.aroma 158 | # "Not mentioned in the review" 159 | # 160 | # >>> response.cons 161 | # ['Weak in terms of strength', 'Weak in terms of taste'] 162 | ``` 163 | ___ 164 | 165 | ## Local Development 🛠️ 166 | 167 | ### Installation from source 168 | 169 | We assume that you have Python `3.10.*` installed on your machine. 170 | You can set it up using [pyenv](https://github.com/pyenv/pyenv#installationbrew) 171 | ([How to install pyenv on MacOS](https://jordanthomasg.medium.com/python-development-on-macos-with-pyenv-2509c694a808)). To install the allms environment locally: 172 | 173 | * Activate your venv; 174 | * Install Poetry via: 175 | 176 | ```bash 177 | make install-poetry 178 | ``` 179 | 180 | * Install allms dependencies with the command: 181 | 182 | ```bash 183 | make install-env 184 | ``` 185 | 186 | 187 | ### Tests 188 | 189 | In order to execute tests, run: 190 | 191 | ```bash 192 | make tests 193 | ``` 194 | 195 | ### Updating the documentation 196 | 197 | Run `mkdocs serve` to serve a local instance of the documentation. 198 | 199 | Modify the content of the `docs` directory to update the documentation. The updated content will be deployed 200 | via the GitHub Action `.github/workflows/docs.yml`. 201 | 202 | ### Make a new release 203 | 204 | When a new version of allms is ready to be released, do the following operations: 205 | 206 | 1. **Merge to master** the dev branch in which the new version has been specified: 207 | 1. In this branch, `version` under the `[tool.poetry]` section in `pyproject.toml` should be updated, e.g. `0.1.0`; 208 | 209 | 2. **Tag the new master** with the name of the newest version using the command line: 210 | 1. `git tag -a ` 211 | 2. `git push origin ` 212 | 213 | 3. **Publish package to PyPI**: 214 | 1. Go to _Actions_ → _Manual Publish To PyPI_; 215 | 2. Select "master" as the branch and click _Run workflow_; 216 | 3. If successful, you will find the package under # TODO: open-source. 217 | 218 | 4. 
**Make a GitHub release**: 219 | 1. Go to _Releases_ → _Draft a new release_; 220 | 2. Select the recently created tag in _Choose a tag_ window; 221 | 3. Copy/paste all the content present in the CHANGELOG under the version you are about to release; 222 | 4. Upload `allms-.whl` and `allms-.tar.gz` as assets; 223 | 5. Click `Publish release`. -------------------------------------------------------------------------------- /tests/test_end_to_end.py: -------------------------------------------------------------------------------- 1 | import re 2 | import logging 3 | 4 | import pytest 5 | import httpx 6 | import respx 7 | from httpx import Response 8 | from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate, \ 9 | SystemMessagePromptTemplate 10 | 11 | from allms.constants.input_data import IODataConstants 12 | from allms.defaults.vertex_ai import GeminiModelDefaults 13 | from allms.domain.configuration import VertexAIConfiguration 14 | from allms.domain.prompt_dto import KeywordsOutputClass 15 | from allms.models import VertexAIGeminiModel, HarmBlockThreshold, HarmCategory 16 | from allms.utils import io_utils 17 | from tests.conftest import AzureOpenAIEnv 18 | 19 | 20 | class TestEndToEnd: 21 | 22 | def test_model_is_queried_successfully( 23 | self, 24 | models 25 | ): 26 | # GIVEN 27 | with respx.mock: 28 | respx.post( 29 | url=re.compile(f"^{AzureOpenAIEnv.OPENAI_API_BASE}.*$")).mock( 30 | return_value=Response(status_code=200, json={ 31 | "choices": [{ 32 | "message": { 33 | "content": "{\"keywords\": [\"Indywidualna racja żywnościowa\", \"wojskowa\", \"S-R-9\", \"set nr 9\", \"Makaron po bolońsku\", \"Konserwa tyrolska\", \"Suchary\", \"Koncentrat napoju herbacianego instant o smaku owoców leśnych\", \"Dżem malinowy\", \"Baton zbożowo-owocowy o smaku figowym\"]}", 34 | "role": "" 35 | } 36 | }], 37 | "usage": {} 38 | }, 39 | ) 40 | ) 41 | 42 | input_data = io_utils.load_csv_to_input_data( 43 | limit=5, 44 | path="./tests/resources/test_input_data.csv" 45 | ) 46 | prompt_template_text = """Extract at most 10 keywords that could be used as features in a search index from this Polish product description. 47 | 48 | {text} 49 | """ 50 | 51 | # WHEN 52 | parsed_responses = models["azure_open_ai"].generate( 53 | prompt=prompt_template_text, 54 | input_data=input_data, 55 | output_data_model_class=KeywordsOutputClass, 56 | system_prompt="This is a system prompt." 
57 | ) 58 | parsed_responses = sorted(parsed_responses, key=lambda key: key.input_data.id) 59 | 60 | # THEN 61 | expected_output = io_utils.load_csv("./tests/resources/test_end_to_end_expected_output.csv") 62 | expected_output = sorted(expected_output, key=lambda example: example[IODataConstants.ID]) 63 | for idx in range(len(expected_output)): 64 | expected_output[idx]["response"] = eval(expected_output[idx]["response"]) 65 | 66 | assert list(map(lambda output: output[IODataConstants.ID], expected_output)) == list( 67 | map(lambda example: example.input_data.id, parsed_responses)) 68 | 69 | assert list(map(lambda output: output[IODataConstants.TEXT], expected_output)) == list( 70 | map(lambda example: example.input_data.input_mappings["text"], parsed_responses)) 71 | 72 | assert list(map(lambda output: output[IODataConstants.RESPONSE_STR_NAME], expected_output)) == list( 73 | map(lambda example: example.response.keywords, parsed_responses)) 74 | 75 | assert list(map(lambda output: int(output[IODataConstants.PROMPT_TOKENS_NUMBER]), expected_output)) == list( 76 | map(lambda example: example.number_of_prompt_tokens, parsed_responses)) 77 | 78 | assert list( 79 | map(lambda output: int(output[IODataConstants.GENERATED_TOKENS_NUMBER]), expected_output)) == list( 80 | map(lambda example: example.number_of_generated_tokens, parsed_responses)) 81 | 82 | def test_prompt_is_not_modified_for_open_source_models(self, models, mocker): 83 | # GIVEN 84 | open_source_models = ["azure_llama2", "azure_mistral", "vertex_gemma"] 85 | 86 | with respx.mock: 87 | respx.post( 88 | url=re.compile(f"^https:\/\/dummy-endpoint.*$")).mock( 89 | return_value=Response(status_code=200, json={ 90 | "choices": [{ 91 | "message": { 92 | "content": "{\"keywords\": [\"Indywidualna racja żywnościowa\", \"wojskowa\", \"S-R-9\", \"set nr 9\", \"Makaron po bolońsku\", \"Konserwa tyrolska\", \"Suchary\", \"Koncentrat napoju herbacianego instant o smaku owoców leśnych\", \"Dżem malinowy\", \"Baton zbożowo-owocowy o smaku figowym\"]}", 93 | "role": "" 94 | } 95 | }], 96 | "usage": {} 97 | }, 98 | )) 99 | 100 | input_data = io_utils.load_csv_to_input_data( 101 | limit=5, 102 | path="./tests/resources/test_input_data.csv" 103 | ) 104 | prompt_template_text = """Extract at most 10 keywords that could be used as features in a search index from this Polish product description. 105 | 106 | {text} 107 | """ 108 | prompt_template_spy = mocker.spy(ChatPromptTemplate, "from_messages") 109 | 110 | # WHEN & THEN 111 | for model_name, model in models.items(): 112 | model.generate( 113 | prompt=prompt_template_text, 114 | input_data=input_data, 115 | output_data_model_class=KeywordsOutputClass, 116 | system_prompt=None if model_name == "azure_mistral" else "This is a system prompt." 117 | ) 118 | 119 | if model_name in open_source_models: 120 | messages = [ 121 | HumanMessagePromptTemplate( 122 | prompt=PromptTemplate( 123 | input_variables=["text"], 124 | template=prompt_template_text 125 | ) 126 | ) 127 | ] 128 | if model_name != "azure_mistral": 129 | messages = [ 130 | SystemMessagePromptTemplate( 131 | prompt=PromptTemplate( 132 | input_variables=[], 133 | template="This is a system prompt." 134 | ) 135 | ) 136 | ] + messages 137 | prompt_template_spy.assert_called_with(messages) 138 | else: 139 | prompt_template_spy.assert_called_with([ 140 | SystemMessagePromptTemplate( 141 | prompt=PromptTemplate( 142 | input_variables=[], 143 | template="This is a system prompt." 
144 | ) 145 | ), 146 | HumanMessagePromptTemplate( 147 | prompt=PromptTemplate( 148 | input_variables=["text"], 149 | partial_variables={ 150 | 'output_data_model': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"keywords": {"description": "List of keywords", "items": {"type": "string"}, "title": "Keywords", "type": "array"}}, "required": ["keywords"]}\n```' 151 | }, 152 | template=f"{prompt_template_text}\n\n{{output_data_model}}" 153 | ) 154 | ) 155 | ]) 156 | 157 | def test_gemini_version_is_passed_to_model(self): 158 | # GIVEN 159 | model_config = VertexAIConfiguration( 160 | cloud_project="dummy-project-id", 161 | cloud_location="us-central1", 162 | gemini_model_name="gemini-1.0-pro-001" 163 | ) 164 | 165 | # WHEN 166 | gemini_model = VertexAIGeminiModel(config=model_config) 167 | 168 | # THEN 169 | assert gemini_model._llm.model_name == "gemini-1.0-pro-001" 170 | 171 | def test_model_times_out( 172 | self, 173 | models 174 | ): 175 | # GIVEN 176 | with respx.mock: 177 | respx.post(re.compile(f"^https:\/\/dummy-endpoint.*$")).mock( 178 | side_effect=httpx.TimeoutException("Request timed out") 179 | ) 180 | 181 | # WHEN 182 | responses = models["azure_open_ai"].generate("Some prompt") 183 | 184 | # THEN 185 | assert responses[0].response is None 186 | assert "Request timed out" in responses[0].error 187 | def test_gemini_specific_args_are_passed_to_model(self): 188 | gemini_model_name = "gemini-1.5-pro-001" 189 | gemini_safety_settings = { 190 | HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE, 191 | HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, 192 | HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH, 193 | HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE, 194 | HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, 195 | } 196 | model_config = VertexAIConfiguration( 197 | cloud_project="dummy-project-id", 198 | cloud_location="us-central1", 199 | gemini_model_name=gemini_model_name, 200 | gemini_safety_settings=gemini_safety_settings 201 | ) 202 | # WHEN 203 | gemini_model = VertexAIGeminiModel(config=model_config) 204 | 205 | # THEN 206 | assert gemini_model._llm.model_name == gemini_model_name 207 | assert gemini_model._llm.safety_settings == gemini_safety_settings 208 | 209 | @pytest.mark.parametrize( 210 | "model_name", [ 211 | "gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash","gemini-1.0-pro-001", "gemini-1.0-pro-002", 212 | "gemini-1.5-pro-001", "gemini-1.5-flash-001", "gemini-1.5-pro-preview-0514" 213 | ] 214 | ) 215 | def test_correct_gemini_model_name_work(self, model_name): 216 | # GIVEN 217 | model_config = VertexAIConfiguration( 218 | cloud_project="dummy-project-id", 219 | cloud_location="us-central1", 220 | gemini_model_name=model_name, 221 | ) 222 | 223 | # WHEN & THEN 224 | VertexAIGeminiModel(config=model_config) 225 | 226 | @pytest.mark.parametrize( 227 | "model_name", [ 228 | "gemini-2.0-flash-lite", "gemini-2.0-flash", "ggemini-2.5-pro-exp-03-25","gemini-x" 229 | ] 230 | ) 231 | def 
test_default_tokenizer_fallback(self, caplog, model_name): 232 | # GIVEN 233 | model_config = VertexAIConfiguration( 234 | cloud_project="dummy-project-id", 235 | cloud_location="us-central1", 236 | gemini_model_name=model_name, 237 | ) 238 | 239 | # WHEN 240 | with caplog.at_level(logging.INFO): 241 | VertexAIGeminiModel(config=model_config) 242 | 243 | # THEN 244 | assert ( 245 | f"Model {model_name} is not supported for tokenization, using default tokenizer:" 246 | f" {GeminiModelDefaults.GCP_MODEL_NAME}" 247 | in caplog.text 248 | ) 249 | 250 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /allms/models/abstract.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import re 4 | import typing 5 | import urllib 6 | from abc import ABC, abstractmethod 7 | from functools import partial 8 | from urllib.error import URLError 9 | 10 | import google 11 | import openai 12 | from google.api_core.exceptions import InvalidArgument 13 | from langchain.chains import LLMChain 14 | from langchain.chat_models.base import BaseChatModel 15 | from langchain.output_parsers import PydanticOutputParser 16 | from langchain.prompts import ChatPromptTemplate 17 | from langchain_core.language_models.llms import create_base_retry_decorator 18 | from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate 19 | from langchain_core.prompts.prompt import PromptTemplate 20 | from pydantic import BaseModel 21 | 22 | import allms.exceptions.validation_input_data_exceptions as input_exception_message 23 | import allms.models as models 24 | from allms.chains.long_text_processing_chain import ( 25 | LongTextProcessingChain, 26 | load_long_text_processing_chain 27 | ) 28 | from allms.constants.input_data import IODataConstants 29 | from allms.constants.prompt import PromptConstants 30 | from allms.defaults.general_defaults import GeneralDefaults 31 | from allms.defaults.long_text_chain import LongTextChainDefaults 32 | from allms.domain.enumerables import AggregationLogicForLongInputData, LanguageModelTask 33 | from allms.domain.input_data import InputData 34 | from allms.domain.prompt_dto import SummaryOutputClass, KeywordsOutputClass 35 | from allms.domain.response import ResponseData 36 | from allms.models.vertexai_base import GCPInvalidRequestError 37 | from allms.utils.long_text_processing_utils import get_max_allowed_number_of_tokens 38 | from allms.utils.response_parsing_utils import ResponseParser 39 | 40 | logger = logging.getLogger(__name__) 41 | 42 | 43 | class AbstractModel(ABC): 44 | def __init__( 45 | self, 46 | temperature: float, 47 | max_output_tokens: int, 48 | model_total_max_tokens: int, 49 | event_loop: typing.Optional[asyncio.AbstractEventLoop] = None, 50 | max_concurrency: int = GeneralDefaults.MAX_CONCURRENCY, 51 | max_retries: int = GeneralDefaults.MAX_RETRIES 52 | ): 53 | self._model_total_max_tokens = model_total_max_tokens 54 | self._max_output_tokens = max_output_tokens 55 | self._temperature = temperature 56 | self._semaphore = asyncio.Semaphore(max_concurrency) 57 | 58 | # TODO: To be changed after implementing support for long sequences 59 | self._task = LanguageModelTask.KEYWORDS 60 | self._is_long_text_bypass_enabled: bool = False # Should be false till we fully implement support for long sequences in our package 61 | self._aggregation_strategy: AggregationLogicForLongInputData = AggregationLogicForLongInputData.SIMPLE_CONCATENATION 62 | self._parser: typing.Optional[PydanticOutputParser] = None 63 | self._json_pattern = re.compile(r"{.*?}", re.DOTALL) 64 | self._is_json_format_injected_into_prompt: bool = True 65 | 66 | if max_output_tokens >= model_total_max_tokens: 67 | raise ValueError("max_output_tokens has to be lower than model_total_max_tokens") 68 | 69 | self._llm = self._create_llm() 70 | 71 | if not event_loop: 72 | try: 73 | event_loop = asyncio.get_running_loop() 74 | except RuntimeError as error: 75 | event_loop = asyncio.new_event_loop() 76 | asyncio.set_event_loop(event_loop) 
77 | self._event_loop = event_loop 78 | 79 | self._predict_example = create_base_retry_decorator( 80 | error_types=[ 81 | openai.RateLimitError, openai.APIError, openai.Timeout, 82 | openai.APIConnectionError, openai.InternalServerError, 83 | google.api_core.exceptions.ResourceExhausted, urllib.error.HTTPError 84 | ], 85 | max_retries=max_retries, 86 | )(self._predict_example) 87 | 88 | @abstractmethod 89 | def _create_llm(self) -> BaseChatModel: 90 | ... 91 | 92 | def _get_prompt_tokens_number(self, prompt: ChatPromptTemplate, input_data: InputData) -> int: 93 | return self._llm.get_num_tokens(prompt.format_prompt(**input_data.input_mappings).to_string()) 94 | 95 | def _get_model_response_tokens_number(self, model_response: typing.Optional[str]) -> int: 96 | if model_response: 97 | return self._llm.get_num_tokens(model_response) 98 | return 0 99 | 100 | def generate( 101 | self, 102 | prompt: str, 103 | input_data: typing.Optional[typing.List[InputData]] = None, 104 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None, 105 | system_prompt: typing.Optional[str] = None 106 | ) -> typing.List[ResponseData]: 107 | model_responses = self._event_loop.run_until_complete( 108 | self._generate( 109 | prompt=prompt, 110 | input_data=input_data, 111 | output_data_model_class=output_data_model_class, 112 | system_prompt=system_prompt 113 | ) 114 | ) 115 | 116 | if output_data_model_class: 117 | return ResponseParser(self._parser).parse_model_output(model_responses) 118 | return model_responses 119 | 120 | async def _generate( 121 | self, 122 | prompt: str, 123 | input_data: typing.Optional[typing.List[InputData]] = None, 124 | output_data_model_class: typing.Optional[typing.Type[BaseModel]] = None, 125 | system_prompt: typing.Optional[str] = None 126 | ) -> typing.List[ResponseData]: 127 | self._validate_system_prompt(system_prompt=system_prompt) 128 | self._validate_input(prompt=prompt, input_data=input_data) 129 | if input_data is None: 130 | # Prompt without symbolic variables is passed - create input_data accordingly 131 | input_data = [InputData(input_mappings={}, id=IODataConstants.DEFAULT_ID)] 132 | 133 | prompt_template_args = { 134 | PromptConstants.TEMPLATE_STR: prompt, 135 | PromptConstants.INPUT_VARIABLES_STR: list(input_data[0].get_input_keys()) 136 | } 137 | 138 | if output_data_model_class: 139 | self._parser = PydanticOutputParser(pydantic_object=output_data_model_class) 140 | 141 | if self._is_json_format_injected_into_prompt: 142 | prompt_template_args[PromptConstants.PARTIAL_VARIABLES_STR] = { 143 | PromptConstants.OUTPUT_DATA_MODEL: self._parser.get_format_instructions(), 144 | } 145 | prompt_template_args[PromptConstants.TEMPLATE_STR] = self._add_output_data_format(prompt=prompt) 146 | 147 | chat_prompts = await self._build_chat_prompts(prompt_template_args, system_prompt) 148 | 149 | prompt_template = ChatPromptTemplate.from_messages(chat_prompts) 150 | 151 | chain = self._get_chain(prompt_template) 152 | long_chain = self._get_chain_for_long_text(prompt_template) 153 | 154 | predict_example_any_length_partial = partial( 155 | self._predict_example_of_any_length, 156 | prompt_template=prompt_template, 157 | standard_chain=chain, 158 | long_chain=long_chain 159 | ) 160 | 161 | logger.info("Generating responses...") 162 | results = list(map(lambda data: predict_example_any_length_partial(input_data=data), input_data)) 163 | 164 | responses = await asyncio.gather(*results) 165 | 166 | return responses 167 | 168 | async def _build_chat_prompts( 169 | self, 170 | 
prompt_template_args: dict, 171 | system_prompt: typing.Optional[str] 172 | ) -> typing.List[typing.Union[SystemMessagePromptTemplate, HumanMessagePromptTemplate]]: 173 | human_message = HumanMessagePromptTemplate(prompt=PromptTemplate(**prompt_template_args)) 174 | if not system_prompt: 175 | return [human_message] 176 | system_message_template = SystemMessagePromptTemplate.from_template(system_prompt) 177 | 178 | return [system_message_template, human_message] 179 | 180 | @staticmethod 181 | def _add_output_data_format(prompt: str) -> str: 182 | return f"{prompt}{PromptConstants.OUTPUT_DATA_MODEL_CLASS_SEPARATOR}{{{PromptConstants.OUTPUT_DATA_MODEL}}}" 183 | 184 | def _validate_input_data_len( 185 | self, 186 | input_data: InputData, 187 | number_of_prompt_tokens: int 188 | ): 189 | if number_of_prompt_tokens > self._model_total_max_tokens: 190 | raise ValueError( 191 | f"Prompt is too long. Entire prompt has {number_of_prompt_tokens} tokens, where the max allowed number " 192 | f"of tokens of the model is {self._model_total_max_tokens}. This leaves no space for the model to " 193 | f"generate a response and will lead to errors. Example id: {input_data.id}" 194 | ) 195 | elif number_of_prompt_tokens + self._max_output_tokens > self._model_total_max_tokens: 196 | logger.warning( 197 | f"Number of prompt tokens plus generated tokens may exceed the max allowed number of tokens of the " 198 | f"model. Entire prompt has {number_of_prompt_tokens} tokens, the max number of tokens to generate is " 199 | f"{self._max_output_tokens} and the max allowed number of tokens of the model is " 200 | f"{self._model_total_max_tokens}. Consider lowering the max_output_tokens param or truncating the " 201 | f"input, because otherwise it may lead to unexpected errors. Example id: {input_data.id}" 202 | ) 203 | 204 | def _predict_example_of_any_length( 205 | self, 206 | input_data: InputData, 207 | prompt_template: ChatPromptTemplate, 208 | standard_chain: LLMChain, 209 | long_chain: LLMChain 210 | ) -> ResponseData: 211 | number_of_prompt_tokens = self._get_prompt_tokens_number( 212 | prompt=prompt_template, 213 | input_data=input_data 214 | ) 215 | max_token_limit = get_max_allowed_number_of_tokens(self._model_total_max_tokens, self._max_output_tokens) 216 | is_example_too_long = number_of_prompt_tokens > max_token_limit 217 | 218 | predict_example_partial = partial( 219 | self._predict_example, 220 | input_data=input_data, 221 | prompt_tokens_number=number_of_prompt_tokens 222 | ) 223 | if is_example_too_long and self._is_long_text_bypass_enabled: 224 | return predict_example_partial(chain=long_chain) 225 | return predict_example_partial(chain=standard_chain) 226 | 227 | async def _predict_example( 228 | self, 229 | chain: LLMChain, 230 | input_data: InputData, 231 | prompt_tokens_number: int 232 | ) -> ResponseData: 233 | error_message: typing.Optional[str] = None 234 | number_of_input_mappings = len(input_data.input_mappings) 235 | 236 | try: 237 | self._validate_input_data_len(input_data=input_data, number_of_prompt_tokens=prompt_tokens_number) 238 | except ValueError as value_error: 239 | logger.info(f"Error for id {input_data.id} has occurred. 
Message: {value_error} ") 240 | error_message = f"{IODataConstants.VALUE_ERROR_MESSAGE}: {value_error}" 241 | return ResponseData( 242 | input_data=None if number_of_input_mappings == 0 else input_data, 243 | response=None, 244 | number_of_prompt_tokens=prompt_tokens_number, 245 | number_of_generated_tokens=0, 246 | error=error_message 247 | ) 248 | 249 | try: 250 | async with self._semaphore: 251 | if number_of_input_mappings == 0: 252 | # Workaround when prompt without symbolic variables is passed - arun() can't be called without any arg 253 | model_response = chain.run({}) if hasattr(chain.llm, "api_transport") and chain.llm.api_transport == "rest" else await chain.arun({}) 254 | else: 255 | model_response = chain.run( 256 | **input_data.input_mappings) if hasattr(chain.llm, "api_transport") and chain.llm.api_transport == "rest" else await chain.arun( 257 | **input_data.input_mappings) 258 | except openai.InternalServerError as invalid_request_error: 259 | logger.info(f"Error for id {input_data.id} has occurred. Message: {invalid_request_error} ") 260 | if invalid_request_error.code == "content_filter": 261 | model_response = None 262 | error_message = f"{IODataConstants.CONTENT_FILTER_MESSAGE}: {invalid_request_error}" 263 | else: 264 | model_response = None 265 | error_message = f"{IODataConstants.ERROR_MESSAGE_STR}: {invalid_request_error}" 266 | 267 | except (InvalidArgument, ValueError, TimeoutError, openai.APIError, GCPInvalidRequestError, 268 | openai.APITimeoutError) as other_error: 269 | model_response = None 270 | logger.info(f"Error for id {input_data.id} has occurred. Message: {other_error} ") 271 | error_message = f"{type(other_error).__name__}: {other_error}" 272 | 273 | return ResponseData( 274 | input_data=None if number_of_input_mappings == 0 else input_data, 275 | response=model_response, 276 | number_of_prompt_tokens=prompt_tokens_number, 277 | number_of_generated_tokens=self._get_model_response_tokens_number(model_response), 278 | error=error_message 279 | ) 280 | 281 | def _get_number_of_tokens_in_prompt(self, prompt: PromptTemplate, input_data: InputData) -> int: 282 | return self._llm.get_num_tokens(prompt.format_prompt(**input_data.input_mappings).to_string()) 283 | 284 | def _get_chain(self, prompt: PromptTemplate) -> LLMChain: 285 | return LLMChain( 286 | llm=self._llm, 287 | prompt=prompt, 288 | ) 289 | 290 | # TODO: When adding support for long documents, we'll need to rethink how output_data_model will be passed to the 291 | # TODO: aggregation prompt 292 | def _get_chain_for_long_text( 293 | self, 294 | prompt_template: PromptTemplate, 295 | ) -> LongTextProcessingChain: 296 | parser = PydanticOutputParser( 297 | pydantic_object=SummaryOutputClass if self._task == LanguageModelTask.SUMMARY else KeywordsOutputClass) 298 | reduce_prompt_template = PromptTemplate( 299 | template=LongTextChainDefaults.AGGREGATION_PROMPT, 300 | input_variables=["text"], 301 | partial_variables={PromptConstants.OUTPUT_DATA_MODEL: parser.get_format_instructions()} 302 | ) 303 | 304 | return load_long_text_processing_chain( 305 | task=self._task, 306 | llm=self._llm, 307 | model_total_max_tokens=self._model_total_max_tokens, 308 | max_output_tokens=self._max_output_tokens, 309 | map_prompt=prompt_template, 310 | reduce_prompt=reduce_prompt_template, 311 | aggregation_strategy=self._aggregation_strategy 312 | ) 313 | 314 | def _validate_input(self, prompt: str, input_data: typing.Optional[typing.List[InputData]] = None) -> None: 315 | # Extracts text inside the {} but escapes the 
text inside {{}} 316 | # This behaviour allows passing JSON-like strings to the prompt 317 | # reference: https://github.com/langchain-ai/langchain/issues/1660#issuecomment-1469320129 318 | prompt_input_variables_set = AbstractModel._extract_input_variables_from_prompt(prompt) 319 | if PromptConstants.OUTPUT_DATA_MODEL in prompt_input_variables_set: 320 | prompt_input_variables_set.remove(PromptConstants.OUTPUT_DATA_MODEL) 321 | 322 | if input_data: 323 | for data in input_data: 324 | self._validate_input_data(prompt_input_variables_set, data) 325 | elif len(prompt_input_variables_set) > 0: 326 | raise ValueError( 327 | input_exception_message.get_prompt_contains_input_key_when_missing_input_data()) 328 | 329 | def _validate_system_prompt(self, system_prompt: typing.Optional[str] = None) -> None: 330 | if isinstance(self, models.AzureMistralModel) and system_prompt is not None: 331 | raise ValueError(input_exception_message.get_system_prompt_is_not_supported_by_model()) 332 | elif system_prompt: 333 | prompt_input_variables_set = AbstractModel._extract_input_variables_from_prompt(system_prompt) 334 | if prompt_input_variables_set: 335 | raise ValueError(input_exception_message.get_system_prompt_contains_input_variables()) 336 | 337 | @staticmethod 338 | def _extract_input_variables_from_prompt(prompt: str) -> typing.Set[str]: 339 | input_variables_pattern = r'(?<!\{)\{([^{}]+)\}(?!\})' 340 | input_variables_set = set(re.findall(input_variables_pattern, prompt)) 341 | return input_variables_set 342 | 343 | def _validate_input_data( 344 | self, 345 | prompt_input_variables: typing.Set[str], 346 | input_data: InputData 347 | ) -> None: 348 | if len(input_data.input_mappings.keys()) > 0 and len(prompt_input_variables) == 0: 349 | raise ValueError(input_exception_message.get_missing_input_data_in_prompt_message(input_data.id)) 350 | 351 | if len(input_data.input_mappings.keys()) == 0 and len(prompt_input_variables) > 0: 352 | raise ValueError(input_exception_message.get_missing_input_data_in_input_data_message(input_data.id)) 353 | 354 | if len(input_data.input_mappings.keys()) != len(prompt_input_variables): 355 | raise ValueError(input_exception_message.get_different_number_of_inputs_message(input_data.id)) 356 | 357 | if not prompt_input_variables == set(input_data.get_input_keys()): 358 | raise ValueError(input_exception_message.get_different_input_keys_message(input_data.id)) 359 | -------------------------------------------------------------------------------- /examples/introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# Introduction\n", 7 | "\n", 8 | "Follow this tutorial to get to know the most important features of allms!\n", 9 | "\n" 10 | ], 11 | "metadata": { 12 | "collapsed": false 13 | }, 14 | "id": "d6cb6b8c8fdca3cd" 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "7bcb1d86-2487-4ca1-9d03-19bd3ad1a097", 19 | "metadata": {}, 20 | "source": [ 21 | "# Imports and utils" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "id": "6e0a9b56-8099-4b2e-a881-01af966ed59d", 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2024-01-04T16:03:35.407204Z", 31 | "start_time": "2024-01-04T16:03:35.401117Z" 32 | } 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# This is needed to run asynchronous code in a Jupyter notebook\n", 37 | "import nest_asyncio\n", 38 | "\n", 39 | "nest_asyncio.apply()" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "3b81480b-06ad-4f7e-9fe6-731baf9c80ef", 45 | "metadata": {}, 46 | "source": [ 47 | "## Setting up your LLM\n", 48 | "\n", 49 | "To start working with `allms` you need to import one of the supported models and configure it. 
Make sure you have access to an Azure OpenAI endpoint and have the required configuration details at hand. In this tutorial we are going to use a GPT model." 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "9c0fba84-c906-4c40-9fcb-15f7fefd2b82", 55 | "metadata": {}, 56 | "source": [] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 3, 61 | "id": "cecb6d45-52bb-4530-bfd9-99848b40e106", 62 | "metadata": { 63 | "ExecuteTime": { 64 | "end_time": "2024-01-04T16:03:39.700051Z", 65 | "start_time": "2024-01-04T16:03:35.414123Z" 66 | } 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "from allms.models import AzureOpenAIModel\n", 71 | "from allms.domain.configuration import AzureOpenAIConfiguration\n", 72 | "\n", 73 | "configuration = AzureOpenAIConfiguration(\n", 74 | " api_key=\"your-secret-api-key\",\n", 75 | " base_url=\"https://endpoint.openai.azure.com/\",\n", 76 | " api_version=\"2023-03-15-preview\",\n", 77 | " deployment=\"gpt-35-turbo\",\n", 78 | " model_name=\"gpt-3.5-turbo\"\n", 79 | ")\n", 80 | "\n", 81 | "model = AzureOpenAIModel(config=configuration)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "id": "d4afb572-c2a8-4e00-95a7-d7f7bdf2dc84", 87 | "metadata": {}, 88 | "source": [ 89 | "## Basic usage" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "id": "4278435d-a259-408c-85dc-329b38e617d5", 95 | "metadata": {}, 96 | "source": [ 97 | "The model has a `generate()` method that is responsible for running the generations. In the most basic case, you can simply provide it with a prompt and it’ll return generated content. " 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 5, 103 | "id": "c2c243b4-51af-4bfd-a0a0-d9787f4d19e5", 104 | "metadata": { 105 | "ExecuteTime": { 106 | "end_time": "2024-01-04T16:03:47.961341Z", 107 | "start_time": "2024-01-04T16:03:47.030222Z" 108 | } 109 | }, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": "[ResponseData(response='The capital of Poland is Warsaw.', input_data=None, number_of_prompt_tokens=7, number_of_generated_tokens=7, error=None)]" 114 | }, 115 | "execution_count": 5, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "model.generate(\"What is the capital of Poland?\")" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "id": "e3997f4f-c26f-4af2-b1b9-3c6f35217aa5", 127 | "metadata": {}, 128 | "source": [ 129 | "This was an example of the most basic usage. But what if you wanted to run a single prompt multiple times, but with slightly changed data? For example, you have a dataset of reviews and you want to classify each of them as positive or negative. You can use batch mode to do this." 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "id": "c6ab1163-bc77-4234-8095-31f0592af3bc", 135 | "metadata": {}, 136 | "source": [ 137 | "## Batch mode" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "a3d3beb8-0b94-40fd-bca7-2b213362ef50", 143 | "metadata": {}, 144 | "source": [ 145 | "Let's say you have a dataset with 3 reviews and you want to classify each of them as positive or negative. To do so:\n", 146 | "- create a `prompt` and inside it use the symbolic variable `{review}`, which will later be replaced by actual reviews coming from the dataset.\n", 147 | "- create `input_data`. 
`input_data` is simply a list of `InputData`, where each `InputData` is a single example and it's a dataclass with two fields:\n", 148 | " - `input_mappings` - a dictionary mapping symbolic variables used in the prompt to the actual review.\n", 149 | " - `id` - needed because requests are made asynchronously, so the output order will not always be the same as the input order.\n", 150 | "- run the generation by calling the `generate()` method with the `prompt` and `input_data` as arguments. \n", 151 | "\n", 152 | "This will automatically run the generation in async mode, so it'll be much faster than normal, sequential calls. Additionally, it'll automatically retry requests in case of failure. " 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 6, 158 | "id": "eeb8c703-8203-46fb-b3c2-c0836ad2c349", 159 | "metadata": { 160 | "ExecuteTime": { 161 | "end_time": "2024-01-04T16:03:48.720192Z", 162 | "start_time": "2024-01-04T16:03:48.234700Z" 163 | } 164 | }, 165 | "outputs": [ 166 | { 167 | "data": { 168 | "text/plain": "{'review_id=0': 'The review is positive.',\n 'review_id=1': 'The review is positive.',\n 'review_id=2': 'The review is negative.'}" 169 | }, 170 | "execution_count": 6, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "from allms.domain.input_data import InputData\n", 177 | "\n", 178 | "\n", 179 | "positive_review_0 = \"Very good coffee, lightly roasted, with good aroma and taste. The taste of sourness is barely noticeable (which is good because I don't like sour coffees). After grinding, the aroma spreads throughout the room. I recommend it to all those who do not like strongly roasted and pitch-black coffees. A very good solution is to close the package with string, which allows you to preserve the aroma and freshness.\"\n", 180 | "positive_review_1 = \"Delicious coffee!! Delicate, just the way I like it, and the smell after opening is amazing. It smells freshly roasted. Faithful to Lavazza coffee for years, I decided to look for other flavors. Based on the reviews, I blindly bought it and it was a 10-shot, it outperformed Lavazze in taste. For me the best.\"\n", 181 | "negative_review = \"Marketing is doing its job and I was tempted too, but this coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend.\"\n", 182 | "\n", 183 | "prompt = \"You'll be provided with a review of a coffee. Decide if the review is positive or negative. Review: {review}\"\n", 184 | "input_data = [\n", 185 | " InputData(input_mappings={\"review\": positive_review_0}, id=\"0\"),\n", 186 | " InputData(input_mappings={\"review\": positive_review_1}, id=\"1\"),\n", 187 | " InputData(input_mappings={\"review\": negative_review}, id=\"2\")\n", 188 | "]\n", 189 | "\n", 190 | "responses = model.generate(prompt=prompt, input_data=input_data)\n", 191 | "\n", 192 | "{f\"review_id={response.input_data.id}\": response.response for response in responses}" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "id": "52cc821b-1d30-4220-be2e-7d4464c3d605", 198 | "metadata": {}, 199 | "source": [ 200 | "### Multiple symbolic variables" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "id": "82d20f2a-cbf3-4b15-a4d0-07c7a6cbf771", 206 | "metadata": {}, 207 | "source": [ 208 | "The example above showed a prompt with only one symbolic variable used in it. 
But you can use as many of them as you want.\n", 209 | "\n", 210 | "Let’s say you have two reviews: one positive and one negative, and you want the model to tell which one of them is positive. To do so:\n", 211 | "- create a prompt as shown in the cell below. Two symbolic variables are used inside it: `{first_review}` and `{second_review}`.\n", 212 | "- create `input_data`. It looks similar to the example above - it's a list of `InputData`, but here the `input_mappings` fields have two entries, one per symbolic variable used in the prompt.\n", 213 | "- same as above, generation is run by calling the `generate()` method." 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 7, 219 | "id": "c4ddfdcd-21c1-43d5-9f88-97868da710cb", 220 | "metadata": { 221 | "ExecuteTime": { 222 | "end_time": "2024-01-04T16:03:50.249349Z", 223 | "start_time": "2024-01-04T16:03:49.587683Z" 224 | } 225 | }, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/plain": "{'example_id=0': 'The first review is positive.',\n 'example_id=1': 'The second review is positive.'}" 230 | }, 231 | "execution_count": 7, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "prompt = \"\"\"You'll be provided with two reviews of a coffee. Decide which one is positive.\n", 238 | "\n", 239 | "First review: {first_review}\n", 240 | "Second review: {second_review}\"\"\"\n", 241 | "input_data = [\n", 242 | " InputData(input_mappings={\"first_review\": positive_review_0, \"second_review\": negative_review}, id=\"0\"),\n", 243 | " InputData(input_mappings={\"first_review\": negative_review, \"second_review\": positive_review_1}, id=\"1\"),\n", 244 | "]\n", 245 | "\n", 246 | "responses = model.generate(prompt=prompt, input_data=input_data)\n", 247 | "{f\"example_id={response.input_data.id}\": response.response for response in responses}" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "id": "1a64a6a8-28c8-4ce3-9c7c-bd1ffc56ed24", 253 | "metadata": {}, 254 | "source": [ 255 | "## Forcing model response format" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "id": "c16211a8-c937-4f9e-8c61-286b1278f004", 261 | "metadata": {}, 262 | "source": [ 263 | "This is one of the most interesting features of our library. In a production setup, it's often the case that we want the model to return generated content in a format that will later be easy to ingest by the rest of our pipeline - for example, JSON with some predefined fields. With our library it’s really easy to achieve this." 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "id": "85da7d66-7b05-4b78-838f-82ab1c8968c6", 269 | "metadata": {}, 270 | "source": [ 271 | "Let’s say that, again, you have a review of a coffee, and you want the model to generate information that might be interesting for you, and additionally you want it to return that information in the format provided by you. To do so, first you have to create a dataclass that defines the output format and the information you want the model to generate. Each field of this dataclass must have a type defined and also a description provided that describes what a given field means. The better the description, the better the model will understand what it should generate for a given field."
272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 8, 277 | "id": "62fcaf9d-b5c5-4e6c-a8b6-077a8bc9288f", 278 | "metadata": { 279 | "ExecuteTime": { 280 | "end_time": "2024-01-04T16:03:52.193625Z", 281 | "start_time": "2024-01-04T16:03:52.187122Z" 282 | } 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "import typing\n", 287 | " \n", 288 | "from pydantic import BaseModel, Field\n", 289 | " \n", 290 | "class ReviewOutputDataModel(BaseModel):\n", 291 | " summary: str = Field(description=\"Summary of a product description\")\n", 292 | " should_buy: bool = Field(description=\"Recommendation whether I should buy the product or not\")\n", 293 | " brand_name: str = Field(description=\"Brand of the coffee\")\n", 294 | " aroma:str = Field(description=\"Description of the coffee aroma\")\n", 295 | " cons: typing.List[str] = Field(description=\"List of cons of the coffee\")" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "id": "19c9635d-93aa-4549-87db-366ef914acb6", 301 | "metadata": {}, 302 | "source": [ 303 | "The next thing is to create a prompt, which can be pretty simple as shown in the cell below, and the `input_data` for the model. To force the model to generate a response in a given format, you have to call the `generate()` method with `prompt`, `input_data` and with one additional argument called `output_data_model_class`. The `ReviewOutputDataModel` class defined above should be provided to this argument. This automatically tells the model to output predictions in the format defined by this dataclass." 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 9, 309 | "id": "dd633a75-ce33-4bad-a298-61ed6c4e8de4", 310 | "metadata": { 311 | "ExecuteTime": { 312 | "end_time": "2024-01-04T16:03:54.571172Z", 313 | "start_time": "2024-01-04T16:03:53.648911Z" 314 | } 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "review = \"Marketing is doing its job and I was tempted too, but this Blue Orca coffee is nothing above the level of coffees from the supermarket. And the method of brewing or grinding does not help here. The coffee is simply weak - both in terms of strength and taste. I do not recommend.\"\n", 319 | " \n", 320 | "prompt = \"Summarize review of the coffee. Review: {review}\"\n", 321 | "input_data = [\n", 322 | " InputData(input_mappings={\"review\": review}, id=\"0\")\n", 323 | "]\n", 324 | "\n", 325 | "responses = model.generate(\n", 326 | " prompt=prompt, \n", 327 | " input_data=input_data,\n", 328 | " output_data_model_class=ReviewOutputDataModel\n", 329 | ")\n", 330 | "response = responses[0].response" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "id": "3a322c89-c2ca-462f-ba77-8120b5e0945a", 336 | "metadata": {}, 337 | "source": [ 338 | "The results below show that the predictions are indeed returned in the format defined above. 
" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 10, 344 | "id": "9f9996ec-141f-45a6-a900-71efd5fe3a96", 345 | "metadata": { 346 | "ExecuteTime": { 347 | "end_time": "2024-01-04T16:03:55.095035Z", 348 | "start_time": "2024-01-04T16:03:55.078664Z" 349 | } 350 | }, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": "__main__.ReviewOutputDataModel" 355 | }, 356 | "execution_count": 10, 357 | "metadata": {}, 358 | "output_type": "execute_result" 359 | } 360 | ], 361 | "source": [ 362 | "type(response)" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 11, 368 | "id": "01bb6d5c-80bc-43e7-97ac-252b98b45262", 369 | "metadata": { 370 | "ExecuteTime": { 371 | "end_time": "2024-01-04T16:03:55.867763Z", 372 | "start_time": "2024-01-04T16:03:55.854264Z" 373 | } 374 | }, 375 | "outputs": [ 376 | { 377 | "data": { 378 | "text/plain": "{'summary': 'The Blue Orca coffee is nothing above the level of coffees from the supermarket. It is weak in terms of strength and taste.',\n 'should_buy': False,\n 'brand_name': 'Blue Orca',\n 'aroma': 'Not mentioned in the review',\n 'cons': ['Weak in terms of strength', 'Weak in terms of taste']}" 379 | }, 380 | "execution_count": 11, 381 | "metadata": {}, 382 | "output_type": "execute_result" 383 | } 384 | ], 385 | "source": [ 386 | "response.dict()" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 12, 392 | "id": "767ff534-85a6-4d2c-b71b-c869f9343623", 393 | "metadata": { 394 | "ExecuteTime": { 395 | "end_time": "2024-01-04T16:03:56.968171Z", 396 | "start_time": "2024-01-04T16:03:56.958045Z" 397 | } 398 | }, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": "ReviewOutputDataModel(summary='The Blue Orca coffee is nothing above the level of coffees from the supermarket. It is weak in terms of strength and taste.', should_buy=False, brand_name='Blue Orca', aroma='Not mentioned in the review', cons=['Weak in terms of strength', 'Weak in terms of taste'])" 403 | }, 404 | "execution_count": 12, 405 | "metadata": {}, 406 | "output_type": "execute_result" 407 | } 408 | ], 409 | "source": [ 410 | "response" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "id": "64fef78d-1ae4-4477-8096-4b52fa4e8d16", 416 | "metadata": {}, 417 | "source": [ 418 | "This is really interesting feature, because it gives the possibility to do several tasks at once. In the above example, there was summarization, classification, entity extraction and so on. To add another one, simply add a new field to the dataclass. For example, if you'd like to know the pros of the coffee, you just need to add one additional field `pros` to the dataclass, describe it properly, re-run everything and you'll get the results. So as you can see, it significantly reduces the need to do extensive prompt engineering. You just define it in the code as an additional field and you’re done." 419 | ] 420 | } 421 | ], 422 | "metadata": { 423 | "kernelspec": { 424 | "display_name": "Python 3 (ipykernel)", 425 | "language": "python", 426 | "name": "python3" 427 | }, 428 | "language_info": { 429 | "codemirror_mode": { 430 | "name": "ipython", 431 | "version": 3 432 | }, 433 | "file_extension": ".py", 434 | "mimetype": "text/x-python", 435 | "name": "python", 436 | "nbconvert_exporter": "python", 437 | "pygments_lexer": "ipython3", 438 | "version": "3.10.13" 439 | } 440 | }, 441 | "nbformat": 4, 442 | "nbformat_minor": 5 443 | } 444 | --------------------------------------------------------------------------------