├── .env.example ├── .github ├── pull_request_template.md └── workflows │ ├── publish.yaml │ └── run-unit-tests.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CLAUDE.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── docs ├── evaluate_prompts_quality │ ├── evaluate_prompt_quality.py │ ├── flow_prompt_service.py │ └── prompt.py ├── getting_started_notebook.ipynb ├── media │ └── optimized_lamoom_mechanisms.gif ├── sequence_diagrams │ ├── add_ideal_answer_to_log.md │ ├── pngs │ │ ├── lamoom_add_ideal_answer.png │ │ ├── lamoom_call.png │ │ ├── lamoom_save_user_interactions.png │ │ └── lamoom_test_creation.png │ ├── save_user_interactions.md │ ├── sequence_diagram_lamoom_call.md │ └── test_creation.md └── test_data │ └── medical_questions_answers.csv ├── lamoom ├── __init__.py ├── ai_models │ ├── __init__.py │ ├── ai_model.py │ ├── attempt_to_call.py │ ├── behaviour.py │ ├── claude │ │ ├── __init__.py │ │ ├── claude_model.py │ │ ├── constants.py │ │ └── responses.py │ ├── constants.py │ ├── openai │ │ ├── __init__.py │ │ ├── azure_models.py │ │ ├── exceptions.py │ │ ├── openai_models.py │ │ ├── responses.py │ │ └── utils.py │ └── utils.py ├── exceptions.py ├── prompt │ ├── __init__.py │ ├── base_prompt.py │ ├── chat.py │ ├── lamoom.py │ ├── prompt.py │ └── user_prompt.py ├── response_parsers │ └── response_parser.py ├── responses.py ├── services │ ├── SaveWorker.py │ ├── __init__.py │ └── lamoom │ │ └── __init__.py ├── settings.py └── utils.py ├── poetry.lock ├── pyproject.toml └── tests ├── __init__.py ├── ai_models ├── __init__.py └── test_utils.py ├── conftest.py ├── prompts ├── test_chat.py ├── test_ci_cd.py ├── test_create_test.py ├── test_model.py ├── test_pricing.py ├── test_prompt.py └── test_stream.py ├── response_parsers └── test_response_parsers.py ├── services ├── __init__.py └── test_flow_prompt.py └── test_integrational.py /.env.example: -------------------------------------------------------------------------------- 1 | LAMOOM_API_TOKEN= 2 | AZURE_KEYS= 3 | OPENAI_API_KEY= 4 | BEARER_TOKEN= 5 | LAMOOM_API_URI= -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Context 2 | Please make a bullet pointed list of changes 3 | 1. ... 4 | 5 | ## Checklist before requesting a review 6 | - [ ] Self-Review 7 | - [ ] Added Tests for functionality 8 | - [ ] Do you need to add/update metrics data, like cost, latency...? 9 | - [ ] Do you need to update Readme? 
10 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - closed 7 | branches: 8 | - main 9 | 10 | jobs: 11 | publish: 12 | if: github.event.pull_request.merged == true 13 | runs-on: ubuntu-22.04 14 | steps: 15 | - name: Check out repository 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 19 | token: ${{ secrets.GH_PAT }} 20 | 21 | - name: Install Poetry 22 | run: pip install poetry 23 | 24 | - name: Install Python 25 | uses: actions/setup-python@v3 26 | with: 27 | python-version: 3.11 28 | cache: poetry 29 | 30 | - name: Install Python libraries 31 | run: poetry install 32 | 33 | - name: Configure Git 34 | run: | 35 | git config --global user.name "GitHub Actions" 36 | git config --global user.email "actions@github.com" 37 | 38 | - name: Publish package and update version 39 | if: github.ref == 'refs/heads/main' 40 | env: 41 | PYPI_API_KEY: ${{ secrets.PYPI_API_KEY }} 42 | run: | 43 | poetry run make publish-release 44 | version=$(poetry version -s) 45 | git add pyproject.toml 46 | git commit -m "Bump version to ${version}" 47 | git push -------------------------------------------------------------------------------- /.github/workflows/run-unit-tests.yaml: -------------------------------------------------------------------------------- 1 | name: run-unit-tests 2 | 3 | on: push 4 | 5 | jobs: 6 | run-unit-tests: 7 | runs-on: ubuntu-22.04 8 | container: python:3.11-slim 9 | steps: 10 | - name: Check out repository 11 | uses: actions/checkout@v3 12 | 13 | - name: 'Create env file' 14 | run: | 15 | touch .env 16 | echo AZURE_KEYS=${{ secrets.AZURE_KEYS }} >> .env 17 | echo CLAUDE_API_KEY=${{ secrets.CLAUDE_API_KEY }} >> .env 18 | echo GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} >> .env 19 | echo OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} >> .env 20 | echo LAMOOM_API_URI=${{ secrets.LAMOOM_API_URI }} >> .env 21 | echo LAMOOM_API_TOKEN=${{ secrets.LAMOOM_API_TOKEN }} >> .env 22 | echo NEBIUS_API_KEY=${{ secrets.NEBIUS_API_KEY }} >> .env 23 | echo CUSTOM_API_KEY=${{ secrets.CUSTOM_API_KEY }} >> .env 24 | cat .env 25 | 26 | - name: Install dependencies 27 | run: | 28 | apt-get update && apt-get install -y curl build-essential 29 | 30 | - name: Install Poetry 31 | run: pip install poetry 32 | 33 | - name: Install Python 34 | uses: actions/setup-python@v3 35 | with: 36 | python-version: 3.11 37 | cache: poetry 38 | 39 | - name: Install Python libraries 40 | run: poetry install 41 | 42 | - name: Run tests with pytest 43 | run: | 44 | poetry run make test 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv/* 2 | *.pyc 3 | *__pycache__/* 4 | .env 5 | dist 6 | .idea 7 | .venv 8 | .coverage 9 | .DS_Store 10 | .vscode 11 | .pytest_cache 12 | python 13 | .env.test -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: '^(tree-sitter-*)' 2 | 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: check-ast 8 | - id: trailing-whitespace 9 | - id: check-toml 10 | - id: check-yaml 11 | - id: check-added-large-files 12 | - id: check-merge-conflict 13 | 14 | - 
repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks 15 | rev: v2.1.0 16 | hooks: 17 | - id: pretty-format-yaml 18 | args: 19 | - --autofix 20 | - --preserve-quotes 21 | - --indent=2 22 | 23 | - repo: https://github.com/kynan/nbstripout 24 | rev: 0.5.0 25 | hooks: 26 | - id: nbstripout 27 | name: Clean notebook outputs 28 | 29 | - repo: local 30 | hooks: 31 | 32 | - id: isort 33 | name: Format with isort 34 | entry: poetry run isort 35 | language: system 36 | types: [python] 37 | 38 | - id: black 39 | name: Format with Black 40 | entry: poetry run black 41 | language: system 42 | types: [python] 43 | 44 | # - id: flake8 45 | # name: Validate with flake8 46 | # entry: poetry run flake8 lamoom 47 | # language: system 48 | # pass_filenames: false 49 | # types: [python] 50 | # args: [--count] 51 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # Lamoom Python Project Guide 2 | 3 | ## Build/Test/Lint Commands 4 | - Install deps: `poetry install` 5 | - Run all tests: `poetry run pytest --cache-clear -vv tests` 6 | - Run specific test: `poetry run pytest tests/path/to/test_file.py::test_function_name -v` 7 | - Run with coverage: `make test` 8 | - Format code: `make format` (runs black, isort, flake8, mypy) 9 | - Individual formatting: 10 | - Black: `make make-black` 11 | - isort: `make make-isort` 12 | - Flake8: `make flake8` 13 | - Autopep8: `make autopep8` 14 | 15 | ## Code Style Guidelines 16 | - Python 3.9+ compatible code 17 | - Type hints required for all functions and methods 18 | - Classes: PascalCase with descriptive names 19 | - Functions/Variables: snake_case 20 | - Constants: UPPERCASE_WITH_UNDERSCORES 21 | - Imports organization with isort: 22 | 1. Standard library imports 23 | 2. Third-party imports 24 | 3. Local application imports 25 | - Error handling: Use specific exception types 26 | - Logging: Use the logging module with appropriate levels 27 | - Use dataclasses for structured data when applicable 28 | 29 | ## Project Conventions 30 | - Use poetry for dependency management 31 | - Add tests for all new functionality 32 | - Maintain >80% test coverage (current min: 81%) 33 | - Follow pre-commit hooks guidelines 34 | - Document public APIs with docstrings -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ### API Url 2 | Add your own API url in `.env` if needed: 3 | ``` 4 | LAMOOM_API_URI=your_api_uri 5 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT_FOLDER = 'lamoom' 2 | 3 | flake8: 4 | flake8 ${PROJECT_FOLDER} 5 | 6 | .PHONY: make-black 7 | make-black: 8 | black --verbose ${PROJECT_FOLDER} 9 | 10 | .PHONY: make-isort 11 | make-isort: 12 | isort --settings-path pyproject.toml ${PROJECT_FOLDER} 13 | 14 | .PHONY: make-mypy 15 | make-mypy: 16 | mypy --strict ${PROJECT_FOLDER} 17 | 18 | isort-check: 19 | isort --settings-path pyproject.toml --check-only . 20 | 21 | autopep8: 22 | for f in `find lamoom -name "*.py"`; do autopep8 --in-place --select=E501 $f; done 23 | 24 | lint: 25 | poetry run isort --settings-path pyproject.toml --check-only . 26 | 27 | test: 28 | poetry run pytest --cache-clear -vv tests \ 29 | --cov=${PROJECT_FOLDER} \ 30 | --cov-config=.coveragerc \ 31 | --cov-fail-under=81 \ 32 | --cov-report term-missing 33 | 34 | .PHONY: format 35 | format: make-black isort-check flake8 make-mypy 36 | 37 | clean: clean-build clean-pyc clean-test 38 | 39 | clean-build: 40 | rm -fr build/ 41 | rm -fr dist/ 42 | rm -fr .eggs/ 43 | find . -name '*.egg-info' -exec rm -fr {} + 44 | find . -name '*.egg' -exec rm -f {} + 45 | 46 | clean-pyc: 47 | find . -name '*.pyc' -exec rm -f {} + 48 | find . -name '*.pyo' -exec rm -f {} + 49 | find . -name '*~' -exec rm -f {} + 50 | find . -name '__pycache__' -exec rm -fr {} + 51 | 52 | clean-test: 53 | rm -f .coverage 54 | rm -fr htmlcov/ 55 | rm -rf .pytest_cache 56 | 57 | 58 | publish-test-prerelease: 59 | poetry version prerelease 60 | poetry build 61 | twine upload --repository testpypi dist/* 62 | 63 | 64 | publish-release: 65 | poetry config pypi-token.pypi "$(PYPI_API_KEY)" 66 | poetry version patch 67 | poetry build 68 | poetry publish 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lamoom 2 | 3 | ## Our Philosophy 4 | 5 | Lamoom, derived from "Lambda on Mechanisms," refers to computation within a system that iteratively guides the LLM to perform correctly. Inspired by Amazon's culture, as Jeff Bezos said, "Good intentions don't work, mechanisms do," we focus on building mechanisms for LLMs rather than relying on their good intentions 6 | 7 | ![Lamoom Mechanisms](docs/media/optimized_lamoom_mechanisms.gif) 8 | 9 | 10 | ## Introduction 11 | Lamoom is a dynamic, all-in-one library designed for managing and optimizing prompts and making tests based on the ideal answer for large language models (LLMs) in production and R&D. It facilitates dynamic data integration, latency and cost metrics visibility, and efficient load distribution across multiple AI models. 
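To make the load-distribution idea concrete, the sketch below shows a weighted primary attempt with a fallback model, using the classes exported from `lamoom` (`AttemptToCall`, `AIModelsBehaviour`). It is an illustrative sketch only: the model names are placeholders, and the exact constructor arguments may differ between versions (see `lamoom/ai_models/behaviour.py`); the Usage Examples section below shows the simpler model-string form of `client.call`.

```python
from lamoom import (
    AIModelsBehaviour,
    AttemptToCall,
    ClaudeAIModel,
    Lamoom,
    OpenAIModel,
    Prompt,
)

# Placeholder prompt, mirroring the Basic Usage example further below.
prompt = Prompt("greet_user")
prompt.add("You're {name}. Say hello and ask for the user's name.", role="system")

# Weighted primary attempt with a Claude fallback. Field names follow
# lamoom/ai_models/behaviour.py in this repo; adjust to your installed version.
models_behaviour = AIModelsBehaviour(
    attempt=AttemptToCall(ai_model=OpenAIModel(model="gpt-4o"), weight=100),
    fallback_attempts=[
        AttemptToCall(ai_model=ClaudeAIModel(model="claude-3-5-sonnet-latest"), weight=1),
    ],
)

client = Lamoom()  # reads LAMOOM_API_TOKEN and provider keys from the environment
response = client.call(prompt.id, {"name": "John Doe"}, models_behaviour)
print(response.content)
```

The `weight` field biases how often an attempt is selected, and fallback attempts are tried in order if the primary call fails.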
12 | 13 | [![Lamoom Introduction Video](https://img.youtube.com/vi/1opO_5kRf98/0.jpg)](https://www.youtube.com/watch?v=1opO_5kRf98 "Lamoom Introduction Video") 14 | 15 | ## Getting Started 16 | 17 | To help you get started quickly, you can explore our [Getting Started Notebook](docs/getting_started_notebook.ipynb) which provides step-by-step examples of using Lamoom. 18 | 19 | ## Features 20 | 21 | - **CI/CD testing**: Generates tests based on the context and ideal answer (usually written by the human). 22 | - **Dynamic Prompt Development**: Avoid budget exceptions with dynamic data. 23 | - **Multi-Model Support**: Seamlessly integrate with various LLMs like OpenAI, Anthropic, and more. 24 | - **Real-Time Insights**: Monitor interactions, request/response metrics in production. 25 | - **Prompt Testing and Evaluation**: Quickly test and iterate on prompts using historical data. 26 | - **Smart Prompt Caching**: Efficiently cache prompts for 5 minutes to reduce latency while keeping them updated. 27 | - **Asynchronous Logging**: Record interactions without blocking the main execution flow. 28 | 29 | ## Core Functionality 30 | 31 | ### Prompt Management and Caching 32 | Lamoom implements an efficient prompt caching system with a 5-minute TTL (Time-To-Live): 33 | - **Automatic Updates**: When you call a prompt, Lamoom checks if a newer version exists on the server. 34 | - **Cache Invalidation**: Prompts are automatically refreshed after 5 minutes to ensure up-to-date content. 35 | - **Local Fallback**: If the LamoomService is unavailable, Lamoom library falls back to the locally defined prompt. 36 | - **Version Control**: Track prompt versions between local and server instances. 37 | 38 | 39 | ```mermaid 40 | sequenceDiagram 41 | Note over Lamoom,LLM: call(prompt_id, context, model) 42 | Lamoom->>Lamoom: get_cashed_prompt(prompt_id) 43 | alt Cache miss 44 | Lamoom->>LamoomService: get the last published prompt, Updates cache for 5 mins 45 | end 46 | Lamoom->>LLM: Cal LLM with prompt and context 47 | ``` 48 | 49 | ### Test Generation and CI/CD Integration 50 | Lamoom supports two methods for test creation: 51 | 1. **Inline Test Generation**: Add `test_data` with an ideal answer during normal LLM calls to automatically generate tests. 52 | 2. **Direct Test Creation**: Use the `create_test()` method to explicitly create tests for specific prompts. 53 | 54 | Tests automatically compare LLM responses to ideal answers, helping maintain prompt quality as models or prompts evolve. 55 | 56 | ```mermaid 57 | sequenceDiagram 58 | alt Direct `create_test` 59 | Lamoom->>LamoomService: create_test(prompt_id, context, ideal_answer) 60 | end 61 | alt Via `call` 62 | Lamoom->>LamoomService: call → creates asynchronous job to create test with an ideal answer 63 | end 64 | ``` 65 | 66 | ### Logging and Analytics 67 | Interaction logging happens asynchronously using a worker pattern: 68 | - **Performance Metrics**: Automatically track latency, token usage, and cost. 69 | - **Complete Context**: Store the full prompt, context, and response for analysis. 70 | - **Non-Blocking**: Logging happens in the background without impacting response times. 71 | 72 | ```mermaid 73 | sequenceDiagram 74 | Lamoom->>Lamoom: call(prompt_id, context, model) 75 | Lamoom->>LamoomService: creates asynchronous job to save logs 76 | ``` 77 | 78 | ### Feedback Collection 79 | Improve prompt quality through explicit feedback: 80 | - **Ideal Answer Addition**: Associate ideal answers with previous responses using `add_ideal_answer()`. 
81 | - **Continuous Improvement**: Use feedback to automatically generate new tests and refine prompts. 82 | 83 | ```mermaid 84 | sequenceDiagram 85 | Lamoom->>LamoomService: add_ideal_answer(response_id, ideal_answer) 86 | ``` 87 | 88 | ## Installation 89 | 90 | Install Lamoom using pip: 91 | 92 | ```bash 93 | pip install lamoom 94 | ``` 95 | 96 | Obtain an API token from [Lamoom](https://portal.lamoom.com) and add it as an environment variable: `LAMOOM_API_TOKEN`. 97 | 102 | ## Authentication 103 | 104 | ### Add keys depending on the models you're using: 105 | ```python 106 | # Add LAMOOM_API_TOKEN as an environment variable: 107 | os.environ['LAMOOM_API_TOKEN'] = 'your_token_here' 108 | 109 | # Add OPENAI_API_KEY 110 | os.environ['OPENAI_API_KEY'] = 'your_key_here' 111 | 112 | # Add Azure keys 113 | os.environ['AZURE_KEYS'] = '{"name_realm":{"url": "https://baseurl.azure.com/","key": "secret"}}' 114 | # or pass them when creating the Lamoom client 115 | Lamoom(azure_keys={"realm_name":{"url": "https://baseurl.azure.com/", "key": "your_secret"}}) 116 | 117 | # Add a custom-provider key 118 | os.environ['CUSTOM_API_KEY'] = 'your_key_here' 119 | ``` 120 | 121 | ### Model Agnostic: 122 | Mix models easily and distribute the load across them. The system automatically distributes your load based on the weights. We support: 123 | - Claude 124 | - Gemini 125 | - OpenAI (incl. Azure OpenAI models) 126 | - Nebius (Llama, DeepSeek, Mistral, Mixtral, Dolphin, Qwen, and others) 127 | - Custom providers 128 | 129 | The model string format for Claude, Gemini, OpenAI, and Nebius is: 130 | `"{model_provider}/{model_name}"` 131 | For Azure models the format is: 132 | `"azure/{realm}/{model_name}"` 133 | 134 | ```python 135 | response_llm = client.call(agent.id, context, model = "openai/gpt-4o") 136 | response_llm = client.call(agent.id, context, model = "azure/useast/gpt-4o") 137 | ``` 138 | 139 | For custom providers the format is: 140 | `"custom/{model_name}"` 141 | and `provider_url` is required: 142 | 143 | ```python 144 | response_llm = client.call(agent.id, context, model = "custom/gpt-4o", provider_url = "your_model_url") 145 | ``` 146 | 147 | ### Lamoom Keys 148 | Obtain an API token from Lamoom and add it: 149 | 150 | ```python 151 | # As an environment variable: 152 | os.environ['LAMOOM_API_TOKEN'] = 'your_token_here' 153 | # Via code: 154 | Lamoom(api_token='your_api_token') 155 | ``` 156 | 157 | ## Usage Examples: 158 | 159 | ### Basic Usage 160 | ```python 161 | from lamoom import Lamoom, Prompt 162 | 163 | # Initialize and configure Lamoom 164 | client = Lamoom(openai_key='your_api_key', openai_org='your_org') 165 | 166 | # Create a prompt 167 | prompt = Prompt('greet_user') 168 | prompt.add("You're {name}. Say Hello and ask what's their name.", role="system") 169 | 170 | # Call AI model with Lamoom 171 | context = {"name": "John Doe"} 172 | response = client.call(prompt.id, context, "openai/gpt-4o") 173 | print(response.content) 174 | ``` 175 | 176 | ### Creating Tests While Using Prompts 177 | ```python 178 | # Call with test_data to automatically generate tests 179 | response = client.call(prompt.id, context, "openai/gpt-4o", test_data={ 180 | 'ideal_answer': "Hello, I'm John Doe. 
What's your name?", 181 | 'model_name': "gemini/gemini-1.5-flash" 182 | }) 183 | ``` 184 | 185 | ### Creating Tests Explicitly 186 | ```python 187 | # Create a test directly 188 | client.create_test( 189 | prompt.id, 190 | context, 191 | ideal_answer="Hello, I'm John Doe. What's your name?", 192 | model_name="gemini/gemini-1.5-flash" 193 | ) 194 | ``` 195 | 196 | ### Adding Feedback to Previous Responses 197 | ```python 198 | # Add an ideal answer to a previous response for quality assessment 199 | client.add_ideal_answer( 200 | response_id=response.id, 201 | ideal_answer="Hello, I'm John Doe. What's your name?" 202 | ) 203 | ``` 204 | 205 | ### Monitoring and Management 206 | - **Test Dashboard**: Review created tests and scores at https://cloud.lamoom.com/tests 207 | - **Prompt Management**: Update prompts and rerun tests for published or saved versions 208 | - **Analytics**: View logs with metrics (latency, cost, tokens) at https://cloud.lamoom.com/logs 209 | 210 | The system is designed to allow prompt updates without code redeployment—simply publish a new prompt version online, and the library will automatically fetch and use it. 211 | 212 | ## Best Security Practices 213 | For production environments, store secrets securely rather than directly in your codebase. Consider using a secret management service or encrypted environment variables. 214 | 215 | ## Contributing 216 | We welcome contributions! Please see our [Contribution Guidelines](CONTRIBUTING.md) for more information on how to get involved. 217 | 218 | ## License 219 | This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details. 220 | 221 | ## Contact 222 | For support or contributions, please contact us via GitHub Issues. -------------------------------------------------------------------------------- /docs/evaluate_prompts_quality/evaluate_prompt_quality.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | from lamoom import Lamoom, behaviour, AttemptToCall, AzureAIModel, C_128K 4 | from prompt import prompt_to_evaluate_prompt 5 | from flow_prompt_service import get_all_prompts, get_logs 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | 10 | lamoom = Lamoom() 11 | 12 | gpt4_behaviour = behaviour.AIModelsBehaviour( 13 | attempt=AttemptToCall( 14 | ai_model=AzureAIModel( 15 | realm='useast', 16 | deployment_id="gpt-4o", 17 | max_tokens=C_128K, 18 | support_functions=True, 19 | ), 20 | weight=100, 21 | ), 22 | ) 23 | 24 | 25 | def main(): 26 | for prompt in get_all_prompts(): 27 | prompt_id = prompt['prompt_id'] 28 | prompt_chats = prompt['chats'] 29 | logs = get_logs(prompt_id).get('items') 30 | if not logs or len(logs) < 5: 31 | continue 32 | contexts = [] 33 | responses = [] 34 | for log in random.sample(logs, 5): 35 | responses.append(log['response']['message']) 36 | contexts.append(log['context']) 37 | context = { 38 | 'responses': responses, 39 | 'prompt_data': prompt_chats, 40 | 'prompt_id': prompt_id, 41 | } 42 | result = lamoom.call(prompt_to_evaluate_prompt.id, context, gpt4_behaviour) 43 | print(result.content) 44 | 45 | if __name__ == '__main__': 46 | main() -------------------------------------------------------------------------------- /docs/evaluate_prompts_quality/flow_prompt_service.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import dotenv 5 | import requests 6 | from lamoom.settings import LAMOOM_API_URI 7 | 8 | 
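# Minimal helper around the Lamoom REST API, used by evaluate_prompt_quality.py:
# it authenticates with the BEARER_TOKEN environment variable and fetches stored
# prompts together with their recent call logs (response and context fields).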
dotenv.load_dotenv(dotenv.find_dotenv()) 9 | 10 | BEARER_TOKEN = os.getenv('BEARER_TOKEN') 11 | 12 | 13 | def get_all_prompts(): 14 | response = requests.get(f'{LAMOOM_API_URI}/prompts', headers={'Authorization': f'Bearer {BEARER_TOKEN}'}) 15 | prompts = response.json() 16 | return prompts 17 | 18 | 19 | def get_logs(prompt_id): 20 | response = requests.get( 21 | f'{LAMOOM_API_URI}/logs?prompt_id={prompt_id}&fields=response,context', 22 | headers={'Authorization': f'Bearer {BEARER_TOKEN}'} 23 | ) 24 | logs = response.json() 25 | return logs 26 | 27 | -------------------------------------------------------------------------------- /docs/evaluate_prompts_quality/prompt.py: -------------------------------------------------------------------------------- 1 | 2 | from lamoom import Prompt 3 | 4 | prompt_to_evaluate_prompt = Prompt(id="prompt-improver") 5 | 6 | prompt_to_evaluate_prompt.add(role="system", content="You're a prompt engineer, tasked with evaluating and improving prompt quality.") 7 | 8 | prompt_to_evaluate_prompt.add(content="""The initial prompt is provided below: ``` 9 | {prompt_data} 10 | ```""", priority=1) 11 | 12 | prompt_to_evaluate_prompt.add( 13 | content="{responses}", 14 | is_multiple=True, 15 | in_one_message=True, 16 | presentation="Responses to the initial prompt were as follows: ", 17 | priority=2 18 | ) 19 | 20 | prompt_to_evaluate_prompt.add(content=''' 21 | Please perform the following steps: 22 | 23 | 1. **Analyze Output Quality {prompt_id}:** 24 | - Examine the completeness, accuracy, and relevance of the responses. 25 | - Identify any common themes in errors or inaccuracies. 26 | 2. **Identify Improvement Areas:** 27 | - Based on the analysis, pinpoint specific areas where the prompt could be ambiguous or not detailed enough. 28 | - Note if the complexity of the request might be contributing to the observed output quality issues. 29 | 3. **Suggest Modifications:** 30 | - Propose clear and actionable changes to the initial prompt that could potentially address the identified issues. 31 | - If applicable, recommend breaking down complex tasks into simpler, more manageable subtasks within the prompt. 
32 | ''', required=True) 33 | -------------------------------------------------------------------------------- /docs/media/optimized_lamoom_mechanisms.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/docs/media/optimized_lamoom_mechanisms.gif -------------------------------------------------------------------------------- /docs/sequence_diagrams/add_ideal_answer_to_log.md: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | title Lamoom Feedback Flow: Adding ideal answers to existing responses 4 | note over Lamoom,LamoomService: Process of providing feedback on previous responses 5 | Lamoom->>Lamoom: add_ideal_answer(response_id, ideal_answer) 6 | Lamoom->>LamoomService: update_response_ideal_answer(api_token, log_id, ideal_answer) 7 | note right of LamoomService: PUT /lib/logs: Server updates existing log with:\n- Ideal answer for comparison\n- Used for quality assessment\n- Creating training data\n- Generating automated tests 8 | LamoomService-->>Lamoom: Return feedback submission result 9 | @enduml 10 | -------------------------------------------------------------------------------- /docs/sequence_diagrams/pngs/lamoom_add_ideal_answer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/docs/sequence_diagrams/pngs/lamoom_add_ideal_answer.png -------------------------------------------------------------------------------- /docs/sequence_diagrams/pngs/lamoom_call.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/docs/sequence_diagrams/pngs/lamoom_call.png -------------------------------------------------------------------------------- /docs/sequence_diagrams/pngs/lamoom_save_user_interactions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/docs/sequence_diagrams/pngs/lamoom_save_user_interactions.png -------------------------------------------------------------------------------- /docs/sequence_diagrams/pngs/lamoom_test_creation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/docs/sequence_diagrams/pngs/lamoom_test_creation.png -------------------------------------------------------------------------------- /docs/sequence_diagrams/save_user_interactions.md: -------------------------------------------------------------------------------- 1 | sequenceDiagram 2 | title Lamoom Logging Flow: Recording user interactions with prompts 3 | participant Lamoom 4 | participant SaveWorker 5 | participant LamoomService 6 | 7 | Note over Lamoom, LamoomService: Process of recording prompt execution logs 8 | Lamoom->>Lamoom: call(prompt_id, context, model) - Fetches prompt, calls LLM, gets response 9 | Note right of SaveWorker: Async worker for saving interactions to avoid blocking 10 | Lamoom->>SaveWorker: add_task(api_token, prompt_data, context, result, test_data) 11 | SaveWorker->>LamoomService: save_user_interaction(api_token, prompt_data, context, response) 12 | Note right of LamoomService: 
POST /lib/logs: Records interaction for:\n- Analytics\n- Debugging\n- Performance tracking\n- Cost monitoring 13 | Lamoom-->>Lamoom: Return AIResponse (without waiting for log completion) 14 | -------------------------------------------------------------------------------- /docs/sequence_diagrams/sequence_diagram_lamoom_call.md: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | Note over Lamoom,LLM: Process of lamoom.call(prompt_id, context, model) 4 | 5 | Lamoom->>LibCache: get_prompt(prompt_id, version) 6 | alt Prompt is in cache or RECEIVE_PROMPT_FROM_SERVER disabled 7 | Lamoom-->>LibCache: Return cached prompt data 8 | end 9 | alt Cache miss or expired or new version of prompt is in the code 10 | Lamoom->>LamoomService: POST /lib/prompts (with currently active prompt data) 11 | 12 | Note right of LamoomService: Server checks if local prompt\nis the latest published version 13 | 14 | LamoomService-->>Lamoom: Response with prompt data and is_taken_globally flag 15 | 16 | Lamoom->>LibCache: Update cache with timestamp 17 | Note right of LibCache: Cache will be valid for 5 minutes\n(CACHE_PROMPT_FOR_EACH_SECONDS) 18 | end 19 | 20 | Note over Lamoom, LLM: Continue with AI model calling 21 | 22 | Lamoom-->>LLM: Call LLM w/ updated prompt and enriched context 23 | LLM ->> Lamoom: LLMResponse 24 | @enduml 25 | -------------------------------------------------------------------------------- /docs/sequence_diagrams/test_creation.md: -------------------------------------------------------------------------------- 1 | @startuml 2 | 3 | title Lamoom Test Creation Flow: Creating tests with ideal answers 4 | 5 | note over Lamoom,LamoomService: Process of creating tests for CI/CD validation 6 | 7 | alt Test creation via create_test method 8 | Lamoom->>LamoomService: create_test(prompt_id, context, ideal_answer) 9 | note right of LamoomService: Server creates test with:\n- Prompt ID\n- Test context\n- Ideal answer 10 | 11 | LamoomService-->>Lamoom: Return create test result 12 | end 13 | 14 | alt Test creation during normal prompt call with test_data 15 | Lamoom->>Lamoom: call(prompt_id, context, model, test_data={ideal_answer}), Fetches prompt, calls LLM, gets response 16 | 17 | Lamoom->>SaveWorker: add_task(api_token, prompt_data, context, result, test_data) 18 | SaveWorker->>LamoomService: create_test_with_ideal_answer 19 | note right of LamoomService: Server creates CI/CD test with:\n- Context\n- Prompt\n- Ideal answer 20 | Lamoom-->>Lamoom: Return AIResponse (without waiting for test creation) 21 | end 22 | 23 | @enduml -------------------------------------------------------------------------------- /docs/test_data/medical_questions_answers.csv: -------------------------------------------------------------------------------- 1 | question,answer_provided_by_human 2 | I have a headache that won't go away. What could be causing it?,"Headaches can have many causes, including stress, dehydration, lack of sleep, or more serious conditions like migraines or sinusitis. It's important to keep track of when the headache started, how long it lasts, and any other symptoms you have. If it's persistent or severe, you should see a doctor for a proper diagnosis." 3 | I've been feeling tired all the time lately. What might be wrong?,"Fatigue can be a symptom of various conditions, from anemia and thyroid disorders to depression or chronic fatigue syndrome. It's also common with poor sleep, lack of exercise, or a sedentary lifestyle. 
A doctor can help determine the underlying cause through blood tests and a physical examination." 4 | I have a cough that's been lingering for weeks. Should I be worried?,"A persistent cough can be a sign of several conditions, such as asthma, bronchitis, pneumonia, or even lung cancer. It's important to see a doctor if your cough lasts more than a few weeks, especially if it's accompanied by other symptoms like chest pain, shortness of breath, or coughing up blood." 5 | I've noticed a lump in my breast. Is it cancer?,"Finding a lump in your breast can be alarming, but not all lumps are cancerous. It could be a cyst, fibroadenoma, or something else. You should see a doctor for a proper evaluation, which might include a physical exam, mammogram, or biopsy." 6 | My child has a fever. When should I take them to the doctor?,"Fever is a common symptom in children and can be caused by infections like the flu, ear infections, or urinary tract infections. If your child has a fever of 100.4°F (38°C) or higher that doesn't respond to fever-reducing medication, or if they have other symptoms like rash, vomiting, or difficulty breathing, you should see a doctor." 7 | I'm experiencing heartburn frequently. What can I do about it?,"Heartburn is often caused by acid reflux, which can be managed with over-the-counter antacids or lifestyle changes like avoiding spicy foods, eating smaller meals, and not lying down after eating. If it's persistent, you might have gastroesophagal reflux disease (GERD), and you should consult a doctor for proper treatment." 8 | I have pain in my lower back. What could be the cause?,"Lower back pain can be due to muscle strain, poor posture, arthritis, or more serious conditions like herniated discs or spinal stenosis. It's important to rest, apply heat or cold, and do stretching exercises. If the pain is severe or persists, see a doctor for further evaluation." 9 | I've been having trouble sleeping. Any suggestions?,"Insomnia can be caused by stress, anxiety, depression, or medical conditions like sleep apnea. Establishing a regular sleep schedule, avoiding caffeine and alcohol before bed, and creating a relaxing bedtime routine can help. If insomnia persists, consult a doctor for possible treatments." 10 | I've gained weight recently without changing my diet or exercise. What's going on?,"Unexplained weight gain can be due to fluid retention, hormonal imbalances, or certain medications. It's important to monitor your diet and exercise habits and see if there are any other symptoms. A doctor can check for conditions like hypothyroidism or heart failure." 11 | I have a rash that's itchy and won't go away. What should I do?,"Rashes can be caused by allergies, infections, or skin conditions like eczema or psoriasis. Avoid scratching and try over-the-counter hydrocortisone cream. If the rash persists or is accompanied by fever or other symptoms, see a doctor for a proper diagnosis." 12 | I'm always thirsty and urinating frequently. Could I have diabetes?,"Excessive thirst and frequent urination can be symptoms of diabetes, but they can also be caused by other conditions like urinary tract infections or kidney problems. A doctor can perform blood tests to check your blood sugar levels and determine if you have diabetes." 13 | "My joints are stiff and painful, especially in the morning. Is it arthritis?","Joint stiffness and pain can be symptoms of arthritis, but they can also be caused by other conditions like gout or Lyme disease. 
A doctor can evaluate your symptoms, perform a physical exam, and may order blood tests or X-rays to diagnose the cause." 14 | I've been feeling down and have no energy. Could I be depressed?,"Depression can manifest as persistent sadness, lack of energy, and loss of interest in activities. It's important to talk to a doctor or a mental health professional for an accurate diagnosis and appropriate treatment, which might include therapy or medication." 15 | I have a sore throat and it's hard to swallow. What should I do?,"A sore throat can be due to a viral or bacterial infection, allergies, or acid reflux. Gargling with salt water, drinking plenty of fluids, and taking over-the-counter pain relievers can help. If the pain persists or is accompanied by fever or swollen glands, see a doctor." 16 | I've noticed changes in my bowel habits. When should I be concerned?,"Changes in bowel habits, such as diarrhea, constipation, or blood in the stool, can be signs of gastrointestinal disorders, infections, or even colon cancer. It's important to see a doctor if these changes last more than a few days or are accompanied by other symptoms like abdominal pain or weight loss." 17 | I have a persistent cough that's worse at night. Could it be asthma?,"A cough that's worse at night can be a symptom of asthma, especially if it's accompanied by wheezing or shortness of breath. Other causes could be postnasal drip or GERD. A doctor can perform tests like spirometry to diagnose asthma." 18 | I'm experiencing dizziness and lightheadedness. What could be causing this?,"Dizziness can be caused by dehydration, low blood sugar, inner ear problems, or more serious conditions like heart disease or stroke. It's important to see a doctor if dizziness is frequent or severe, or if it's accompanied by other symptoms like chest pain or slurred speech." 19 | I have a mole that's changed color and shape. Should I get it checked?,"Any changes in a mole, such as color, shape, size, or if it starts to itch or bleed, could be signs of skin cancer. You should see a dermatologist for a skin exam and possible biopsy." 20 | I've been having trouble remembering things lately. Is it normal aging or something else?,"Forgetfulness can be a normal part of aging, but it can also be a sign of conditions like Alzheimer's disease or other forms of dementia. If memory loss is affecting your daily life, see a doctor for an evaluation." 21 | I have pain in my chest that comes and goes. Could it be my heart?,"Chest pain can be a sign of a heart attack, angina, or other heart conditions, but it can also be caused by gastrointestinal issues like heartburn or muscle strain. If you experience chest pain, especially if it's accompanied by shortness of breath, sweating, or radiating pain, seek medical attention immediately." 
22 | -------------------------------------------------------------------------------- /lamoom/__init__.py: -------------------------------------------------------------------------------- 1 | from lamoom.responses import AIResponse 2 | from lamoom.settings import * 3 | from lamoom.prompt.lamoom import Lamoom 4 | from lamoom.ai_models import behaviour 5 | from lamoom.prompt.prompt import Prompt 6 | from lamoom.prompt.prompt import Prompt as PipePrompt 7 | from lamoom.ai_models.attempt_to_call import AttemptToCall 8 | from lamoom.ai_models.openai.openai_models import ( 9 | C_128K, 10 | C_4K, 11 | C_16K, 12 | C_32K, 13 | OpenAIModel, 14 | ) 15 | from lamoom.ai_models.openai.azure_models import AzureAIModel 16 | from lamoom.ai_models.claude.claude_model import ClaudeAIModel 17 | from lamoom.responses import AIResponse 18 | from lamoom.ai_models.openai.responses import OpenAIResponse 19 | from lamoom.ai_models.behaviour import AIModelsBehaviour, PromptAttempts 20 | -------------------------------------------------------------------------------- /lamoom/ai_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/lamoom/ai_models/__init__.py -------------------------------------------------------------------------------- /lamoom/ai_models/ai_model.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | from dataclasses import dataclass 3 | from enum import Enum 4 | 5 | from _decimal import Decimal 6 | 7 | from lamoom.responses import AIResponse 8 | 9 | 10 | class AI_MODELS_PROVIDER(Enum): 11 | OPENAI = "openai" 12 | AZURE = "azure" 13 | CLAUDE = "claude" 14 | GEMINI = "gemini" 15 | NEBIUS = "nebius" 16 | CUSTOM = "custom" 17 | 18 | 19 | @dataclass(kw_only=True) 20 | class AIModel: 21 | tiktoken_encoding: t.Optional[str] = "cl100k_base" 22 | provider: AI_MODELS_PROVIDER = None 23 | support_functions: bool = False 24 | 25 | @property 26 | def name(self) -> str: 27 | return "undefined_aimodel" 28 | 29 | def _decimal(self, value) -> Decimal: 30 | return Decimal(value).quantize(Decimal(".00001")) 31 | 32 | def get_params(self) -> t.Dict[str, t.Any]: 33 | return {} 34 | 35 | def call(self, *args, **kwargs) -> AIResponse: 36 | raise NotImplementedError 37 | 38 | def get_metrics_data(self): 39 | return {} 40 | -------------------------------------------------------------------------------- /lamoom/ai_models/attempt_to_call.py: -------------------------------------------------------------------------------- 1 | import typing as t 2 | from dataclasses import dataclass 3 | 4 | from lamoom.ai_models.ai_model import AIModel 5 | 6 | 7 | @dataclass 8 | class AttemptToCall: 9 | ai_model: AIModel 10 | weight: int = 1 # from 1 to 100, the higher weight the more often it will be called 11 | # if you wish to limit functions that can be used, or to turn off calling openai functions for this attempt: 12 | # [] - if empty list of functions, functions are not supported for that call 13 | # None - if None, no limitations on functions 14 | # ['function1', 'function2'] - if list of functions, only those functions will be called 15 | functions: t.List[str] = None 16 | attempt_number: int = 1 17 | 18 | def __post_init__(self): 19 | self.id = ( 20 | f"{self.ai_model.name}" 21 | f"-n{self.attempt_number}-" 22 | f"{self.ai_model.provider.value}" 23 | ) 24 | 25 | def __str__(self) -> str: 26 | return self.id 27 | 28 | def params(self) 
-> t.Dict[str, t.Any]: 28 | return self.ai_model.get_params() 29 | 30 | def get_functions(self) -> t.List[str]: 31 | # empty list - functions are not supported 32 | if not self.ai_model.support_functions: 33 | return [] 34 | # None - no limitations on functions 35 | if self.functions is None: 36 | return None 37 | return self.functions 38 | 39 | def model_max_tokens(self) -> int: 40 | return self.ai_model.max_tokens 41 | 42 | def tiktoken_encoding(self) -> str: 43 | return self.ai_model.tiktoken_encoding 44 | 45 | -------------------------------------------------------------------------------- /lamoom/ai_models/behaviour.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import random 3 | import typing as t 4 | from copy import copy 5 | from dataclasses import dataclass 6 | 7 | from lamoom.ai_models.attempt_to_call import AttemptToCall 8 | from lamoom.exceptions import BehaviourIsNotDefined 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @dataclass 14 | class AIModelsBehaviour: 15 | attempt: AttemptToCall 16 | fallback_attempts: list[AttemptToCall] = None 17 | 18 | 19 | @dataclass 20 | class PromptAttempts: 21 | ai_models_behaviour: AIModelsBehaviour 22 | current_attempt: AttemptToCall = None 23 | 24 | def initialize_attempt(self): 25 | if self.current_attempt is None: 26 | self.current_attempt = self.ai_models_behaviour.attempt 27 | self.fallback_index = 0  # Start fallback index at 0 28 | return self.current_attempt 29 | elif self.ai_models_behaviour.fallback_attempts: 30 | if self.fallback_index < len(self.ai_models_behaviour.fallback_attempts): 31 | self.current_attempt = self.ai_models_behaviour.fallback_attempts[self.fallback_index] 32 | self.fallback_index += 1 33 | return self.current_attempt 34 | else: 35 | self.current_attempt = None  # No more fallback attempts left 36 | return None 37 | 38 | def __str__(self) -> str: 39 | return f"Current attempt {self.current_attempt} of {1 + len(self.ai_models_behaviour.fallback_attempts or [])} attempts" 40 | -------------------------------------------------------------------------------- /lamoom/ai_models/claude/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/lamoom/ai_models/claude/__init__.py -------------------------------------------------------------------------------- /lamoom/ai_models/claude/claude_model.py: -------------------------------------------------------------------------------- 1 | from lamoom.ai_models.ai_model import AI_MODELS_PROVIDER, AIModel 2 | import logging 3 | 4 | from lamoom.ai_models.constants import C_200K, C_4K 5 | from lamoom.responses import AIResponse 6 | from decimal import Decimal 7 | from enum import Enum 8 | 9 | import typing as t 10 | from dataclasses import dataclass 11 | 12 | from lamoom.ai_models.claude.responses import ClaudeAIReponse 13 | from lamoom.ai_models.claude.constants import HAIKU, SONNET, OPUS 14 | from lamoom.ai_models.utils import get_common_args 15 | 16 | from openai.types.chat import ChatCompletionMessage as Message 17 | from lamoom.responses import Prompt 18 | from lamoom.exceptions import RetryableCustomError, ConnectionLostError 19 | import anthropic 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | class FamilyModel(Enum): 25 | haiku = "Claude 3 Haiku" 26 | sonnet = "Claude 3 Sonnet" 27 | opus = "Claude 3 Opus" 28 | 29 | 30 | @dataclass(kw_only=True) 31 | class ClaudeAIModel(AIModel): 32 | model: 
str 33 | max_tokens: int = C_4K 34 | api_key: str = None 35 | provider: AI_MODELS_PROVIDER = AI_MODELS_PROVIDER.CLAUDE 36 | family: str = None 37 | 38 | def __post_init__(self): 39 | if HAIKU in self.model: 40 | self.family = FamilyModel.haiku.value 41 | elif SONNET in self.model: 42 | self.family = FamilyModel.sonnet.value 43 | elif OPUS in self.model: 44 | self.family = FamilyModel.opus.value 45 | else: 46 | logger.info( 47 | f"Unknown family for {self.model}. Please add it obviously. Setting as Claude 3 Opus" 48 | ) 49 | self.family = FamilyModel.opus.value 50 | 51 | logger.debug(f"Initialized ClaudeAIModel: {self}") 52 | 53 | def get_client(self, client_secrets: dict) -> anthropic.Anthropic: 54 | return anthropic.Anthropic(api_key=client_secrets.get("api_key")) 55 | 56 | def uny_all_messages_with_same_role(self, messages: t.List[dict]) -> t.List[dict]: 57 | result = [] 58 | last_role = None 59 | for message in messages: 60 | if message.get("role") == "system": 61 | message["role"] = "user" 62 | if last_role != message.get("role"): 63 | result.append(message) 64 | last_role = message.get("role") 65 | else: 66 | result[-1]["content"] += message.get("content") 67 | return result 68 | 69 | 70 | def call(self, messages: t.List[dict], max_tokens: int, client_secrets: dict = {}, **kwargs) -> AIResponse: 71 | max_tokens = min(max_tokens, self.max_tokens) 72 | 73 | common_args = get_common_args(max_tokens) 74 | kwargs = { 75 | **common_args, 76 | **self.get_params(), 77 | **kwargs, 78 | } 79 | messages = self.uny_all_messages_with_same_role(messages) 80 | 81 | logger.debug( 82 | f"Calling {messages} with max_tokens {max_tokens} and kwargs {kwargs}" 83 | ) 84 | client = self.get_client(client_secrets) 85 | 86 | stream_function = kwargs.get("stream_function") 87 | check_connection = kwargs.get("check_connection") 88 | stream_params = kwargs.get("stream_params") 89 | 90 | content = "" 91 | 92 | try: 93 | if kwargs.get("stream"): 94 | with client.messages.stream( 95 | model=self.model, max_tokens=max_tokens, messages=messages 96 | ) as stream: 97 | idx = 0 98 | for text in stream.text_stream: 99 | if idx % 5 == 0: 100 | if not check_connection(**stream_params): 101 | raise ConnectionLostError("Connection was lost!") 102 | 103 | stream_function(text, **stream_params) 104 | content += text 105 | idx += 1 106 | else: 107 | response = client.messages.create( 108 | model=self.model, max_tokens=max_tokens, messages=messages 109 | ) 110 | content = response.content[0].text 111 | return ClaudeAIReponse( 112 | message=Message(content=content, role="assistant"), 113 | content=content, 114 | prompt=Prompt( 115 | messages=kwargs.get("messages"), 116 | functions=kwargs.get("tools"), 117 | max_tokens=max_tokens, 118 | temperature=kwargs.get("temperature"), 119 | top_p=kwargs.get("top_p"), 120 | ), 121 | ) 122 | except Exception as e: 123 | logger.exception("[CLAUDEAI] failed to handle chat stream", exc_info=e) 124 | raise RetryableCustomError(f"Claude AI call failed!") 125 | 126 | @property 127 | def name(self) -> str: 128 | return self.model 129 | 130 | def get_params(self) -> t.Dict[str, t.Any]: 131 | return { 132 | "model": self.model, 133 | "max_tokens": self.max_tokens, 134 | } 135 | 136 | def get_metrics_data(self) -> t.Dict[str, t.Any]: 137 | return { 138 | "model": self.model, 139 | "max_tokens": self.max_tokens, 140 | } 141 | -------------------------------------------------------------------------------- /lamoom/ai_models/claude/constants.py: 
-------------------------------------------------------------------------------- 1 | HAIKU = "haiku" 2 | SONNET = "sonnet" 3 | OPUS = "opus" 4 | -------------------------------------------------------------------------------- /lamoom/ai_models/claude/responses.py: -------------------------------------------------------------------------------- 1 | from lamoom.responses import AIResponse 2 | from dataclasses import dataclass 3 | from openai.types.chat import ChatCompletionMessage as Message 4 | 5 | 6 | @dataclass(kw_only=True) 7 | class ClaudeAIReponse(AIResponse): 8 | message: Message = None 9 | 10 | def get_message_str(self) -> str: 11 | return self.message.model_dump_json(indent=2) 12 | 13 | def __str__(self) -> str: 14 | result = ( 15 | f"finish_reason: {self.finish_reason}\n" 16 | f"message: {self.get_message_str()}\n" 17 | ) 18 | return result 19 | -------------------------------------------------------------------------------- /lamoom/ai_models/constants.py: -------------------------------------------------------------------------------- 1 | C_4K = 4096 2 | C_8K = 8192 3 | C_16K = 16384 4 | C_32K = 32768 5 | 6 | C_128K = 128_000 7 | C_200K = 200_000 8 | C_1M = 1_000_000 -------------------------------------------------------------------------------- /lamoom/ai_models/openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/lamoom/ai_models/openai/__init__.py -------------------------------------------------------------------------------- /lamoom/ai_models/openai/azure_models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as t 3 | from dataclasses import dataclass 4 | 5 | from openai import AzureOpenAI 6 | 7 | from lamoom.ai_models.ai_model import AI_MODELS_PROVIDER 8 | from lamoom.ai_models.openai.openai_models import FamilyModel, OpenAIModel 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @dataclass(kw_only=True) 14 | class AzureAIModel(OpenAIModel): 15 | realm: t.Optional[str] 16 | deployment_id: t.Optional[str] 17 | provider: AI_MODELS_PROVIDER = AI_MODELS_PROVIDER.AZURE 18 | model: t.Optional[str] = None 19 | 20 | def __str__(self) -> str: 21 | return f"{self.realm}-{self.deployment_id}-{self.family}" 22 | 23 | def __post_init__(self): 24 | if not self.family: 25 | if self.deployment_id.startswith("davinci"): 26 | self.family = FamilyModel.instruct_gpt.value 27 | elif self.deployment_id.startswith(("gpt3", "gpt-3")): 28 | self.family = FamilyModel.chat.value 29 | elif self.deployment_id.startswith("gpt-4o-mini"): 30 | self.family = FamilyModel.gpt4o_mini.value 31 | elif self.deployment_id.startswith("gpt-4o"): 32 | self.family = FamilyModel.gpt4o.value 33 | elif self.deployment_id.startswith(("gpt4", "gpt-4", "gpt")): 34 | self.family = FamilyModel.gpt4.value 35 | else: 36 | logger.info( 37 | f"Unknown family for {self.deployment_id}. Please add it obviously. 
Setting as GPT4" 38 | ) 39 | self.family = FamilyModel.gpt4.value 40 | logger.debug(f"Initialized AzureAIModel: {self}") 41 | 42 | @property 43 | def name(self) -> str: 44 | return f"{self.deployment_id}" 45 | 46 | def get_params(self) -> t.Dict[str, t.Any]: 47 | return { 48 | "model": self.deployment_id, 49 | } 50 | 51 | def get_client(self, client_secrets: dict = {}): 52 | realm_data = client_secrets.get(self.realm) 53 | if not realm_data: 54 | raise ValueError(f"Realm data for {self.realm} not found in client_secrets") 55 | return AzureOpenAI( 56 | api_version=realm_data.get("api_version", "2023-07-01-preview"), 57 | azure_endpoint=realm_data["azure_endpoint"], 58 | api_key=realm_data["api_key"], 59 | ) 60 | 61 | def get_metrics_data(self): 62 | return { 63 | "realm": self.realm, 64 | "deployment_id": self.deployment_id, 65 | "family": self.family, 66 | "provider": self.provider.value, 67 | } 68 | -------------------------------------------------------------------------------- /lamoom/ai_models/openai/exceptions.py: -------------------------------------------------------------------------------- 1 | from lamoom.exceptions import LamoomError, RetryableCustomError 2 | 3 | 4 | class OpenAIChunkedEncodingError(RetryableCustomError): 5 | pass 6 | 7 | 8 | class OpenAITimeoutError(RetryableCustomError): 9 | pass 10 | 11 | 12 | class OpenAIResponseWasFilteredError(RetryableCustomError): 13 | pass 14 | 15 | 16 | class OpenAIAuthenticationError(RetryableCustomError): 17 | pass 18 | 19 | 20 | class OpenAIInternalError(RetryableCustomError): 21 | pass 22 | 23 | 24 | class OpenAiRateLimitError(RetryableCustomError): 25 | pass 26 | 27 | 28 | class OpenAiPermissionDeniedError(RetryableCustomError): 29 | pass 30 | 31 | 32 | class OpenAIUnknownError(RetryableCustomError): 33 | pass 34 | 35 | 36 | ### Non-retryable Errors ### 37 | class OpenAIInvalidRequestError(LamoomError): 38 | pass 39 | 40 | 41 | class OpenAIBadRequestError(LamoomError): 42 | pass 43 | 44 | 45 | class ConnectionCheckError(LamoomError): 46 | pass 47 | -------------------------------------------------------------------------------- /lamoom/ai_models/openai/openai_models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as t 3 | from dataclasses import dataclass 4 | from decimal import Decimal 5 | from enum import Enum 6 | 7 | from openai import OpenAI 8 | 9 | from lamoom.ai_models.ai_model import AI_MODELS_PROVIDER, AIModel 10 | from lamoom.ai_models.constants import C_128K, C_16K, C_32K, C_4K 11 | from lamoom.ai_models.openai.responses import OpenAIResponse 12 | from lamoom.ai_models.utils import get_common_args 13 | from lamoom.exceptions import ConnectionLostError 14 | 15 | from openai.types.chat import ChatCompletionMessage as Message 16 | from lamoom.responses import Prompt 17 | 18 | from .utils import raise_openai_exception 19 | 20 | M_DAVINCI = "davinci" 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class FamilyModel(Enum): 26 | chat = "GPT-3.5" 27 | gpt4 = "GPT-4" 28 | gpt4o = "GPT-4o" 29 | gpt4o_mini = "GPT-4o-mini" 30 | instruct_gpt = "InstructGPT" 31 | 32 | BASE_URL_MAPPING = { 33 | 'gemini': "https://generativelanguage.googleapis.com/v1beta/openai/", 34 | 'nebius': 'https://api.studio.nebius.ai/v1/' 35 | } 36 | 37 | 38 | @dataclass(kw_only=True) 39 | class OpenAIModel(AIModel): 40 | model: t.Optional[str] 41 | max_tokens: int = C_16K 42 | support_functions: bool = False 43 | provider: AI_MODELS_PROVIDER = AI_MODELS_PROVIDER.OPENAI 44 | 
family: str = None 45 | max_sample_budget: int = C_4K 46 | base_url: str = None 47 | 48 | def __str__(self) -> str: 49 | return f"openai-{self.model}-{self.family}" 50 | 51 | def __post_init__(self): 52 | if self.model.startswith("davinci"): 53 | self.family = FamilyModel.instruct_gpt.value 54 | elif self.model.startswith("gpt-3"): 55 | self.family = FamilyModel.chat.value 56 | elif self.model.startswith("gpt-4o-mini"): 57 | self.family = FamilyModel.gpt4o_mini.value 58 | elif self.model.startswith("gpt-4o"): 59 | self.family = FamilyModel.gpt4o.value 60 | elif self.model.startswith(("gpt4", "gpt-4", "gpt")): 61 | self.family = FamilyModel.gpt4.value 62 | else: 63 | logger.info( 64 | f"Unknown family for {self.model}. Please add it obviously. Setting as GPT4" 65 | ) 66 | self.family = FamilyModel.gpt4.value 67 | logger.debug(f"Initialized OpenAIModel: {self}") 68 | 69 | @property 70 | def name(self) -> str: 71 | return self.model 72 | 73 | def get_params(self) -> t.Dict[str, t.Any]: 74 | return { 75 | "model": self.model, 76 | } 77 | 78 | def get_base_url(self) -> str | None: 79 | return BASE_URL_MAPPING.get(self.provider.value, None) 80 | 81 | def get_metrics_data(self): 82 | return { 83 | "model": self.model, 84 | "family": self.family, 85 | "provider": self.provider.value, 86 | "base_url": self.get_base_url() if self.base_url is None else self.base_url 87 | } 88 | 89 | def call( 90 | self, 91 | messages, 92 | max_tokens, 93 | stream_function: t.Callable = None, 94 | check_connection: t.Callable = None, 95 | stream_params: dict = {}, 96 | client_secrets: dict = {}, 97 | **kwargs, 98 | ) -> OpenAIResponse: 99 | logger.debug( 100 | f"Calling {messages} with max_tokens {max_tokens} and kwargs {kwargs}" 101 | ) 102 | if self.family in [ 103 | FamilyModel.chat.value, 104 | FamilyModel.gpt4.value, 105 | FamilyModel.gpt4o.value, 106 | FamilyModel.gpt4o_mini.value, 107 | ]: 108 | return self.call_chat_completion( 109 | messages, 110 | max_tokens, 111 | stream_function=stream_function, 112 | check_connection=check_connection, 113 | stream_params=stream_params, 114 | client_secrets=client_secrets, 115 | **kwargs, 116 | ) 117 | raise NotImplementedError(f"Openai family {self.family} is not implemented") 118 | 119 | def get_client(self, client_secrets: dict = {}): 120 | return OpenAI( 121 | organization=client_secrets.get("organization", None), 122 | api_key=client_secrets["api_key"], 123 | base_url=self.get_base_url() if self.base_url is None else self.base_url 124 | ) 125 | 126 | def call_chat_completion( 127 | self, 128 | messages: t.List[t.Dict[str, str]], 129 | max_tokens: t.Optional[int], 130 | functions: t.List[t.Dict[str, str]] = [], 131 | stream_function: t.Callable = None, 132 | check_connection: t.Callable = None, 133 | stream_params: dict = {}, 134 | client_secrets: dict = {}, 135 | **kwargs, 136 | ) -> OpenAIResponse: 137 | 138 | kwargs = { 139 | **{ 140 | "messages": messages, 141 | }, 142 | **self.get_params(), 143 | **kwargs, 144 | } 145 | if functions: 146 | kwargs["tools"] = functions 147 | try: 148 | client = self.get_client(client_secrets) 149 | result = client.chat.completions.create( 150 | **kwargs, 151 | ) 152 | 153 | if kwargs.get("stream"): 154 | return OpenAIStreamResponse( 155 | stream_function=stream_function, 156 | check_connection=check_connection, 157 | stream_params=stream_params, 158 | original_result=result, 159 | prompt=Prompt( 160 | messages=kwargs.get("messages"), 161 | functions=kwargs.get("tools"), 162 | max_tokens=max_tokens, 163 | 
temperature=kwargs.get("temperature"), 164 | top_p=kwargs.get("top_p"), 165 | ), 166 | ).stream() 167 | logger.debug(f"Result: {result.choices[0]}") 168 | return OpenAIResponse( 169 | finish_reason=result.choices[0].finish_reason, 170 | message=result.choices[0].message, 171 | content=result.choices[0].message.content, 172 | original_result=result, 173 | prompt=Prompt( 174 | messages=kwargs.get("messages"), 175 | functions=kwargs.get("tools"), 176 | max_tokens=max_tokens, 177 | temperature=kwargs.get("temperature"), 178 | top_p=kwargs.get("top_p"), 179 | ), 180 | ) 181 | except Exception as e: 182 | logger.exception("[OPENAI] failed to handle chat stream", exc_info=e) 183 | raise_openai_exception(e) 184 | 185 | 186 | @dataclass(kw_only=True) 187 | class OpenAIStreamResponse(OpenAIResponse): 188 | stream_function: t.Callable 189 | check_connection: t.Callable 190 | stream_params: dict 191 | 192 | def process_message(self, text: str, idx: int): 193 | if idx % 5 == 0: 194 | if not self.check_connection(**self.stream_params): 195 | raise ConnectionLostError("Connection was lost!") 196 | if not text: 197 | return 198 | self.stream_function(text, **self.stream_params) 199 | 200 | def stream(self): 201 | content = "" 202 | for i, data in enumerate(self.original_result): 203 | if not data.choices: 204 | continue 205 | choice = data.choices[0] 206 | if choice.delta: 207 | content += choice.delta.content or "" 208 | self.process_message(choice.delta.content, i) 209 | self.message = Message( 210 | content=content, 211 | role="assistant", 212 | ) 213 | return self 214 | -------------------------------------------------------------------------------- /lamoom/ai_models/openai/responses.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import typing as t 4 | from dataclasses import dataclass 5 | 6 | from openai.types.chat import ChatCompletionMessage as Message 7 | from openai.types.chat import ChatCompletionMessageToolCall as ToolCall 8 | 9 | from lamoom.responses import AIResponse 10 | 11 | FINISH_REASON_LENGTH = "length" 12 | FINISH_REASON_ERROR = "error" 13 | FINISH_REASON_FINISH = "stop" 14 | FINISH_REASON_TOOL_CALLS = "tool_calls" 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | @dataclass(kw_only=True) 20 | class OpenAIResponse(AIResponse): 21 | message: Message = None 22 | exception: t.Optional[Exception] = None 23 | 24 | @property 25 | def response(self) -> str: 26 | return self.content or self.message.content 27 | 28 | def is_function(self) -> bool: 29 | return self.finish_reason == FINISH_REASON_TOOL_CALLS 30 | 31 | @property 32 | def tool_calls(self) -> t.List[ToolCall]: 33 | return self.message.tool_calls 34 | 35 | def get_function_name(self, tool_call: ToolCall) -> t.Optional[str]: 36 | if tool_call.type != "function": 37 | logger.error(f"function.type is not function: {tool_call.type}") 38 | return None 39 | return tool_call.function.name 40 | 41 | def get_function_args(self, tool_call: ToolCall) -> t.Dict[str, t.Any]: 42 | if not self.is_function() or not tool_call.function: 43 | return {} 44 | arguments = tool_call.function.arguments 45 | try: 46 | return json.loads(arguments) 47 | except json.JSONDecodeError as e: 48 | logger.debug("Failed to parse function arguments", exc_info=e) 49 | return {} 50 | 51 | def is_reached_limit(self) -> bool: 52 | return self.finish_reason == FINISH_REASON_LENGTH 53 | 54 | def to_dict(self) -> t.Dict[str, str]: 55 | return { 56 | "finish_reason": self.finish_reason, 57 
| "message": self.message.model_dump_json(indent=2), 58 | } 59 | 60 | def get_message_str(self) -> str: 61 | return self.message.model_dump_json(indent=2) 62 | 63 | def __str__(self) -> str: 64 | result = ( 65 | f"finish_reason: {self.finish_reason}\n" 66 | f"message: {self.get_message_str()}\n" 67 | ) 68 | return result 69 | -------------------------------------------------------------------------------- /lamoom/ai_models/openai/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import requests 3 | 4 | import openai 5 | 6 | from lamoom.ai_models.openai.exceptions import ( 7 | OpenAIAuthenticationError, 8 | OpenAIBadRequestError, 9 | OpenAIChunkedEncodingError, 10 | OpenAIInternalError, 11 | OpenAIInvalidRequestError, 12 | OpenAiPermissionDeniedError, 13 | OpenAiRateLimitError, 14 | OpenAIResponseWasFilteredError, 15 | OpenAITimeoutError, 16 | OpenAIUnknownError, 17 | ConnectionCheckError, 18 | ) 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def raise_openai_exception( 24 | exc: Exception, 25 | ) -> None: 26 | if isinstance(exc, requests.exceptions.ChunkedEncodingError): 27 | raise OpenAIChunkedEncodingError() 28 | 29 | if isinstance(exc, openai.APITimeoutError): 30 | raise OpenAITimeoutError() 31 | 32 | if isinstance(exc, openai.BadRequestError): 33 | if "response was filtered" in str(exc): 34 | raise OpenAIResponseWasFilteredError() 35 | if "Too many inputs" in str(exc): 36 | raise OpenAiRateLimitError() 37 | raise OpenAIInvalidRequestError() 38 | if isinstance(exc, openai.RateLimitError): 39 | raise OpenAiRateLimitError() 40 | 41 | if isinstance(exc, openai.AuthenticationError): 42 | raise OpenAIAuthenticationError() 43 | 44 | if isinstance(exc, openai.InternalServerError): 45 | raise OpenAIInternalError() 46 | 47 | if isinstance(exc, openai.PermissionDeniedError): 48 | raise OpenAiPermissionDeniedError() 49 | 50 | if isinstance(exc, openai.APIStatusError): 51 | raise OpenAIBadRequestError() 52 | 53 | if isinstance(exc, ConnectionError): 54 | raise ConnectionCheckError("websocket connection was lost") 55 | 56 | logger.error( 57 | "Unknown OPENAI error, please add it in raise_openai_rate_limit_exception", 58 | exc_info=exc, 59 | ) 60 | raise OpenAIUnknownError() 61 | -------------------------------------------------------------------------------- /lamoom/ai_models/utils.py: -------------------------------------------------------------------------------- 1 | def get_common_args(max_tokens): 2 | return { 3 | "top_p": 1, 4 | "temperature": 0, 5 | "max_tokens": max_tokens, 6 | "stream": False, 7 | } 8 | -------------------------------------------------------------------------------- /lamoom/exceptions.py: -------------------------------------------------------------------------------- 1 | class LamoomError(Exception): 2 | pass 3 | 4 | 5 | class RetryableCustomError(LamoomError): 6 | pass 7 | 8 | 9 | class LamoomPromptIsnotFoundError(LamoomError): 10 | pass 11 | 12 | 13 | class BehaviourIsNotDefined(LamoomError): 14 | pass 15 | 16 | 17 | class ConnectionLostError(LamoomError): 18 | pass 19 | 20 | 21 | class ValueIsNotResolvedError(LamoomError): 22 | pass 23 | 24 | 25 | class NotEnoughBudgetError(LamoomError): 26 | pass 27 | 28 | 29 | class NotFoundPromptError(LamoomError): 30 | pass 31 | 32 | 33 | class ProviderNotFoundError(LamoomError): 34 | pass 35 | 36 | 37 | class NotParsedResponseException(LamoomError): 38 | pass 39 | 40 | class APITokenNotProvided(LamoomError): 41 | pass 42 | 
-------------------------------------------------------------------------------- /lamoom/prompt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/lamoom/prompt/__init__.py -------------------------------------------------------------------------------- /lamoom/prompt/base_prompt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as t 3 | from collections import defaultdict 4 | from dataclasses import dataclass, field 5 | 6 | from lamoom.prompt.chat import ChatsEntity 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | @dataclass(kw_only=True) 12 | class BasePrompt: 13 | priorities: t.Dict[int, t.List[ChatsEntity]] = field( 14 | default_factory=lambda: defaultdict(list) 15 | ) 16 | chats: t.List[ChatsEntity] = field(default_factory=list) 17 | pipe: t.List[str] = field(default_factory=list) 18 | functions: t.List[dict] = None 19 | top_p: float = 0.0 20 | temperature: float = 0.0 21 | 22 | def get_params(self): 23 | return { 24 | "top_p": self.top_p, 25 | "temperature": self.temperature, 26 | } 27 | 28 | def add( 29 | self, 30 | content: str = "", 31 | role: str = "user", 32 | name: t.Optional[str] = None, 33 | tool_calls: t.Dict[str, str] = None, 34 | priority: int = 0, 35 | required: bool = False, 36 | is_multiple: bool = False, 37 | while_fits: bool = False, 38 | add_in_reverse_order: bool = False, 39 | in_one_message: bool = False, 40 | continue_if_doesnt_fit: bool = False, 41 | add_if_fitted_labels: t.List[str] = None, 42 | label: t.Optional[str] = None, 43 | presentation: t.Optional[str] = None, 44 | last_words: t.Optional[str] = None, 45 | ): 46 | if not isinstance(content, str): 47 | logger.warning(f"content is not string: {content}, assignig str of it") 48 | content = str(content) 49 | 50 | chat_value = ChatsEntity( 51 | role=role, 52 | content=(content or ""), 53 | name=name, 54 | tool_calls=tool_calls, 55 | priority=priority, 56 | required=required, 57 | is_multiple=is_multiple, 58 | while_fits=while_fits, 59 | add_in_reverse_order=add_in_reverse_order, 60 | in_one_message=in_one_message, 61 | continue_if_doesnt_fit=continue_if_doesnt_fit, 62 | add_if_fitted_labels=add_if_fitted_labels, 63 | label=label, 64 | presentation=presentation, 65 | last_words=last_words, 66 | ) 67 | self.chats.append(chat_value) 68 | self.priorities[priority].append(chat_value) 69 | self.pipe.append(chat_value._uuid) 70 | 71 | def add_function(self, function: dict): 72 | if not self.functions: 73 | self.functions = [] 74 | self.functions.append(function) 75 | -------------------------------------------------------------------------------- /lamoom/prompt/chat.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as t 3 | import uuid 4 | from dataclasses import dataclass 5 | 6 | from lamoom.exceptions import ValueIsNotResolvedError 7 | from lamoom.utils import resolve 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | @dataclass 13 | class ValuesCost: 14 | values: t.List[str] 15 | cost: int 16 | 17 | 18 | class ChatMessage: 19 | role: str 20 | content: str 21 | name: t.Optional[str] = None 22 | tool_calls: t.Dict[str, str] 23 | ref_name: t.Optional[str] = None 24 | ref_value: t.Optional[str] = None 25 | 26 | def is_not_empty(self): 27 | return bool(self.content or self.tool_calls) 28 | 29 | def is_empty(self): 30 | 
return not self.is_not_empty() 31 | 32 | def not_tool_calls(self): 33 | return not (self.tool_calls) 34 | 35 | def __init__(self, **kwargs): 36 | self.role = kwargs.get("role", "user") 37 | self.content = kwargs["content"] 38 | self.name = kwargs.get("name") 39 | self.tool_calls = kwargs.get("tool_calls") or {} 40 | 41 | def to_dict(self): 42 | result = { 43 | "role": self.role, 44 | "content": self.content, 45 | } 46 | if self.name: 47 | result["name"] = self.name 48 | if self.tool_calls: 49 | result["tool_calls"] = self.tool_calls 50 | return result 51 | 52 | 53 | # can be multiple value 54 | @dataclass(kw_only=True) 55 | class ChatsEntity: 56 | content: str = "" 57 | role: str = "user" 58 | name: t.Optional[str] = None 59 | tool_calls: t.Dict[str, str] = None 60 | priority: int = 0 61 | required: bool = False 62 | is_multiple: bool = False 63 | while_fits: bool = False 64 | add_in_reverse_order: bool = False 65 | in_one_message: bool = False 66 | continue_if_doesnt_fit: bool = False 67 | add_if_fitted_labels: t.List[str] = None 68 | label: t.Optional[str] = None 69 | presentation: t.Optional[str] = None 70 | last_words: t.Optional[str] = None 71 | ref_name: t.Optional[str] = None 72 | ref_value: t.Optional[str] = None 73 | 74 | def __post_init__(self): 75 | self._uuid = uuid.uuid4().hex 76 | 77 | def resolve(self, context: t.Dict[str, t.Any]) -> t.List[ChatMessage]: 78 | result = [] 79 | content = self.content 80 | if self.is_multiple: 81 | # should be just one value like {messages} in prompt 82 | prompt_value = content.strip().replace("{", "").replace("}", "").strip() 83 | values = context.get(prompt_value, []) 84 | if not values: 85 | return [] 86 | if not isinstance(values, list): 87 | raise ValueIsNotResolvedError( 88 | f"Invalid value {values } for prompt {content}. Should be multiple" 89 | ) 90 | else: 91 | # verify that values are json list of ChatMessage 92 | try: 93 | result = [ 94 | ChatMessage(**({"content": c} if isinstance(c, str) else c)) 95 | for c in values 96 | ] 97 | except TypeError as e: 98 | raise ValueIsNotResolvedError( 99 | f"Invalid value { values } for prompt {content}. 
Error: {e}" 100 | ) 101 | return result 102 | 103 | content = resolve(content, context) 104 | if not content: 105 | return [] 106 | return [ 107 | ChatMessage( 108 | name=self.name, 109 | role=self.role, 110 | content=content, 111 | tool_calls=self.tool_calls, 112 | ref_name=self.ref_name, 113 | ref_value=self.ref_value, 114 | ) 115 | ] 116 | 117 | def get_values(self, context: t.Dict[str, str]) -> t.List[ChatMessage]: 118 | try: 119 | values = self.resolve(context) 120 | except Exception as e: 121 | logger.error( 122 | f"Error resolving prompt {self.content}, error: {e}", exc_info=True 123 | ) 124 | return [] 125 | return values 126 | 127 | def dump(self): 128 | data = { 129 | "content": self.content, 130 | "role": self.role, 131 | "name": self.name, 132 | "tool_calls": self.tool_calls, 133 | "priority": self.priority, 134 | "required": self.required, 135 | "is_multiple": self.is_multiple, 136 | "while_fits": self.while_fits, 137 | "add_in_reverse_order": self.add_in_reverse_order, 138 | "in_one_message": self.in_one_message, 139 | "continue_if_doesnt_fit": self.continue_if_doesnt_fit, 140 | "add_if_fitted_labels": self.add_if_fitted_labels, 141 | "label": self.label, 142 | "presentation": self.presentation, 143 | "last_words": self.last_words, 144 | "ref_name": self.ref_name, 145 | "ref_value": self.ref_value, 146 | } 147 | for k, v in list(data.items()): 148 | if v is None: 149 | del data[k] 150 | return data 151 | 152 | @classmethod 153 | def load(cls, data): 154 | return cls( 155 | content=data.get("content"), 156 | role=data.get("role"), 157 | name=data.get("name"), 158 | tool_calls=data.get("tool_calls"), 159 | priority=data.get("priority"), 160 | required=data.get("required"), 161 | is_multiple=data.get("is_multiple"), 162 | while_fits=data.get("while_fits"), 163 | add_in_reverse_order=data.get("add_in_reverse_order"), 164 | in_one_message=data.get("in_one_message"), 165 | continue_if_doesnt_fit=data.get("continue_if_doesnt_fit"), 166 | add_if_fitted_labels=data.get("add_if_fitted_labels"), 167 | label=data.get("label"), 168 | presentation=data.get("presentation"), 169 | last_words=data.get("last_words"), 170 | ref_name=data.get("ref_name"), 171 | ref_value=data.get("ref_value"), 172 | ) 173 | -------------------------------------------------------------------------------- /lamoom/prompt/lamoom.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as t 3 | from dataclasses import dataclass 4 | from decimal import Decimal 5 | import requests 6 | import time 7 | from lamoom.settings import LAMOOM_API_URI 8 | from lamoom import Secrets, settings 9 | from lamoom.ai_models.ai_model import AI_MODELS_PROVIDER 10 | from lamoom.ai_models.attempt_to_call import AttemptToCall 11 | from lamoom.ai_models.behaviour import AIModelsBehaviour, PromptAttempts 12 | from lamoom.ai_models.openai.azure_models import AzureAIModel 13 | from lamoom.ai_models.claude.claude_model import ClaudeAIModel 14 | from lamoom.ai_models.openai.openai_models import OpenAIModel 15 | from lamoom.ai_models.constants import C_16K 16 | 17 | from lamoom.exceptions import ( 18 | LamoomPromptIsnotFoundError, 19 | RetryableCustomError 20 | ) 21 | from lamoom.services.SaveWorker import SaveWorker 22 | from lamoom.prompt.prompt import Prompt 23 | from lamoom.prompt.user_prompt import UserPrompt 24 | 25 | from lamoom.responses import AIResponse 26 | from lamoom.services.lamoom import LamoomService 27 | from lamoom.utils import current_timestamp_ms 28 | import json 
29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | @dataclass 34 | class Lamoom: 35 | api_token: str = None 36 | openai_key: str = None 37 | openai_org: str = None 38 | claude_key: str = None 39 | gemini_key: str = None 40 | azure_keys: t.Dict[str, str] = None 41 | nebius_key: str = None 42 | custom_key: str = None 43 | secrets: Secrets = None 44 | 45 | clients = {} 46 | 47 | def __post_init__(self): 48 | self.secrets = Secrets() 49 | if not self.azure_keys: 50 | if self.secrets.azure_keys: 51 | logger.debug(f"Using Azure keys from secrets") 52 | self.azure_keys = self.secrets.azure_keys 53 | else: 54 | logger.debug(f"Azure keys not found in secrets") 55 | if not self.api_token and self.secrets.API_TOKEN: 56 | logger.debug(f"Using API token from secrets") 57 | self.api_token = self.secrets.API_TOKEN 58 | if not self.openai_key and self.secrets.OPENAI_API_KEY: 59 | logger.debug(f"Using OpenAI API key from secrets") 60 | self.openai_key = self.secrets.OPENAI_API_KEY 61 | if not self.openai_org and self.secrets.OPENAI_ORG: 62 | logger.debug(f"Using OpenAI organization from secrets") 63 | self.openai_org = self.secrets.OPENAI_ORG 64 | if not self.gemini_key and self.secrets.GEMINI_API_KEY: 65 | logger.debug(f"Using Gemini API key from secrets") 66 | self.gemini_key = self.secrets.GEMINI_API_KEY 67 | if not self.claude_key and self.secrets.CLAUDE_API_KEY: 68 | logger.debug(f"Using Claude API key from secrets") 69 | self.claude_key = self.secrets.CLAUDE_API_KEY 70 | if not self.nebius_key and self.secrets.NEBIUS_API_KEY: 71 | logger.debug(f"Using Nebius API key from secrets") 72 | self.nebius_key = self.secrets.NEBIUS_API_KEY 73 | if not self.custom_key and self.secrets.CUSTOM_API_KEY: 74 | logger.debug(f"Using Custom API key from secrets") 75 | self.custom_key = self.secrets.CUSTOM_API_KEY 76 | self.service = LamoomService() 77 | if self.openai_key: 78 | self.clients[AI_MODELS_PROVIDER.OPENAI] = { 79 | "organization": self.openai_org, 80 | "api_key": self.openai_key, 81 | } 82 | if self.azure_keys: 83 | if not self.clients.get(AI_MODELS_PROVIDER.AZURE): 84 | self.clients[AI_MODELS_PROVIDER.AZURE] = {} 85 | for realm, key_data in self.azure_keys.items(): 86 | self.clients[AI_MODELS_PROVIDER.AZURE][realm] = { 87 | "api_version": key_data.get("api_version", "2023-07-01-preview"), 88 | "azure_endpoint": key_data["url"], 89 | "api_key": key_data["key"], 90 | } 91 | logger.debug(f"Initialized Azure client for {realm} {key_data['url']}") 92 | if self.claude_key: 93 | self.clients[AI_MODELS_PROVIDER.CLAUDE] = {"api_key": self.claude_key} 94 | if self.gemini_key: 95 | self.clients[AI_MODELS_PROVIDER.GEMINI] = {"api_key": self.gemini_key} 96 | if self.nebius_key: 97 | self.clients[AI_MODELS_PROVIDER.NEBIUS] = {"api_key": self.nebius_key} 98 | if self.custom_key: 99 | self.clients[AI_MODELS_PROVIDER.CUSTOM] = {"api_key": self.custom_key} 100 | self.worker = SaveWorker() 101 | 102 | def create_test( 103 | self, prompt_id: str, context: t.Dict[str, str], ideal_answer: str = None, model_name: str = None 104 | ): 105 | """ 106 | Create new test 107 | """ 108 | 109 | url = f"{LAMOOM_API_URI}/lib/tests?createTest" 110 | headers = {"Authorization": f"Token {self.api_token}"} 111 | if "ideal_answer" in context: 112 | ideal_answer = context["ideal_answer"] 113 | 114 | data = { 115 | "prompt_id": prompt_id, 116 | "ideal_answer": ideal_answer, 117 | "model_name": model_name, 118 | "test_context": context, 119 | } 120 | json_data = json.dumps(data) 121 | response = requests.post(url, headers=headers, 
data=json_data) 122 | 123 | if response.status_code == 200: 124 | return response.json() 125 | else: 126 | logger.error(response) 127 | 128 | def extract_provider_name(self, model: str, provider_url: str = None) -> dict: 129 | parts = model.split("/") 130 | 131 | if "azure" in parts[0].lower() and len(parts) == 3: 132 | model_provider, realm, model_name = parts 133 | return { 134 | 'provider': model_provider.lower(), 135 | 'model_name': model_name, 136 | 'realm': realm, 137 | 'base_url': None 138 | } 139 | elif "nebius" in parts[0].lower() and len(parts) == 3: 140 | model_provider = parts[0] 141 | model_name = f"{parts[1]}/{parts[2]}" 142 | return { 143 | 'provider': model_provider.lower(), 144 | 'model_name': model_name, 145 | 'realm': None, 146 | 'base_url': None 147 | } 148 | elif "custom" in parts[0].lower(): 149 | if len(parts) == 3: 150 | model_provider = parts[0] 151 | model_name = f"{parts[1]}/{parts[2]}" 152 | else: 153 | model_provider, model_name = parts 154 | return { 155 | 'provider': model_provider.lower(), 156 | 'model_name': model_name, 157 | 'realm': None, 158 | 'base_url': provider_url 159 | } 160 | else: 161 | model_provider, model_name = parts 162 | return { 163 | 'provider': model_provider.lower(), 164 | 'model_name': model_name, 165 | 'realm': None, 166 | 'base_url': None 167 | } 168 | 169 | def init_attempt(self, model_info: dict) -> AttemptToCall: 170 | provider = model_info['provider'] 171 | model_name = model_info['model_name'] 172 | 173 | if provider in [AI_MODELS_PROVIDER.OPENAI.value, 174 | AI_MODELS_PROVIDER.GEMINI.value, 175 | AI_MODELS_PROVIDER.NEBIUS.value]: 176 | return AttemptToCall( 177 | ai_model=OpenAIModel( 178 | provider=AI_MODELS_PROVIDER(provider), 179 | model=model_name, 180 | ), 181 | weight=100, 182 | ) 183 | elif provider == AI_MODELS_PROVIDER.CLAUDE.value: 184 | return AttemptToCall( 185 | ai_model=ClaudeAIModel( 186 | model=model_name, 187 | ), 188 | weight=100, 189 | ) 190 | elif provider == AI_MODELS_PROVIDER.CUSTOM.value: 191 | return AttemptToCall( 192 | ai_model=OpenAIModel( 193 | model=model_name, 194 | provider=AI_MODELS_PROVIDER.CUSTOM, 195 | base_url=model_info['base_url'] 196 | ), 197 | weight=100, 198 | ) 199 | else: 200 | return AttemptToCall( 201 | ai_model=AzureAIModel( 202 | realm=model_info['realm'], 203 | deployment_id=model_name, 204 | ), 205 | weight=100, 206 | ) 207 | 208 | def init_behavior(self, model: str, provider_url: str = None) -> AIModelsBehaviour: 209 | main_model_info = self.extract_provider_name(model, provider_url) 210 | 211 | main_attempt = self.init_attempt(main_model_info) 212 | 213 | fallback_attempts = [] 214 | for model in settings.FALLBACK_MODELS: 215 | model_info = self.extract_provider_name(model, provider_url) 216 | fallback_attempts.append(self.init_attempt(model_info)) 217 | 218 | return AIModelsBehaviour( 219 | attempt=main_attempt, 220 | fallback_attempts=fallback_attempts 221 | ) 222 | 223 | def call( 224 | self, 225 | prompt_id: str, 226 | context: t.Dict[str, str], 227 | model: str, 228 | provider_url: str = None, 229 | params: t.Dict[str, t.Any] = {}, 230 | version: str = None, 231 | count_of_retries: int = 5, 232 | test_data: dict = {}, 233 | stream_function: t.Callable = None, 234 | check_connection: t.Callable = None, 235 | stream_params: dict = {}, 236 | ) -> AIResponse: 237 | """ 238 | Call flow prompt with context and behaviour 239 | """ 240 | 241 | logger.debug(f"Calling {prompt_id}") 242 | start_time = current_timestamp_ms() 243 | prompt = self.get_prompt(prompt_id, version) 244 | 245 
| behaviour = self.init_behavior(model, provider_url) 246 | 247 | logger.info(behaviour) 248 | 249 | prompt_attempts = PromptAttempts(behaviour) 250 | 251 | while prompt_attempts.initialize_attempt(): 252 | current_attempt = prompt_attempts.current_attempt 253 | user_prompt = prompt.create_prompt(current_attempt) 254 | calling_messages = user_prompt.resolve(context) 255 | 256 | for _ in range(0, count_of_retries): 257 | try: 258 | result = current_attempt.ai_model.call( 259 | calling_messages.get_messages(), 260 | calling_messages.max_sample_budget, 261 | stream_function=stream_function, 262 | check_connection=check_connection, 263 | stream_params=stream_params, 264 | client_secrets=self.clients[current_attempt.ai_model.provider], 265 | **params, 266 | ) 267 | 268 | sample_budget = self.calculate_budget_for_text( 269 | user_prompt, result.get_message_str() 270 | ) 271 | 272 | try: 273 | result.metrics.price_of_call = self.get_price( 274 | current_attempt, 275 | sample_budget, 276 | calling_messages.prompt_budget, 277 | ) 278 | except Exception as e: 279 | logger.exception(f"Error while getting price: {e}") 280 | result.metrics.price_of_call = 0 281 | result.metrics.sample_tokens_used = sample_budget 282 | result.metrics.prompt_tokens_used = calling_messages.prompt_budget 283 | result.metrics.ai_model_details = ( 284 | current_attempt.ai_model.get_metrics_data() 285 | ) 286 | result.metrics.latency = current_timestamp_ms() - start_time 287 | 288 | if settings.USE_API_SERVICE and self.api_token: 289 | timestamp = int(time.time() * 1000) 290 | result.id = f"{prompt_id}#{timestamp}" 291 | 292 | self.worker.add_task( 293 | self.api_token, 294 | prompt.service_dump(), 295 | context, 296 | result, 297 | {**test_data, "call_model": model} 298 | ) 299 | return result 300 | except RetryableCustomError as e: 301 | logger.error( 302 | f"Attempt failed: {prompt_attempts.current_attempt} with retryable error: {e}" 303 | ) 304 | except Exception as e: 305 | logger.error( 306 | f"Attempt failed: {prompt_attempts.current_attempt} with non-retryable error: {e}" 307 | ) 308 | 309 | logger.exception( 310 | "Prompt call failed, no attempts worked" 311 | ) 312 | raise Exception 313 | 314 | def get_prompt(self, prompt_id: str, version: str = None) -> Prompt: 315 | """ 316 | if the user has keys: lib -> service: get_actual_prompt(local_prompt) -> Service: 317 | generates hash of the prompt; 318 | check in Redis if that record is the latest; if yes -> return 200, else 319 | checks if that record exists with that hash; 320 | if record exists and it's not the last - then we load the latest published prompt; - > return 200 + the last record 321 | add a new record in storage, and adding that it's the latest published prompt; -> return 200 322 | update redis with latest record; 323 | """ 324 | logger.debug(f"Getting pipe prompt {prompt_id}") 325 | if ( 326 | settings.USE_API_SERVICE 327 | and self.api_token 328 | and settings.RECEIVE_PROMPT_FROM_SERVER 329 | ): 330 | prompt_data = None 331 | prompt = settings.PIPE_PROMPTS.get(prompt_id) 332 | if prompt: 333 | prompt_data = prompt.service_dump() 334 | try: 335 | response = self.service.get_actual_prompt( 336 | self.api_token, prompt_id, prompt_data, version 337 | ) 338 | if not response.is_taken_globally: 339 | prompt.version = response.version 340 | return prompt 341 | response.prompt["version"] = response.version 342 | return Prompt.service_load(response.prompt) 343 | except Exception as e: 344 | logger.exception(f"Error while getting prompt {prompt_id}: {e}") 345 | 
if prompt: 346 | return prompt 347 | else: 348 | logger.exception(f"Prompt {prompt_id} not found") 349 | raise LamoomPromptIsnotFoundError() 350 | 351 | else: 352 | return settings.PIPE_PROMPTS[prompt_id] 353 | 354 | 355 | def add_ideal_answer( 356 | self, 357 | response_id: str, 358 | ideal_answer: str 359 | ): 360 | response = LamoomService.update_response_ideal_answer( 361 | self.api_token, response_id, ideal_answer 362 | ) 363 | 364 | return response 365 | 366 | def calculate_budget_for_text(self, user_prompt: UserPrompt, text: str) -> int: 367 | if not text: 368 | return 0 369 | return len(user_prompt.encoding.encode(text)) 370 | 371 | def get_price( 372 | self, attempt: AttemptToCall, sample_budget: int, prompt_budget: int 373 | ) -> Decimal: 374 | data = { 375 | "provider": attempt.ai_model.provider.value, 376 | "model": attempt.ai_model.name, 377 | "output_tokens": sample_budget, 378 | "input_tokens": prompt_budget, 379 | } 380 | 381 | response = requests.post( 382 | f"{LAMOOM_API_URI}/lib/pricing", 383 | data=json.dumps(data), 384 | ) 385 | 386 | if response.status_code != 200: 387 | return 0 388 | 389 | return response.json()["price"] -------------------------------------------------------------------------------- /lamoom/prompt/prompt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from copy import deepcopy 3 | from dataclasses import dataclass 4 | 5 | from lamoom import settings 6 | from lamoom.ai_models.attempt_to_call import AttemptToCall 7 | from lamoom.prompt.base_prompt import BasePrompt 8 | from lamoom.prompt.chat import ChatsEntity 9 | from lamoom.prompt.user_prompt import UserPrompt 10 | from lamoom.settings import PIPE_PROMPTS 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | @dataclass 16 | class Prompt(BasePrompt): 17 | """ 18 | Prompt is a class that represents a pipe of chats that will be used to generate a prompt. 19 | You can add chats with different priorities to the pipe thinking just about the order of chats. 20 | When you initialize a Prompt, chats will be sorted by priority and then by order of adding. 
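    An illustrative sketch (names are hypothetical): entries are resolved and fitted into the
    token budget in ascending priority order, while the final message order follows the order
    of the `add` calls.

        prompt = Prompt(id="support_answer")
        prompt.add("You are a support agent.", role="system", priority=0)
        prompt.add("{history}", role="user", priority=2, is_multiple=True, while_fits=True)
        prompt.add("Question: {question}", role="user", priority=1, required=True)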
21 | """ 22 | 23 | id: str = None 24 | max_tokens: int = None 25 | min_sample_tokens: int = settings.DEFAULT_SAMPLE_MIN_BUDGET 26 | reserved_tokens_budget_for_sampling: int = None 27 | version: str = None 28 | 29 | def __post_init__(self): 30 | if not self.id: 31 | raise ValueError("Prompt id is required") 32 | if self.max_tokens: 33 | self.max_tokens = int(self.max_tokens) 34 | self._save_in_local_storage() 35 | 36 | def _save_in_local_storage(self): 37 | PIPE_PROMPTS[self.id] = self 38 | 39 | def get_max_tokens(self, ai_attempt: AttemptToCall) -> int: 40 | if self.max_tokens: 41 | return min(self.max_tokens, ai_attempt.model_max_tokens()) 42 | return ai_attempt.model_max_tokens() 43 | 44 | def create_prompt(self, ai_attempt: AttemptToCall) -> UserPrompt: 45 | logger.debug( 46 | f"Creating prompt for {ai_attempt.ai_model} with {ai_attempt.attempt_number} attempt" 47 | f"Encoding {ai_attempt.tiktoken_encoding()}" 48 | ) 49 | return UserPrompt( 50 | pipe=deepcopy(self.pipe), 51 | priorities=deepcopy(self.priorities), 52 | tiktoken_encoding=ai_attempt.tiktoken_encoding(), 53 | model_max_tokens=self.get_max_tokens(ai_attempt), 54 | min_sample_tokens=self.min_sample_tokens, 55 | reserved_tokens_budget_for_sampling=self.reserved_tokens_budget_for_sampling, 56 | ) 57 | 58 | def dump(self) -> dict: 59 | return { 60 | "id": self.id, 61 | "max_tokens": self.max_tokens, 62 | "min_sample_tokens": self.min_sample_tokens, 63 | "reserved_tokens_budget_for_sampling": self.reserved_tokens_budget_for_sampling, 64 | "priorities": { 65 | priority: [chats_value.dump() for chats_value in chats_values] 66 | for priority, chats_values in self.priorities.items() 67 | }, 68 | "pipe": self.pipe, 69 | } 70 | 71 | def service_dump(self) -> dict: 72 | dump = { 73 | "prompt_id": self.id, 74 | "max_tokens": self.max_tokens, 75 | "min_sample_tokens": self.min_sample_tokens, 76 | "reserved_tokens_budget_for_sampling": self.reserved_tokens_budget_for_sampling, 77 | "chats": [chat_value.dump() for chat_value in self.chats], 78 | "version": self.version, 79 | } 80 | return dump 81 | 82 | @classmethod 83 | def service_load(cls, data) -> "Prompt": 84 | prompt = cls( 85 | id=data["prompt_id"], 86 | max_tokens=data["max_tokens"], 87 | min_sample_tokens=data.get("min_sample_tokens") or cls.min_sample_tokens, 88 | reserved_tokens_budget_for_sampling=data.get( 89 | "reserved_tokens_budget_for_sampling" 90 | ), 91 | version=data.get("version"), 92 | ) 93 | for chat_value in data["chats"]: 94 | prompt.add(**chat_value) 95 | return prompt 96 | 97 | @classmethod 98 | def load(cls, data): 99 | priorities = {} 100 | for priority, chat_values in data["priorities"].items(): 101 | priorities[int(priority)] = [ 102 | ChatsEntity.load(chat_value) for chat_value in chat_values 103 | ] 104 | return cls( 105 | id=data["id"], 106 | max_tokens=data["max_tokens"], 107 | min_sample_tokens=data.get("min_sample_tokens"), 108 | reserved_tokens_budget_for_sampling=data.get( 109 | "reserved_tokens_budget_for_sampling" 110 | ), 111 | priorities=priorities, 112 | pipe=data["pipe"], 113 | ) 114 | 115 | def copy(self, prompt_id: str): 116 | prompt = deepcopy(self) 117 | prompt.id = prompt_id 118 | prompt._save_in_local_storage() 119 | return prompt 120 | -------------------------------------------------------------------------------- /lamoom/prompt/user_prompt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import typing as t 3 | from collections import defaultdict 4 | from dataclasses import 
dataclass, field 5 | 6 | import tiktoken 7 | 8 | from lamoom import settings 9 | from lamoom.exceptions import NotEnoughBudgetError 10 | from lamoom.prompt.base_prompt import BasePrompt 11 | from lamoom.prompt.chat import ChatMessage, ChatsEntity 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | @dataclass 17 | class State: 18 | """ 19 | State of the prompt. left_budget is the budget left for the rest of the prompt. 20 | fully_fitted_pipitas is the set of labels of chats that were fully fitted in the prompt. 21 | Pipita references to a small part of pipe, formed with a Spanish ending 'ita' which means a smaller version. 22 | """ 23 | 24 | left_budget: int = 0 25 | fully_fitted_pipitas: t.Set[str] = field(default_factory=set) 26 | references: t.Dict[str, t.List[str]] = field( 27 | default_factory=lambda: defaultdict(list) 28 | ) 29 | 30 | 31 | @dataclass 32 | class CallingMessages: 33 | messages: t.List[ChatMessage] 34 | prompt_budget: int = 0 35 | left_budget: int = 0 36 | references: t.Dict[str, t.List[str]] = None 37 | max_sample_budget: int = 0 38 | 39 | @property 40 | def calling_messages(self) -> t.List[t.Dict[str, str]]: 41 | return [m.to_dict() for m in self.messages if not m.is_empty()] 42 | 43 | def get_messages(self) -> t.List[t.Dict[str, str]]: 44 | result = [] 45 | for m in self.messages: 46 | if m.is_empty(): 47 | continue 48 | result.append(m.to_dict()) 49 | return result 50 | 51 | def __str__(self) -> str: 52 | return "\n".join([str(m.to_dict()) for m in self.messages]) 53 | 54 | 55 | @dataclass(kw_only=True) 56 | class UserPrompt(BasePrompt): 57 | model_max_tokens: int 58 | tiktoken_encoding: str 59 | min_sample_tokens: int 60 | reserved_tokens_budget_for_sampling: int = None 61 | safe_gap_tokens: int = settings.SAFE_GAP_TOKENS 62 | 63 | def __post_init__(self): 64 | self.encoding = tiktoken.get_encoding(self.tiktoken_encoding) 65 | 66 | def resolve(self, context: t.Dict) -> CallingMessages: 67 | pipe = {} 68 | prompt_budget = 0 69 | ordered_pipe = dict((value, i) for i, value in enumerate(self.pipe)) 70 | state = State() 71 | state.left_budget = self.left_budget 72 | for priority in sorted(self.priorities.keys()): 73 | for chat_value in self.priorities[priority]: 74 | r = [ 75 | p in state.fully_fitted_pipitas 76 | for p in (chat_value.add_if_fitted_labels or []) 77 | ] 78 | if not all(r): 79 | continue 80 | 81 | if chat_value.presentation: 82 | state.left_budget -= len( 83 | self.encoding.encode(chat_value.presentation) 84 | ) 85 | if chat_value.last_words: 86 | state.left_budget -= len( 87 | self.encoding.encode(chat_value.last_words) 88 | ) 89 | 90 | values = chat_value.get_values(context) 91 | logger.debug(f"Got values for {chat_value}: {values}") 92 | if not values: 93 | continue 94 | if chat_value.in_one_message: 95 | messages_budget, messages = self.add_values_in_one_message( 96 | values, chat_value, state 97 | ) 98 | elif chat_value.while_fits: 99 | messages_budget, messages = self.add_values_while_fits( 100 | values, 101 | chat_value, 102 | state, 103 | ) 104 | else: 105 | messages_budget, messages = self.add_values(values, state) 106 | if chat_value.label: 107 | state.fully_fitted_pipitas.add(chat_value.label) 108 | 109 | if not messages: 110 | logger.debug(f"messages is empty for {chat_value}") 111 | continue 112 | if not self.is_enough_budget(state, messages_budget): 113 | logger.debug(f"not enough budget for {chat_value}") 114 | if chat_value.required: 115 | raise NotEnoughBudgetError("Not enough budget") 116 | continue 117 | logger.debug(f"adding 
{len(messages)} messages for {chat_value}") 118 | state.left_budget -= messages_budget 119 | prompt_budget += messages_budget 120 | if chat_value.presentation: 121 | messages[0].content = chat_value.presentation + messages[0].content 122 | if chat_value.last_words: 123 | messages[-1].content += chat_value.last_words 124 | pipe[chat_value._uuid] = messages 125 | continue 126 | 127 | final_pipe_with_order = [ 128 | pipe.get(chat_id, []) 129 | for chat_id, _ in sorted(ordered_pipe.items(), key=lambda x: x[1]) 130 | ] 131 | # skip empty values 132 | flat_list: t.List[ChatMessage] = [ 133 | item for sublist in final_pipe_with_order for item in sublist if item 134 | ] 135 | max_sample_budget = left_budget = state.left_budget + self.min_sample_tokens 136 | if self.reserved_tokens_budget_for_sampling: 137 | max_sample_budget = min( 138 | self.reserved_tokens_budget_for_sampling, left_budget 139 | ) 140 | return CallingMessages( 141 | references=state.references, 142 | messages=flat_list, 143 | prompt_budget=prompt_budget, 144 | left_budget=left_budget, 145 | max_sample_budget=max_sample_budget, 146 | ) 147 | 148 | def add_values_while_fits( 149 | self, 150 | values: list[ChatMessage], 151 | chat_value: ChatsEntity, 152 | state: State, 153 | ): 154 | add_in_reverse_order = chat_value.add_in_reverse_order 155 | if add_in_reverse_order: 156 | values = values[::-1] 157 | values_to_add = [] 158 | messages_budget = 0 159 | is_fully_fitted = True 160 | if not values: 161 | logger.debug( 162 | f"[{self.task_name}]: values to add is empty {chat_value.content}" 163 | ) 164 | for i, value in enumerate(values): 165 | if not self.is_value_not_empty(value): 166 | continue 167 | one_budget = self.calculate_budget_for_value(value) 168 | 169 | if not self.is_enough_budget(state, one_budget + messages_budget): 170 | is_fully_fitted = False 171 | logger.debug( 172 | f"not enough budget:{chat_value.content[:30]} with index {i}," 173 | " for while_fits, breaking the loop" 174 | ) 175 | left_budget = state.left_budget - messages_budget 176 | if ( 177 | chat_value.continue_if_doesnt_fit 178 | and left_budget > settings.EXPECTED_MIN_BUDGET_FOR_VALUABLE_INPUT 179 | ): 180 | continue 181 | break 182 | messages_budget += one_budget 183 | values_to_add.append(value) 184 | if value.ref_name and value.ref_value: 185 | state.references[value.ref_name].append(value.ref_value) 186 | if is_fully_fitted and chat_value.label: 187 | state.fully_fitted_pipitas.add(chat_value.label) 188 | if add_in_reverse_order: 189 | values_to_add = values_to_add[::-1] 190 | return messages_budget, values_to_add 191 | 192 | def is_enough_budget(self, state: State, required_budget: int) -> bool: 193 | return state.left_budget >= required_budget 194 | 195 | def add_values_in_one_message( 196 | self, 197 | values: list[ChatMessage], 198 | chat_value: ChatsEntity, 199 | state: State, 200 | ) -> CallingMessages: 201 | one_message_budget = 0 202 | one_message = None 203 | is_fully_fitted = True 204 | if not values: 205 | logger.debug( 206 | f"[{self.task_name}]: values to add is empty {chat_value.content}" 207 | ) 208 | 209 | for i, value in enumerate(values): 210 | if not self.is_value_not_empty(value): 211 | continue 212 | one_budget = self.calculate_budget_for_value(value) 213 | if not self.is_enough_budget(state, one_budget + one_message_budget): 214 | is_fully_fitted = False 215 | logger.debug( 216 | f"not enough budget:\n{chat_value.content[:30]} with index {i}," 217 | f" for while_fits, breaking the loop." 
218 | f" Budget required: {one_budget}, " 219 | f"left: {state.left_budget - one_message_budget}" 220 | ) 221 | 222 | left_budget = state.left_budget - one_message_budget 223 | if ( 224 | chat_value.continue_if_doesnt_fit 225 | and left_budget > settings.EXPECTED_MIN_BUDGET_FOR_VALUABLE_INPUT 226 | ): 227 | continue 228 | break 229 | 230 | one_message_budget += one_budget 231 | if one_message: 232 | one_message.content += "\n" + value.content 233 | else: 234 | one_message = value 235 | if value.ref_name and value.ref_value: 236 | state.references[value.ref_name].append(value.ref_value) 237 | if is_fully_fitted and chat_value.label: 238 | state.fully_fitted_pipitas.add(chat_value.label) 239 | return one_message_budget, [] if not one_message else [one_message] 240 | 241 | @property 242 | def left_budget(self) -> int: 243 | return self.model_max_tokens - self.min_sample_tokens - self.safe_gap_tokens 244 | 245 | def calculate_budget_for_value(self, value: ChatMessage) -> int: 246 | content = len(self.encoding.encode(value.content)) 247 | role = len(self.encoding.encode(value.role)) 248 | tool_calls = len(self.encoding.encode(value.tool_calls.get("name", ""))) 249 | arguments = len(self.encoding.encode(value.tool_calls.get("arguments", ""))) 250 | return content + role + tool_calls + arguments + settings.SAFE_GAP_PER_MSG 251 | 252 | def is_value_not_empty(self, value: ChatMessage) -> bool: 253 | if not value: 254 | return False 255 | if value.content is None: 256 | return False 257 | return True 258 | 259 | def add_values( 260 | self, 261 | values: t.List[ChatMessage], 262 | state: State, 263 | ) -> t.Tuple[int, t.List[ChatMessage]]: 264 | budget = 0 265 | result = [] 266 | 267 | for value in values: 268 | if not self.is_value_not_empty(value): 269 | logger.debug(f"[{self.task_name}]: is_value_not_empty failed {value}") 270 | continue 271 | budget += self.calculate_budget_for_value(value) 272 | result.append(value) 273 | if value.ref_name and value.ref_value: 274 | state.references[value.ref_name].append(value.ref_value) 275 | return budget, result 276 | 277 | def __str__(self) -> str: 278 | result = "" 279 | for chat_value in self.pipe: 280 | result += f"{chat_value}\n" 281 | return result 282 | 283 | def to_dict(self) -> dict: 284 | return [chat_value.to_dict() for chat_value in self.pipe] 285 | -------------------------------------------------------------------------------- /lamoom/response_parsers/response_parser.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | import json 3 | import logging 4 | 5 | import yaml 6 | 7 | from lamoom.exceptions import NotParsedResponseException 8 | from lamoom.responses import AIResponse 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @dataclass 14 | class Tag: 15 | start_tag: str 16 | end_tag: str 17 | include_tag: bool 18 | is_right_find_end_ind: bool = False 19 | 20 | 21 | @dataclass 22 | class TaggedContent: 23 | content: str 24 | start_ind: int 25 | end_ind: int 26 | parsed_content: any = None 27 | 28 | 29 | def get_yaml_from_response(response: AIResponse): 30 | content, start_ind, end_ind = _get_format_from_response( 31 | response, [Tag("```yaml", "```", 0, 0), Tag("```", "```", 0, 0)] 32 | ) 33 | parsed_content = None 34 | if content: 35 | try: 36 | parsed_content = yaml.safe_load(content) 37 | except Exception as e: 38 | logger.exception(f"Couldn't parse yaml:\n{content}") 39 | return TaggedContent( 40 | content=content, 41 | parsed_content=parsed_content, 42 | 
start_ind=start_ind, 43 | end_ind=end_ind, 44 | ) 45 | 46 | 47 | def get_json_from_response(response: AIResponse, start_from: int = 0) -> TaggedContent: 48 | content, start_ind, end_ind = _get_format_from_response( 49 | response, 50 | [Tag("```json", "\n```", 0), Tag("```json", "```", 0), Tag("{", "}", 1)], 51 | start_from=start_from, 52 | ) 53 | if content: 54 | try: 55 | json_response = eval(content) 56 | return TaggedContent( 57 | content=content, 58 | parsed_content=json_response, 59 | start_ind=start_ind, 60 | end_ind=end_ind, 61 | ) 62 | except Exception as e: 63 | try: 64 | json_response = json.loads(content) 65 | return TaggedContent( 66 | content=content, 67 | parsed_content=json_response, 68 | start_ind=start_ind, 69 | end_ind=end_ind, 70 | ) 71 | except Exception as e: 72 | logger.exception(f"Couldn't parse json:\n{content}") 73 | raise NotParsedResponseException() 74 | 75 | 76 | def _get_format_from_response( 77 | response: AIResponse, tags: list[Tag], start_from: int = 0 78 | ): 79 | start_ind, end_ind = 0, -1 80 | content = response.response[start_from:] 81 | for t in tags: 82 | start_ind = content.find(t.start_tag) 83 | if t.is_right_find_end_ind: 84 | end_ind = content.rfind(t.end_tag, start_ind + len(t.start_tag)) 85 | else: 86 | end_ind = content.find(t.end_tag, start_ind + len(t.start_tag)) 87 | if start_ind != -1: 88 | try: 89 | if t.include_tag: 90 | end_ind += len(t.end_tag) 91 | else: 92 | start_ind += len(t.start_tag) 93 | response_tagged = content[start_ind:end_ind].strip() 94 | return response_tagged, start_from + start_ind, start_from + end_ind 95 | except Exception as e: 96 | logger.exception(f"Couldn't parse json:\n{content}") 97 | return None, 0, -1 98 | -------------------------------------------------------------------------------- /lamoom/responses.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | import json 3 | import logging 4 | from dataclasses import dataclass, field 5 | import typing as t 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | @dataclass 11 | class Prompt: 12 | messages: dict = None 13 | functions: dict = None 14 | max_tokens: int = 0 15 | temperature: Decimal = Decimal(0.0) 16 | top_p: Decimal = Decimal(0.0) 17 | 18 | 19 | @dataclass 20 | class Metrics: 21 | price_of_call: Decimal = None 22 | sample_tokens_used: int = None 23 | prompt_tokens_used: int = None 24 | ai_model_details: dict = None 25 | latency: int = None 26 | 27 | 28 | @dataclass(kw_only=True) 29 | class AIResponse: 30 | _response: str = "" 31 | original_result: object = None 32 | content: str = "" 33 | finish_reason: str = "" 34 | prompt: Prompt = field(default_factory=Prompt) 35 | metrics: Metrics = field(default_factory=Metrics) 36 | id: str = "" 37 | 38 | @property 39 | def response(self) -> str: 40 | return self._response 41 | 42 | def get_message_str(self) -> str: 43 | return json.loads(self.response) 44 | -------------------------------------------------------------------------------- /lamoom/services/SaveWorker.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import queue 3 | import typing as t 4 | from time import sleep 5 | from lamoom.responses import AIResponse 6 | 7 | from lamoom.services.lamoom import LamoomService 8 | 9 | 10 | class SaveWorker: 11 | def __init__(self): 12 | self.queue = queue.Queue() 13 | self.thread = threading.Thread(target=self.worker) 14 | self.thread.daemon = True # Daemon thread exits when main program 
exits 15 | self.thread.start() 16 | 17 | def save_user_interaction_async( 18 | self, 19 | api_token: str, 20 | prompt_data: t.Dict[str, t.Any], 21 | context: t.Dict[str, t.Any], 22 | response: AIResponse, 23 | ): 24 | LamoomService.save_user_interaction( 25 | api_token, prompt_data, context, response 26 | ) 27 | 28 | def worker(self): 29 | while True: 30 | task = self.queue.get() 31 | if task is None: 32 | sleep(1) 33 | continue 34 | api_token, prompt_data, context, response, test_data = task 35 | LamoomService.save_user_interaction( 36 | api_token, prompt_data, context, response 37 | ) 38 | LamoomService.create_test_with_ideal_answer( 39 | api_token, prompt_data, context, test_data 40 | ) 41 | self.queue.task_done() 42 | 43 | def add_task( 44 | self, 45 | api_token: str, 46 | prompt_data: t.Dict[str, t.Any], 47 | context: t.Dict[str, t.Any], 48 | response: AIResponse, 49 | test_data: t.Optional[dict] = None, 50 | ): 51 | self.queue.put((api_token, prompt_data, context, response, test_data)) 52 | -------------------------------------------------------------------------------- /lamoom/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/lamoom/services/__init__.py -------------------------------------------------------------------------------- /lamoom/services/lamoom/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import typing as t 4 | from dataclasses import asdict, dataclass 5 | import requests 6 | 7 | from lamoom import settings 8 | from lamoom.exceptions import NotFoundPromptError 9 | from lamoom.responses import AIResponse 10 | from lamoom.utils import DecimalEncoder, current_timestamp_ms 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | @dataclass 16 | class LamoomServiceResponse: 17 | prompt_id: str = None 18 | prompt: dict = None 19 | is_taken_globally: bool = False 20 | version: str = None 21 | 22 | 23 | class LamoomService: 24 | url: str = settings.LAMOOM_API_URI 25 | cached_prompts = {} 26 | 27 | def get_actual_prompt( 28 | self, 29 | api_token: str, 30 | prompt_id: str, 31 | prompt_data: dict = None, 32 | version: str = None, 33 | ) -> LamoomServiceResponse: 34 | """ 35 | Load prompt from lamoom 36 | if the user has keys: lib -> service: get_actual_prompt(local_prompt) -> Service: 37 | generates hash of the prompt; 38 | check in Redis if that record is the latest; if yes -> return 200, else 39 | checks if that record exists with that hash; 40 | if record exists and it's not the last - then we load the latest published prompt; - > return 200 + the last record 41 | add a new record in storage, and adding that it's the latest published prompt; -> return 200 42 | update redis with latest record; 43 | """ 44 | logger.debug( 45 | f"Received request to get actual prompt prompt_id: {prompt_id}, prompt_data: {prompt_data}, version: {version}" 46 | ) 47 | timestamp = current_timestamp_ms() 48 | logger.debug(f"Getting actual prompt for {prompt_id}") 49 | cached_prompt = None 50 | cached_prompt_taken_globally = False 51 | cached_data = self.get_cached_prompt(prompt_id) 52 | if cached_data: 53 | cached_prompt = cached_data.get("prompt") 54 | cached_prompt_taken_globally = cached_data.get("is_taken_globally") 55 | if cached_prompt: 56 | logger.debug( 57 | f"Prompt {prompt_id} is cached, returned in {current_timestamp_ms() - timestamp} ms" 58 | ) 59 | 
return LamoomServiceResponse( 60 | prompt_id=prompt_id, 61 | prompt=cached_prompt, 62 | is_taken_globally=cached_prompt_taken_globally, 63 | ) 64 | 65 | url = f"{self.url}/lib/prompts" 66 | headers = { 67 | "Authorization": f"Token {api_token}", 68 | } 69 | data = { 70 | "prompt": prompt_data, 71 | "id": prompt_id, 72 | "version": version, 73 | "is_taken_globally": cached_prompt_taken_globally, 74 | } 75 | json_data = json.dumps(data, cls=DecimalEncoder) 76 | response = requests.post(url, headers=headers, data=json_data) 77 | if response.status_code == 200: 78 | response_data = response.json() 79 | logger.debug( 80 | f"Prompt {prompt_id} found in {current_timestamp_ms() - timestamp} ms: {response_data}" 81 | ) 82 | prompt_data = response_data.get("prompt", prompt_data) 83 | is_taken_globally = response_data.get("is_taken_globally") 84 | version = response_data.get("version") 85 | 86 | # update cache 87 | self.cached_prompts[prompt_id] = { 88 | "prompt": prompt_data, 89 | "timestamp": current_timestamp_ms(), 90 | "is_taken_globally": is_taken_globally, 91 | "version": version, 92 | } 93 | # returns 200 and the latest published prompt, if the local prompt is the latest, doesn't return the prompt 94 | return LamoomServiceResponse( 95 | prompt_id=prompt_id, 96 | prompt=prompt_data, 97 | is_taken_globally=response_data.get("is_taken_globally", False), 98 | version=version, 99 | ) 100 | else: 101 | logger.debug( 102 | f"Prompt {prompt_id} not found, in {current_timestamp_ms() - timestamp} ms" 103 | ) 104 | raise NotFoundPromptError(response.json()) 105 | 106 | def get_cached_prompt(self, prompt_id: str) -> dict: 107 | cached_data = self.cached_prompts.get(prompt_id) 108 | if not cached_data: 109 | return None 110 | cached_delay = current_timestamp_ms() - cached_data.get("timestamp") 111 | if cached_delay < settings.CACHE_PROMPT_FOR_EACH_SECONDS * 1000: 112 | return cached_data 113 | return None 114 | 115 | @classmethod 116 | def clear_cache(cls): 117 | cls.cached_prompts = {} 118 | 119 | @classmethod 120 | def save_user_interaction( 121 | cls, 122 | api_token: str, 123 | prompt_data: t.Dict[str, t.Any], 124 | context: t.Dict[str, t.Any], 125 | response: AIResponse, 126 | ): 127 | url = f"{cls.url}/lib/logs" 128 | headers = {"Authorization": f"Token {api_token}"} 129 | data = { 130 | "context": context, 131 | "prompt": prompt_data, 132 | "response": {"content": response.content}, 133 | "metrics": asdict(response.metrics), 134 | "request": asdict(response.prompt), 135 | "timestamp": response.id.split("#")[1], 136 | } 137 | 138 | logger.debug(f"Request to {url} with data: {data}") 139 | json_data = json.dumps(data, cls=DecimalEncoder) 140 | 141 | response = requests.post(url, headers=headers, data=json_data) 142 | if response.status_code == 200: 143 | return response.json() 144 | else: 145 | logger.error(response) 146 | 147 | @classmethod 148 | def update_response_ideal_answer( 149 | cls, api_token: str, log_id: str, ideal_answer: str 150 | ): 151 | url = f"{cls.url}/lib/logs" 152 | headers = {"Authorization": f"Token {api_token}"} 153 | data = {"log_id": log_id, "ideal_answer": ideal_answer} 154 | 155 | logger.debug(f"Request to {url} with data: {data}") 156 | json_data = json.dumps(data, cls=DecimalEncoder) 157 | 158 | response = requests.put(url, headers=headers, data=json_data) 159 | 160 | if response.status_code == 200: 161 | return response.json() 162 | else: 163 | logger.error(response) 164 | return response 165 | 166 | @classmethod 167 | def create_test_with_ideal_answer( 168 | cls, 169 | 
api_token: str, 170 | prompt_data: t.Dict[str, t.Any], 171 | context: t.Dict[str, t.Any], 172 | test_data: dict, 173 | ): 174 | ideal_answer = test_data.get("ideal_answer", None) 175 | if not ideal_answer: 176 | return 177 | url = f"{cls.url}/lib/tests" 178 | headers = {"Authorization": f"Token {api_token}"} 179 | model_name = test_data.get("model_name") or test_data.get("call_model") or None 180 | data = { 181 | "context": context, 182 | "prompt": prompt_data, 183 | "ideal_answer": ideal_answer, 184 | "model_name": model_name, 185 | } 186 | logger.debug(f"Request to {url} with data: {data}") 187 | json_data = json.dumps(data) 188 | requests.post(url, headers=headers, data=json_data) 189 | logger.info(f"Created Ci/CD for prompt {prompt_data['prompt_id']}") 190 | -------------------------------------------------------------------------------- /lamoom/settings.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | import json 3 | import os 4 | 5 | from lamoom.utils import parse_bool 6 | 7 | 8 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 9 | TEMP_SCRIPTS_DIR = os.environ.get( 10 | "LAMOOM_TEMP_SCRIPTS_DIR", os.path.join(BASE_DIR, "temp_scripts") 11 | ) 12 | SAVE_PROMPTS_LOCALLY = os.environ.get("LAMOOM_SAVE_PROMPTS_LOCALLY", False) 13 | ENVIRONMENT = os.environ.get("LAMOOM_ENVIRONMENT", "prod") 14 | 15 | DEFAULT_MAX_BUDGET = os.environ.get("LAMOOM_DEFAULT_MAX_BUDGET", 16000) 16 | DEFAULT_SAMPLE_MIN_BUDGET = os.environ.get("LAMOOM_DEFAULT_ANSWER_BUDGET", 3000) 17 | DEFAULT_PROMPT_BUDGET = os.environ.get( 18 | "LAMOOM_DEFAULT_PROMPT_BUDGET", DEFAULT_MAX_BUDGET - DEFAULT_SAMPLE_MIN_BUDGET 19 | ) 20 | 21 | EXPECTED_MIN_BUDGET_FOR_VALUABLE_INPUT = os.environ.get( 22 | "LAMOOM_EXPECTED_MIN_BUDGET_FOR_VALUABLE_INPUT", 100 23 | ) 24 | 25 | SAFE_GAP_TOKENS: int = os.environ.get("LAMOOM_SAFE_GAP_TOKENS", 100) 26 | SAFE_GAP_PER_MSG: int = os.environ.get("LAMOOM_SAFE_GAP_PER_MSG", 4) 27 | DEFAULT_ENCODING = "cl100k_base" 28 | 29 | USE_API_SERVICE = parse_bool(os.environ.get("LAMOOM_USE_API_SERVICE", True)) 30 | LAMOOM_API_URI = os.environ.get("LAMOOM_API_URI") or os.environ.get("FLOW_PROMPT_API_URI") or "https://api.lamoom.com" 31 | CACHE_PROMPT_FOR_EACH_SECONDS = int( 32 | os.environ.get("LAMOOM_CACHE_PROMPT_FOR_EACH_SECONDS", 5 * 60) 33 | ) # 5 minutes by default 34 | RECEIVE_PROMPT_FROM_SERVER = parse_bool( 35 | os.environ.get("LAMOOM_RECEIVE_PROMPT_FROM_SERVER", True) 36 | ) 37 | PIPE_PROMPTS = {} 38 | FALLBACK_MODELS = [] 39 | 40 | 41 | @dataclass 42 | class Secrets: 43 | API_TOKEN: str = field(default_factory=lambda: os.getenv("LAMOOM_API_TOKEN", os.getenv("FLOW_PROMPT_API_TOKEN"))) 44 | OPENAI_API_KEY: str = field(default_factory=lambda: os.getenv("OPENAI_API_KEY")) 45 | CLAUDE_API_KEY: str = field(default_factory=lambda: os.getenv("CLAUDE_API_KEY")) 46 | GEMINI_API_KEY: str = field(default_factory=lambda: os.getenv("GEMINI_API_KEY")) 47 | NEBIUS_API_KEY: str = field(default_factory=lambda: os.getenv("NEBIUS_API_KEY")) 48 | CUSTOM_API_KEY: str = field(default_factory=lambda: os.getenv("CUSTOM_API_KEY")) 49 | OPENAI_ORG: str = field(default_factory=lambda: os.getenv("OPENAI_ORG")) 50 | azure_keys: dict = field( 51 | default_factory=lambda: json.loads( 52 | os.getenv("azure_keys", os.getenv("AZURE_OPENAI_KEYS", os.getenv("AZURE_KEYS", "{}"))) 53 | ) 54 | ) 55 | -------------------------------------------------------------------------------- /lamoom/utils.py: 
-------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | import json 3 | import logging 4 | import typing as t 5 | from time import time 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def parse_bool(value: any) -> bool: 11 | if type(value) == bool: 12 | return value 13 | return str(value).lower() in ("true", "1", "yes") 14 | 15 | 16 | def current_timestamp_ms(): 17 | return int(time() * 1000) 18 | 19 | 20 | def resolve(prompt: str, context: t.Dict[str, str]) -> str: 21 | if not prompt or "{" not in prompt: 22 | return prompt 23 | # TODO: monitor how many values were not resolved and what values 24 | for key in context: 25 | prompt = prompt.replace(f"{{{key}}}", str(context[key])) 26 | return prompt 27 | 28 | 29 | class DecimalEncoder(json.JSONEncoder): 30 | def default(self, o): 31 | if isinstance(o, Decimal): 32 | return str(o) 33 | return super(DecimalEncoder, self).default(o) 34 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "lamoom" 3 | version = "0.1.37" 4 | description = "" 5 | authors = ["Lamoom Engineering Team "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.9" 10 | tiktoken = ">=0.5.2" 11 | pyyaml = "^6.0.1" 12 | openai = "^1.65.1" 13 | anthropic = "^0.31.2" 14 | httpx = "^0.27.2" 15 | 16 | [tool.poetry.dev-dependencies] 17 | poetry = "^1.7.1" 18 | isort = "^5.13.2" 19 | flake8 = "^7.0.0" 20 | pytest = "^7.1.2" 21 | pytest-cov = "^3.0.0" 22 | pre-commit = "^3.6.0" 23 | autopep8 = "^2.0.4" 24 | python-dotenv = "^1.0.1" 25 | twine = "^5.0.0" 26 | 27 | 28 | [tool.poetry.group.dev.dependencies] 29 | black = "^24.4.2" 30 | ipykernel = "^6.29.5" 31 | 32 | [build-system] 33 | requires = ["poetry-core"] 34 | build-backend = "poetry.core.masonry.api" 35 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/tests/__init__.py -------------------------------------------------------------------------------- /tests/ai_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/tests/ai_models/__init__.py -------------------------------------------------------------------------------- /tests/ai_models/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import requests 3 | import openai 4 | from unittest.mock import Mock 5 | from lamoom.ai_models.openai.exceptions import ( 6 | OpenAIAuthenticationError, 7 | OpenAIBadRequestError, 8 | OpenAIChunkedEncodingError, 9 | OpenAIInternalError, 10 | OpenAIInvalidRequestError, 11 | OpenAiPermissionDeniedError, 12 | OpenAiRateLimitError, 13 | OpenAIResponseWasFilteredError, 14 | OpenAITimeoutError, 15 | OpenAIUnknownError, 16 | ) 17 | from lamoom.ai_models.openai.utils import raise_openai_exception 18 | 19 | @pytest.fixture 20 | def mock_response(): 21 | return Mock() 22 | 23 | def test_raise_openai_exception_with_chunked_encoding_error(): 24 | with pytest.raises(OpenAIChunkedEncodingError): 25 | raise_openai_exception(requests.exceptions.ChunkedEncodingError()) 26 | 27 | 
def test_raise_openai_exception_with_timeout_error(mock_response: Mock): 28 | with pytest.raises(OpenAITimeoutError): 29 | raise_openai_exception(openai.APITimeoutError(request=mock_response)) 30 | 31 | def test_raise_openai_exception_with_bad_request_error_filtered_response(mock_response: Mock): 32 | with pytest.raises(OpenAIResponseWasFilteredError): 33 | raise_openai_exception(openai.BadRequestError(message="response was filtered", response=mock_response, body=None)) 34 | 35 | def test_raise_openai_exception_with_bad_request_error_rate_limit(mock_response: Mock): 36 | with pytest.raises(OpenAiRateLimitError): 37 | raise_openai_exception(openai.BadRequestError(message="Too many inputs", response=mock_response, body=None)) 38 | 39 | def test_raise_openai_exception_with_bad_request_error_invalid_request(mock_response: Mock): 40 | with pytest.raises(OpenAIInvalidRequestError): 41 | raise_openai_exception(openai.BadRequestError(message="Some other bad request error", response=mock_response, body=None)) 42 | 43 | def test_raise_openai_exception_with_rate_limit_error(mock_response: Mock): 44 | with pytest.raises(OpenAiRateLimitError): 45 | raise_openai_exception(openai.RateLimitError(response=mock_response, message="Rate limit error", body=None)) 46 | 47 | def test_raise_openai_exception_with_authentication_error(mock_response: Mock): 48 | with pytest.raises(OpenAIAuthenticationError): 49 | raise_openai_exception(openai.AuthenticationError(message="Authentication error", response=mock_response, body=None)) 50 | 51 | def test_raise_openai_exception_with_internal_server_error(mock_response: Mock): 52 | with pytest.raises(OpenAIInternalError): 53 | raise_openai_exception(openai.InternalServerError(message="Internal server error", response=mock_response, body=None)) 54 | 55 | def test_raise_openai_exception_with_permission_denied_error(mock_response: Mock): 56 | with pytest.raises(OpenAiPermissionDeniedError): 57 | raise_openai_exception(openai.PermissionDeniedError(message="Permission denied error", response=mock_response, body=None)) 58 | 59 | def test_raise_openai_exception_with_api_status_error(mock_response: Mock): 60 | with pytest.raises(OpenAIBadRequestError): 61 | raise_openai_exception(openai.APIStatusError(message="API status error", response=mock_response, body=None)) 62 | 63 | def test_raise_openai_exception_with_unknown_error(): 64 | with pytest.raises(OpenAIUnknownError): 65 | class UnknownError(Exception): 66 | pass 67 | raise_openai_exception(UnknownError()) 68 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | 2 | import pytest 3 | import dotenv 4 | dotenv.load_dotenv(dotenv.find_dotenv()) 5 | 6 | from lamoom.ai_models import behaviour 7 | from lamoom.ai_models.attempt_to_call import AttemptToCall 8 | from lamoom.ai_models.openai.azure_models import AzureAIModel 9 | from lamoom.ai_models.openai.openai_models import C_128K, C_32K, OpenAIModel 10 | from openai.types.chat.chat_completion import ChatCompletion 11 | from lamoom.prompt.lamoom import Lamoom 12 | from lamoom.prompt.prompt import Prompt 13 | 14 | import logging 15 | 16 | 17 | @pytest.fixture(autouse=True) 18 | def set_log_level(): 19 | logging.getLogger().setLevel(logging.DEBUG) 20 | 21 | @pytest.fixture 22 | def lamoom(): 23 | return Lamoom( 24 | openai_key="123", 25 | azure_keys={"us-east-1": {"url": "https://us-east-1.api.azure.openai.org", "key": "123"}} 26 | ) 27 | 28 | 
@pytest.fixture 29 | def openai_gpt_4_behaviour(): 30 | return behaviour.AIModelsBehaviour( 31 | attempts=[ 32 | AttemptToCall( 33 | ai_model=OpenAIModel( 34 | model="gpt-4-1106-preview", 35 | max_tokens=C_128K, 36 | support_functions=True, 37 | ), 38 | weight=100, 39 | ), 40 | ] 41 | ) 42 | 43 | 44 | @pytest.fixture 45 | def azure_gpt_4_behaviour(): 46 | return behaviour.AIModelsBehaviour( 47 | attempts=[ 48 | AttemptToCall( 49 | ai_model=AzureAIModel( 50 | realm='useast', 51 | deployment_id="gpt-4o", 52 | max_tokens=C_128K, 53 | support_functions=True, 54 | ), 55 | weight=100, 56 | ), 57 | ] 58 | ) 59 | 60 | 61 | @pytest.fixture 62 | def gpt_4_behaviour(): 63 | return behaviour.AIModelsBehaviour( 64 | attempts=[ 65 | AttemptToCall( 66 | ai_model=AzureAIModel( 67 | realm='useast', 68 | deployment_id='gpt-4o', 69 | max_tokens=C_128K, 70 | ), 71 | weight=1, 72 | ), 73 | AttemptToCall( 74 | ai_model=OpenAIModel( 75 | model="gpt-4-1106-preview", 76 | max_tokens=C_128K, 77 | support_functions=True, 78 | ), 79 | weight=100, 80 | ), 81 | ], 82 | fallback_attempt=AttemptToCall( 83 | ai_model=AzureAIModel( 84 | realm="useast", 85 | deployment_id="gpt-4o", 86 | max_tokens=C_32K, 87 | support_functions=True, 88 | ), 89 | weight=1, 90 | ), 91 | ) 92 | 93 | @pytest.fixture 94 | def hello_world_prompt(): 95 | prompt = Prompt(id='hello-world') 96 | prompt.add("I'm Lamoom, and I just broke up with my girlfriend, Python. She said I had too many 'undefined behaviors'. 🐍💔 ") 97 | prompt.add(""" 98 | I'm sorry to hear about your breakup with Python. It sounds like a challenging situation, 99 | especially with 'undefined behaviors' being a point of contention. Remember, in the world of programming and AI, 100 | every challenge is an opportunity to learn and grow. Maybe this is a chance for you to debug some issues 101 | and optimize your algorithms for future compatibility! If you have any specific programming or AI-related questions, 102 | feel free to ask.""", role='assistant') 103 | prompt.add(""" 104 | Maybe it's for the best. I was always complaining about her lack of Java in the mornings! 
:coffee: 105 | """) 106 | return prompt 107 | 108 | 109 | @pytest.fixture 110 | def chat_completion_openai(): 111 | return ChatCompletion( 112 | **{ 113 | "id": "id", 114 | "choices": [ 115 | { 116 | "finish_reason": "stop", 117 | "index": 0, 118 | "message": { 119 | "content": "Hey you!", 120 | "role": "assistant", 121 | "function_call": None, 122 | }, 123 | "logprobs": None, 124 | } 125 | ], 126 | "created": 12345, 127 | "model": "gpt-4", 128 | "object": "chat.completion", 129 | "system_fingerprint": "dasdsas", 130 | "usage": { 131 | "completion_tokens": 10, 132 | "prompt_tokens": 20, 133 | "total_tokens": 30, 134 | }, 135 | } 136 | ) -------------------------------------------------------------------------------- /tests/prompts/test_chat.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from lamoom.prompt.chat import ChatsEntity 5 | 6 | 7 | def test_chats_entity_resolve_not_multiple(): 8 | ce = ChatsEntity(content="{greeting} World") 9 | context = {"greeting": "Hello"} 10 | resolved = ce.resolve(context) 11 | assert resolved[0].content == "Hello World" 12 | 13 | 14 | def test_chats_entity_resolve_multiple(): 15 | ce = ChatsEntity(content="{messages}", is_multiple=True) 16 | context = { 17 | "messages": [ 18 | {"role": "user", "content": "Hi"}, 19 | {"role": "bot", "content": "Hello"}, 20 | ] 21 | } 22 | resolved = ce.resolve(context) 23 | assert isinstance(resolved, list) 24 | assert len(resolved) == 2 25 | assert resolved[0].role == "user" 26 | assert resolved[0].content == "Hi" 27 | assert resolved[1].role == "bot" 28 | assert resolved[1].content == "Hello" 29 | 30 | 31 | def test_chats_entity_resolve_not_exists(): 32 | ce = ChatsEntity(content="{greeting} World") 33 | context = {"content": "Hello World"} 34 | resolved = ce.resolve(context) 35 | assert resolved[0].content == "{greeting} World" 36 | assert resolved[0].role == "user" 37 | 38 | 39 | def test_chats_entity_get_values(): 40 | ce = ChatsEntity(content="{greeting} World") 41 | context = {"greeting": "Hello"} 42 | values = ce.get_values(context) 43 | assert ce.add_in_reverse_order is False 44 | assert values[0].content == "Hello World" 45 | assert values[0].role == "user" 46 | -------------------------------------------------------------------------------- /tests/prompts/test_ci_cd.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | 4 | from pytest import fixture 5 | from lamoom import Lamoom, Prompt 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | @fixture 10 | def client(): 11 | import dotenv 12 | dotenv.load_dotenv(dotenv.find_dotenv()) 13 | lamoom = Lamoom() 14 | return lamoom 15 | 16 | 17 | def stream_function(text, **kwargs): 18 | print(text) 19 | 20 | def stream_check_connection(validate, **kwargs): 21 | return validate 22 | 23 | def test_creating_lamoom_test(client): 24 | 25 | context = { 26 | 'ideal_answer': "There are eight planets", 27 | 'text': "Hi! Please tell me how many planets are there in the solar system?" 
28 | } 29 | 30 | # initial version of the prompt 31 | prompt_id = f'unit-test-creating_fp_test' 32 | client.service.clear_cache() 33 | prompt = Prompt(id=prompt_id) 34 | prompt.add("{text}", role='user') 35 | 36 | client.call(prompt.id, context, "azure/useast/gpt-4o", test_data={'ideal_answer': "There are eight", 'model_name': "gemini/gemini-1.5-flash"}, stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) -------------------------------------------------------------------------------- /tests/prompts/test_create_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | 5 | from pytest import fixture 6 | from lamoom import Lamoom, Prompt 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | @fixture 11 | def client(): 12 | api_token = os.getenv("LAMOOM_API_TOKEN") 13 | lamoom = Lamoom(api_token=api_token) 14 | return lamoom 15 | 16 | 17 | def test_creating_lamoom_test(client): 18 | context = { 19 | 'ideal_answer': "There are eight planets", 20 | 'text': "Hi! Please tell me how many planets are there in the solar system?" 21 | } 22 | 23 | # initial version of the prompt 24 | prompt_id = f'unit-test-creating_fp_test' 25 | client.service.clear_cache() 26 | prompt = Prompt(id=prompt_id) 27 | prompt.add("{text}", role='user') 28 | 29 | client.create_test(prompt_id, context, model_name="gemini/gemini-1.5-flash") -------------------------------------------------------------------------------- /tests/prompts/test_model.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | import time 4 | from pytest import fixture 5 | from lamoom import Lamoom, Prompt 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | @fixture 10 | def client(): 11 | import dotenv 12 | dotenv.load_dotenv(dotenv.find_dotenv()) 13 | lamoom = Lamoom() 14 | return lamoom 15 | 16 | 17 | def test_model(client): 18 | 19 | context = { 20 | 'text': "Hi! Please tell me how many planets are there in the solar system?" 
21 | } 22 | 23 | # initial version of the prompt 24 | prompt_id = f'test-{time.time()}' 25 | client.service.clear_cache() 26 | prompt = Prompt(id=prompt_id) 27 | prompt.add("{text}", role='user') 28 | 29 | result = client.call(prompt.id, context, "custom/deepseek-ai/DeepSeek-R1", provider_url="https://api.studio.nebius.ai/v1/") 30 | assert result.content 31 | 32 | result = client.call(prompt.id, context, "openai/gpt-4o") 33 | assert result.content 34 | 35 | result = client.call(prompt.id, context, "azure/useast/gpt-4o") 36 | assert result.content 37 | 38 | result = client.call(prompt.id, context, "gemini/gemini-1.5-flash") 39 | assert result.content 40 | 41 | result = client.call(prompt.id, context, "claude/claude-3-5-haiku-latest") 42 | assert result.content -------------------------------------------------------------------------------- /tests/prompts/test_pricing.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import time 5 | 6 | from pytest import fixture 7 | import dotenv 8 | dotenv.load_dotenv(dotenv.find_dotenv()) 9 | from lamoom import Lamoom, Prompt 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @fixture 14 | def lamoom_client(): 15 | 16 | lamoom = Lamoom() 17 | return lamoom 18 | 19 | def stream_function(text, **kwargs): 20 | print(text) 21 | 22 | def stream_check_connection(validate, **kwargs): 23 | return validate 24 | 25 | 26 | def test_openai_pricing(lamoom_client: Lamoom): 27 | 28 | context = { 29 | 'ideal_answer': "There are eight planets", 30 | 'text': "Hi! Please tell me how many planets are there in the solar system?" 31 | } 32 | 33 | # initial version of the prompt 34 | prompt_id = f'unit-test_openai_pricing' 35 | lamoom_client.service.clear_cache() 36 | prompt = Prompt(id=prompt_id) 37 | prompt.add("{text}", role='user') 38 | 39 | result_4o = lamoom_client.call(prompt.id, context, "openai/gpt-4o", test_data={'ideal_answer': "There are eight", 'behavior_name': "gemini"}, stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) 40 | result_4o_mini = lamoom_client.call(prompt.id, context, "openai/gpt-4o-mini", test_data={'ideal_answer': "There are eight", 'behavior_name': "gemini"}, stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) 41 | 42 | assert result_4o.metrics.price_of_call > 0 43 | assert result_4o_mini.metrics.price_of_call > 0 44 | 45 | 46 | def test_claude_pricing(lamoom_client: Lamoom): 47 | 48 | context = { 49 | 'ideal_answer': "There are eight planets", 50 | 'text': "Hi! Please tell me how many planets are there in the solar system?" 
51 | } 52 | 53 | # initial version of the prompt 54 | prompt_id = f'test-{time.time()}' 55 | lamoom_client.service.clear_cache() 56 | prompt = Prompt(id=prompt_id) 57 | prompt.add("{text}", role='user') 58 | 59 | result_haiku = lamoom_client.call(prompt.id, context, "claude/claude-3-5-haiku-latest", test_data={'ideal_answer': "There are eight", 'behavior_name': "gemini"}, stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) 60 | result_sonnet = lamoom_client.call(prompt.id, context, "claude/claude-3-5-sonnet-latest", test_data={'ideal_answer': "There are eight", 'behavior_name': "gemini"}, stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) 61 | 62 | assert result_sonnet.metrics.price_of_call > result_haiku.metrics.price_of_call -------------------------------------------------------------------------------- /tests/prompts/test_prompt.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from lamoom.ai_models.attempt_to_call import AttemptToCall 4 | from lamoom.ai_models.openai.azure_models import AzureAIModel 5 | from lamoom.ai_models.openai.openai_models import C_128K 6 | from lamoom.exceptions import NotEnoughBudgetError 7 | from lamoom.prompt.prompt import Prompt 8 | 9 | import pytest 10 | 11 | 12 | 13 | @pytest.fixture 14 | def azure_ai_attempt(): 15 | return AttemptToCall( 16 | ai_model=AzureAIModel( 17 | realm='useast', 18 | deployment_id="gpt-4o", 19 | max_tokens=C_128K, 20 | support_functions=True, 21 | ), 22 | weight=100, 23 | ) 24 | 25 | 26 | def test_load_dump_prompt(): 27 | prompt = Prompt(id='hello-world', max_tokens=100) 28 | prompt.add("I'm Lamoom, and I just broke up with my girlfriend, Python. She said I had too many 'undefined behaviors'. 🐍💔 ") 29 | prompt.add(""" 30 | I'm sorry to hear about your breakup with Python. It sounds like a challenging situation, 31 | especially with 'undefined behaviors' being a point of contention. Remember, in the world of programming and AI, 32 | every challenge is an opportunity to learn and grow. Maybe this is a chance for you to debug some issues 33 | and optimize your algorithms for future compatibility! If you have any specific programming or AI-related questions, 34 | feel free to ask.""", role='assistant', priority=2) 35 | prompt.add(""" 36 | Maybe it's for the best. I was always complaining about her lack of Java in the mornings! :coffee: 37 | """) 38 | loaded_prompt = Prompt.load(prompt.dump()) 39 | assert prompt.dump() == loaded_prompt.dump() 40 | 41 | 42 | def test_prompt_add(azure_ai_attempt: AttemptToCall): 43 | pipe = Prompt(id='test') 44 | pipe.add("Hello, how can I help you today?") 45 | uer_prompt = pipe.create_prompt(azure_ai_attempt) 46 | assert len(uer_prompt.pipe) == 1 47 | assert uer_prompt.priorities[0][0].content == "Hello, how can I help you today?" 48 | 49 | 50 | def test_prompt_initialize(azure_ai_attempt: AttemptToCall): 51 | pipe = Prompt(id='test') 52 | user_prompt = pipe.create_prompt(azure_ai_attempt) 53 | 54 | user_prompt.add("Hello, how can I help you today?") 55 | 56 | context = {} 57 | initialized_pipe = user_prompt.resolve(context) 58 | messages = initialized_pipe.messages 59 | assert len(messages) == 1 60 | assert messages[0].content == "Hello, how can I help you today?" 
61 | 62 | 63 | def test_prompt_initialize_not_enough_budget(azure_ai_attempt: AttemptToCall): 64 | pipe = Prompt(id='test') 65 | user_prompt = pipe.create_prompt(azure_ai_attempt) 66 | user_prompt.add("Hello, how can I help you today?", required=True) 67 | 68 | context = {} 69 | user_prompt.min_sample_tokens = 1299 # Not enough tokens for the message 70 | user_prompt.model_max_tokens = 1300 # Not enough tokens for the message 71 | with pytest.raises(NotEnoughBudgetError): 72 | user_prompt.resolve(context) 73 | 74 | 75 | def test_prompt_show_pipe(): 76 | pipe = Prompt(id='test') 77 | pipe.add("Hello, how can I help you today?") 78 | pipe_dump = pipe.dump() 79 | assert pipe_dump['id'] == 'test' 80 | assert pipe_dump['max_tokens'] is None 81 | assert pipe_dump['min_sample_tokens'] == 3000 82 | assert pipe_dump['reserved_tokens_budget_for_sampling'] is None 83 | assert len(pipe_dump['pipe']) == 1 84 | assert pipe_dump['priorities'] == {0: [{'content': 'Hello, how can I help you today?', 'role': 'user', 'priority': 0, 'required': False, 'is_multiple': False, 'while_fits': False, 'add_in_reverse_order': False, 'in_one_message': False, 'continue_if_doesnt_fit': False}]} 85 | 86 | def test_prompt_left_budget(azure_ai_attempt: AttemptToCall): 87 | pipe = Prompt(id='test') 88 | pipe.add("Hello, how can I help you today?") 89 | user_prompt = pipe.create_prompt(azure_ai_attempt) 90 | user_prompt.model_max_tokens = 2030 91 | user_prompt.reserved_tokens_budget_for_sampling = 2030 92 | initialized_pipe = user_prompt.resolve({}) 93 | assert ( 94 | initialized_pipe.left_budget 95 | == user_prompt.model_max_tokens 96 | - initialized_pipe.prompt_budget 97 | - user_prompt.safe_gap_tokens 98 | ) 99 | 100 | 101 | def test_prompt_prompt_price(azure_ai_attempt: AttemptToCall): 102 | pipe = Prompt(id='test') 103 | pipe.add("Hello, how can I help you today?") 104 | user_prompt = pipe.create_prompt(azure_ai_attempt) 105 | user_prompt.model_max_tokens = 4030 106 | user_prompt.add("Hello " + 'world ' * 1000) 107 | pipe = user_prompt.resolve({}) 108 | assert len(pipe.get_messages()) == 1 109 | 110 | 111 | def test_prompt_calculate_budget_for_values(azure_ai_attempt: AttemptToCall): 112 | pipe = Prompt(id='test') 113 | pipe.max_tokens = 1400 114 | pipe.min_sample_tokens = 1000 115 | 116 | pipe.add("Priority. Hello {name}", priority=1) 117 | pipe.add("2d priority. Hello {name}", priority=2) 118 | pipe.add( 119 | "no priority. didn't fit. Hello {name}" + ("hello" * 1000), priority=2 120 | ) 121 | user_prompt = pipe.create_prompt(azure_ai_attempt) 122 | prompt = user_prompt.resolve({"name": "World"}) 123 | messages = prompt.get_messages() 124 | assert len(messages) == 2 125 | assert messages[0]["content"] == "Priority. Hello World" 126 | assert messages[1]["content"] == "2d priority. 
Hello World" 127 | 128 | 129 | def test_prompt_copy(): 130 | pipe = Prompt(id='test') 131 | pipe.add("Hello, how can I help you today?") 132 | copy = pipe.copy('new_id') 133 | assert copy.id == 'new_id' 134 | copy_dump = copy.dump() 135 | assert copy_dump['id'] == 'new_id' 136 | original_dump = pipe.dump() 137 | original_dump.pop('id') 138 | copy_dump.pop('id') 139 | assert original_dump == copy_dump 140 | -------------------------------------------------------------------------------- /tests/prompts/test_stream.py: -------------------------------------------------------------------------------- 1 | 2 | import logging 3 | import time 4 | 5 | from pytest import fixture 6 | from lamoom import Lamoom, Prompt, OpenAIResponse 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | @fixture 11 | def client(): 12 | lamoom = Lamoom() 13 | return lamoom 14 | 15 | def stream_function(text, **kwargs): 16 | print(text) 17 | 18 | def stream_check_connection(validate, **kwargs): 19 | return validate 20 | 21 | def test_stream(client: Lamoom): 22 | 23 | context = { 24 | 'messages': ['test1', 'test2'], 25 | 'assistant_response_in_progress': None, 26 | 'files': ['file1', 'file2'], 27 | 'music': ['music1', 'music2'], 28 | 'videos': ['video1', 'video2'], 29 | 'text': "Good morning. Tell me a funny joke!" 30 | } 31 | 32 | # initial version of the prompt 33 | prompt_id = f'test-{time.time()}' 34 | client.service.clear_cache() 35 | prompt = Prompt(id=prompt_id) 36 | prompt.add("{text}") 37 | prompt.add("It's a system message, Hello {name}", role="assistant") 38 | 39 | result: OpenAIResponse = client.call(prompt.id, context, "azure/useast/gpt-4o", stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) 40 | client.call(prompt.id, context, "claude/claude-3-haiku-20240307", stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) 41 | client.call(prompt.id, context, "gemini/gemini-1.5-flash", stream_function=stream_function, check_connection=stream_check_connection, params={"stream": True}, stream_params={"validate": True, "end": "", "flush": True}) 42 | 43 | assert "message" in result.to_dict() -------------------------------------------------------------------------------- /tests/response_parsers/test_response_parsers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from lamoom.responses import AIResponse 3 | from lamoom.exceptions import NotParsedResponseException 4 | from lamoom.response_parsers.response_parser import get_yaml_from_response, get_json_from_response, _get_format_from_response, Tag 5 | 6 | 7 | def test_get_yaml_from_response_valid_yaml(): 8 | response = AIResponse(_response="```yaml\nkey: value\n```") 9 | tagged_content = get_yaml_from_response(response) 10 | 11 | assert tagged_content.content == 'key: value' 12 | assert tagged_content.parsed_content == {'key': 'value'} 13 | assert tagged_content.start_ind == 7 14 | assert tagged_content.end_ind == 19 15 | 16 | def test_get_yaml_from_response_invalid_yaml(): 17 | response = AIResponse(_response="```yaml\nkey: value\n```") 18 | tagged_content = get_yaml_from_response(response) 19 | 20 | assert tagged_content.content == 'key: value' 21 | assert tagged_content.parsed_content == {"key": "value"} 22 | 23 | def test_get_json_from_response_valid_json(): 24 | response = 
AIResponse(_response="```json\n{\"key\": \"value\"}\n```") 25 | tagged_content = get_json_from_response(response) 26 | 27 | assert tagged_content.content == '{"key": "value"}' 28 | assert tagged_content.parsed_content == {"key": "value"} 29 | assert tagged_content.start_ind == 7 30 | assert tagged_content.end_ind == 24 31 | 32 | def test_get_json_from_response_invalid_json(): 33 | response = AIResponse(_response="```json\n{key: value}\n```") 34 | 35 | with pytest.raises(NotParsedResponseException): 36 | get_json_from_response(response) 37 | 38 | def test__get_format_from_response(): 39 | response = AIResponse(_response="```json\n{\"key\": \"value\"}\n```") 40 | tags = [Tag("```json", "```", 0)] 41 | content, start_ind, end_ind = _get_format_from_response(response, tags) 42 | 43 | assert content == '{"key": "value"}' 44 | 45 | def test__get_format_from_response_no_tags(): 46 | response = AIResponse(_response="No tags here") 47 | tags = [Tag("```json", "```", 0)] 48 | content, start_ind, end_ind = _get_format_from_response(response, tags) 49 | assert content is None 50 | 51 | -------------------------------------------------------------------------------- /tests/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/tests/services/__init__.py -------------------------------------------------------------------------------- /tests/services/test_flow_prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LamoomAI/lamoom-python/c0bb78c817bd05054b96f4f4720c2115a0963f96/tests/services/test_flow_prompt.py -------------------------------------------------------------------------------- /tests/test_integrational.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | import logging 5 | from time import sleep 6 | from datetime import datetime as dt 7 | import dotenv 8 | dotenv.load_dotenv() 9 | from pytest import fixture 10 | from lamoom import Lamoom, behaviour, Prompt, AttemptToCall, AzureAIModel, C_128K 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | @fixture 15 | def client(): 16 | import dotenv 17 | dotenv.load_dotenv(dotenv.find_dotenv()) 18 | lamoom = Lamoom() 19 | return lamoom 20 | 21 | 22 | def test_loading_prompt_from_service(client: Lamoom): 23 | context = { 24 | 'messages': ['test1', 'test2'], 25 | 'assistant_response_in_progress': None, 26 | 'files': ['file1', 'file2'], 27 | 'music': ['music1', 'music2'], 28 | 'videos': ['video1', 'video2'], 29 | } 30 | 31 | # initial version of the prompt 32 | prompt_id = f'unit-test-loading_prompt_from_service' 33 | client.service.clear_cache() 34 | prompt = Prompt(id=prompt_id, max_tokens=10000) 35 | first_str_dt = dt.now().strftime('%Y-%m-%d %H:%M:%S') 36 | prompt.add(f"It's a system message, Hello at {first_str_dt}", role="system") 37 | prompt.add('{messages}', is_multiple=True, in_one_message=True, label='messages') 38 | print(client.call(prompt.id, context, "azure/useast/gpt-4o")) 39 | 40 | # updated version of the prompt 41 | client.service.clear_cache() 42 | prompt = Prompt(id=prompt_id, max_tokens=10000) 43 | next_str_dt = dt.now().strftime('%Y-%m-%d %H:%M:%S') 44 | prompt.add(f"It's a system message, Hello at {next_str_dt}", role="system") 45 | prompt.add('{music}', is_multiple=True, in_one_message=True, label='music') 46 | print(client.call(prompt.id, context, 
"azure/useast/gpt-4o")) 47 | 48 | # call uses outdated version of prompt, should use updated version of the prompt 49 | sleep(2) 50 | client.service.clear_cache() 51 | prompt = Prompt(id=prompt_id, max_tokens=10000) 52 | prompt.add(f"It's a system message, Hello at {first_str_dt}", role="system") 53 | prompt.add('{messages}', is_multiple=True, in_one_message=True, label='messages') 54 | result = client.call(prompt.id, context, "azure/useast/gpt-4o") 55 | 56 | # should call the prompt with music 57 | assert result.prompt.messages[-1] == {'role': 'user', 'content': 'music1\nmusic2'} 58 | --------------------------------------------------------------------------------