├── .env.example ├── .github └── workflows │ ├── delpoy-slsc-prod-stable.yml │ ├── delpoy-slsc-prod.yml │ ├── delpoy-slsc.yml │ ├── docker-image.yml │ └── python-app.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── README.md ├── YC-Dockerfile ├── app ├── __init__.py ├── index.py ├── my_logger.py └── yandex │ ├── __init__.py │ ├── classification.py │ ├── completions.py │ ├── embeddings.py │ ├── models.py │ └── yc_log_handler.py ├── docker-compose.yml ├── etc └── promo.svg ├── examples ├── example.js ├── example.py └── langchain-example.py ├── main.py ├── requirements.txt ├── start_app.sh ├── tests ├── __init__.py ├── test_e2e.py ├── test_fastapi.py ├── test_langchain.py └── test_unit.py └── vercel.json /.env.example: -------------------------------------------------------------------------------- 1 | # пример файла конфигурации. Подробнее в документации https://ai-cookbook.ru/docs/adapter/deploy 2 | 3 | # dev, yc, volume, stdout. Подробнее в документации https://ai-cookbook.ru/docs/adapter/deploy 4 | LOG_TYPE=dev 5 | LOG_LEVEL=DEBUG 6 | 7 | # значение для заголовка x-data-logging-enabled по умолчанию, подробнее https://yandex.cloud/ru/docs/foundation-models/operations/disable-logging 8 | YC_FOMO_LOG_POLICY=true 9 | 10 | # настройка ретраев внутри адаптера 11 | YC_COMPLETION_RETRIES=True 12 | YC_EMBEDDINGS_RETRIES=True 13 | 14 | # настройка пакетной обработки для эндпоинта embeddings 15 | YC_EMBEDDINGS_RATE_LIMIT=1 16 | YC_EMBEDDINGS_TIME_WINDOW=1 17 | YC_EMBEDDINGS_MAX_RETRIES=3 18 | YC_EMBEDDINGS_BACKOFF_FACTOR=2 19 | 20 | # Настройка эндпоинтов 21 | YC_SERVICE_URL=https://llm.api.cloud.yandex.net 22 | 23 | # Настройка маппинга моделей 24 | YC_COMPLETIONS_MODEL_MAP=gpt-4o:yandexgpt/latest,gpt-4o-mini:yandexgpt-lite/latest,gpt-3.5:yandexgpt/latest,gpt-3.5-turbo:yandexgpt/latest,gpt-5:yandexgpt/latest,llama-3.1-8b-instruct:llama-lite/latest,llama-3.1-70b-instruct:llama/latest 25 | 
YC_EMBEDDINGS_MODEL_MAP=text-embedding-3-large:text-search-doc/latest,text-embedding-3-small:text-search-doc/latest,text-embedding-ada-002:text-search-doc/latest 26 | 27 | # Настройка детекции вызова классификатора 28 | CLASSIFIER_DETECTION=true 29 | TUNED_CLASSIFIER_CALL_SEQ=///Classify next text/// 30 | FS_CLASSIFIER_CALL_SEQ=///Classify next text with few shot model/// 31 | 32 | # Modes: full_answer, best_label 33 | CLASSIFIER_MODE=best_label 34 | CLASSIFIER_THRESHOLD=0 35 | -------------------------------------------------------------------------------- /.github/workflows/delpoy-slsc-prod-stable.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Serverless Container to Yandex Cloud (prod-stable) 2 | 3 | on: 4 | push: 5 | branches: [ "stable" ] 6 | 7 | jobs: 8 | 9 | delpoy-stable: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Login to Yandex Cloud Container Registry 17 | id: login-cr 18 | uses: yc-actions/yc-cr-login@v1 19 | with: 20 | yc-sa-json-credentials: ${{ secrets.YC_SA_JSON_CREDENTIALS }} 21 | 22 | - name: Build, tag, and push image to Yandex Cloud Container Registry 23 | env: 24 | CR_REGISTRY: crpel88gtvoc0esr11nd 25 | CR_REPOSITORY: my-repo-prod-stable 26 | IMAGE_TAG: ${{ github.sha }} 27 | 28 | run: | 29 | ls -a 30 | docker build -t cr.yandex/$CR_REGISTRY/$CR_REPOSITORY:$IMAGE_TAG -f YC-Dockerfile . 
31 | docker push cr.yandex/$CR_REGISTRY/$CR_REPOSITORY:$IMAGE_TAG 32 | 33 | - name: Deploy Serverless Container 34 | id: deploy-sls-container 35 | uses: yc-actions/yc-sls-container-deploy@v2.7.0 36 | 37 | with: 38 | yc-sa-json-credentials: ${{ secrets.YC_SA_JSON_CREDENTIALS }} 39 | container-name: adapter-0-prod-stable 40 | folder-id: b1gkvpmciuf1at2nkvcb 41 | revision-service-account-id: ajekbtndj8s14af74itc 42 | revision-cores: 1 43 | revision-memory: 128Mb 44 | revision-core-fraction: 100 45 | revision-concurrency: 16 46 | revision-provisioned: 1 47 | revision-image-url: cr.yandex/crpel88gtvoc0esr11nd/my-repo-prod-stable:${{ github.sha }} 48 | revision-execution-timeout: 120 49 | public: true 50 | 51 | revision-log-options-log-group-id: e23p2mbdc5gis4an2ess 52 | revision-log-options-min-level: level_unspecified 53 | 54 | revision-env: | 55 | GITHUB_SHA=${{ github.sha }} 56 | GITHUB_REF=${{ github.ref }} 57 | 58 | LOG_TYPE=yc 59 | LOG_LEVEL=INFO 60 | 61 | -------------------------------------------------------------------------------- /.github/workflows/delpoy-slsc-prod.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Serverless Container to Yandex Cloud (prod-latest) 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | 7 | jobs: 8 | 9 | delpoy-dev: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Login to Yandex Cloud Container Registry 17 | id: login-cr 18 | uses: yc-actions/yc-cr-login@v1 19 | with: 20 | yc-sa-json-credentials: ${{ secrets.YC_SA_JSON_CREDENTIALS }} 21 | 22 | - name: Build, tag, and push image to Yandex Cloud Container Registry 23 | env: 24 | CR_REGISTRY: crppsup4msn8hhtat156 25 | CR_REPOSITORY: my-repo-prod 26 | IMAGE_TAG: ${{ github.sha }} 27 | 28 | run: | 29 | ls -a 30 | docker build -t cr.yandex/$CR_REGISTRY/$CR_REPOSITORY:$IMAGE_TAG -f YC-Dockerfile . 
31 | docker push cr.yandex/$CR_REGISTRY/$CR_REPOSITORY:$IMAGE_TAG 32 | 33 | - name: Deploy Serverless Container 34 | id: deploy-sls-container 35 | uses: yc-actions/yc-sls-container-deploy@v2.7.0 36 | 37 | with: 38 | yc-sa-json-credentials: ${{ secrets.YC_SA_JSON_CREDENTIALS }} 39 | container-name: adapter-0-prod 40 | folder-id: b1gkvpmciuf1at2nkvcb 41 | revision-service-account-id: ajekbtndj8s14af74itc 42 | revision-cores: 1 43 | revision-memory: 128Mb 44 | revision-core-fraction: 100 45 | revision-concurrency: 16 46 | revision-provisioned: 1 47 | revision-image-url: cr.yandex/crppsup4msn8hhtat156/my-repo-prod:${{ github.sha }} 48 | revision-execution-timeout: 120 49 | public: true 50 | 51 | revision-log-options-log-group-id: e23p2mbdc5gis4an2ess 52 | revision-log-options-min-level: level_unspecified 53 | 54 | revision-env: | 55 | GITHUB_SHA=${{ github.sha }} 56 | GITHUB_REF=${{ github.ref }} 57 | 58 | LOG_TYPE=yc 59 | LOG_LEVEL=INFO 60 | 61 | -------------------------------------------------------------------------------- /.github/workflows/delpoy-slsc.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Serverless Container to Yandex Cloud 2 | 3 | on: 4 | push: 5 | branches: [ "dev" ] 6 | 7 | jobs: 8 | 9 | delpoy-dev: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | 16 | - name: Login to Yandex Cloud Container Registry 17 | id: login-cr 18 | uses: yc-actions/yc-cr-login@v1 19 | with: 20 | yc-sa-json-credentials: ${{ secrets.YC_SA_JSON_CREDENTIALS }} 21 | 22 | - name: Build, tag, and push image to Yandex Cloud Container Registry 23 | env: 24 | CR_REGISTRY: crp0bmvdd91dv86c170d 25 | CR_REPOSITORY: my-cr-repo 26 | IMAGE_TAG: ${{ github.sha }} 27 | 28 | run: | 29 | ls -a 30 | docker build -t cr.yandex/$CR_REGISTRY/$CR_REPOSITORY:$IMAGE_TAG -f YC-Dockerfile . 
31 | docker push cr.yandex/$CR_REGISTRY/$CR_REPOSITORY:$IMAGE_TAG 32 | 33 | - name: Deploy Serverless Container 34 | id: deploy-sls-container 35 | uses: yc-actions/yc-sls-container-deploy@v2.7.0 36 | 37 | with: 38 | yc-sa-json-credentials: ${{ secrets.YC_SA_JSON_CREDENTIALS }} 39 | container-name: adapter-0 40 | folder-id: b1gkvpmciuf1at2nkvcb 41 | revision-service-account-id: ajekbtndj8s14af74itc 42 | revision-cores: 1 43 | revision-memory: 128Mb 44 | revision-core-fraction: 100 45 | revision-concurrency: 16 46 | revision-provisioned: 1 47 | revision-image-url: cr.yandex/crp0bmvdd91dv86c170d/my-cr-repo:${{ github.sha }} 48 | revision-execution-timeout: 120 49 | public: true 50 | 51 | revision-log-options-log-group-id: e239urj29boo302337co 52 | revision-log-options-min-level: level_unspecified 53 | 54 | revision-env: | 55 | GITHUB_SHA=${{ github.sha }} 56 | GITHUB_REF=${{ github.ref }} 57 | 58 | LOG_TYPE=yc 59 | LOG_LEVEL=DEBUG 60 | 61 | -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image 2 | 3 | on: 4 | push: 5 | branches: [ "main", "stable", "dev" ] 6 | pull_request: 7 | branches: [ "main", "stable", "dev" ] 8 | schedule: 9 | - cron: '0 0 * * *' 10 | 11 | jobs: 12 | 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Docker image tag 21 | id: vars 22 | run: echo "IMAGE_TAG=my-image-name:$(date +%s)" >> $GITHUB_ENV 23 | 24 | - name: Build the Docker image 25 | run: docker build . 
--file Dockerfile --tag ${{ env.IMAGE_TAG }} 26 | 27 | - name: Run the Docker container 28 | run: docker run -d --name my-container ${{ env.IMAGE_TAG }} 29 | 30 | build-yc: 31 | 32 | runs-on: ubuntu-latest 33 | 34 | steps: 35 | - uses: actions/checkout@v4 36 | 37 | - name: Set up Docker image tag 38 | id: vars 39 | run: echo "IMAGE_TAG=my-image-name:$(date +%s)" >> $GITHUB_ENV 40 | 41 | - name: Build the Docker image 42 | run: docker build . --file YC-Dockerfile --tag ${{ env.IMAGE_TAG }} 43 | 44 | - name: Run the Docker container 45 | run: docker run -d --name my-container ${{ env.IMAGE_TAG }} -------------------------------------------------------------------------------- /.github/workflows/python-app.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - stable 8 | pull_request: 9 | branches: 10 | - main 11 | schedule: 12 | - cron: '0 0 */3 * *' 13 | 14 | jobs: 15 | test_auth: 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout code 20 | uses: actions/checkout@v2 21 | 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: '3.12' 26 | 27 | - name: Install dependencies 28 | run: | 29 | cp .env.example .env 30 | python -m pip install --upgrade pip 31 | pip install -r requirements.txt 32 | pip install pytest langchain langchain-openai 33 | 34 | - name: Start FastAPI application 35 | run: | 36 | chmod +x start_app.sh 37 | ./start_app.sh 38 | env: 39 | PYTHONUNBUFFERED: '1' 40 | 41 | - name: Wait for the server to start 42 | run: sleep 5 43 | 44 | - name: Run simple tests 45 | env: 46 | PYTHONPATH: . 
47 | run: | 48 | pytest tests/test_fastapi.py 49 | pytest tests/test_unit.py 50 | 51 | - name: Create .testenv file 52 | run: | 53 | echo "FOLDER_ID=${{ secrets.TEST_FOLDER_ID }}" > .testenv 54 | echo "YANDEX_API_KEY=${{ secrets.TEST_YANDEX_API_KEY }}" >> .testenv 55 | 56 | - name: Run e2e and langchain tests 57 | env: 58 | PYTHONPATH: . 59 | run: | 60 | pytest tests/test_e2e.py 61 | pytest tests/test_langchain.py 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | mytest.py 3 | __pycache__ 4 | .vscode 5 | logs/debug.log 6 | 4me 7 | otest.py 8 | ytest.py 9 | sex.py 10 | YStest.py 11 | OStest.py 12 | oaiemb.py 13 | mytest copy.py 14 | mytest copy 2.py 15 | mytest copy 3.py 16 | mytest copy 4.py 17 | mytest copy 5.py 18 | app/1 19 | app/2 20 | app/_yandex.py 21 | YStest tools.py 22 | ytest tools.py 23 | .testenv 24 | logs/test.log 25 | .env -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | tg nongilgameshj. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 
99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Используем официальный образ Python 2 | FROM python:3.12-slim 3 | 4 | # Устанавливаем рабочую директорию 5 | WORKDIR /app 6 | 7 | # Копируем файлы в контейнер 8 | COPY . 
/app/ 9 | 10 | # Устанавливаем FastAPI и Uvicorn 11 | RUN pip install -r requirements.txt 12 | 13 | EXPOSE 9041 14 | 15 | RUN cp .env.example .env 16 | 17 | # healthcheck 18 | HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ 19 | CMD curl --fail http://localhost:9041/health || exit 1 20 | 21 | # Команда для запуска приложения 22 | CMD ["gunicorn", "main:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:9041"] 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 All Mute ✓ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenAI to Yandex GPT API Adapter 2 | 3 | [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com) 4 | ![Test Status](https://github.com/ai-cookbook/openai-yandexgpt-adapter/actions/workflows/docker-image.yml/badge.svg) 5 | ![Test Status](https://github.com/ai-cookbook/openai-yandexgpt-adapter/actions/workflows/python-app.yml/badge.svg) 6 | ![Vercel](https://vercelbadge.vercel.app/api/all-mute/openai-yandexgpt-adapter) 7 | 8 | **Use Yandex Cloud models from anywhere!** 9 | 10 | **[Полная документация: ai-cookbook.ru/docs/adapter](https://ai-cookbook.ru/docs/adapter/)** 11 | 12 | Данное приложение преобразует API-запросы формата OpenAI в запросы формата Yandex Cloud Foundational Models, что позволяет использовать Yandex Cloud Foundational Models через OpenAI SDK, lite-LLM, langchain, других dev библиотеках а также готовых пользовательских приложениях. 13 | 14 | Рекомендуемый openai base_url: `https://o2y.ai-cookbook.ru/v1` ![badge](https://o2y.ai-cookbook.ru/badge) 15 | 16 | ## Быстрый старт: 17 | 18 | ```python 19 | import openai 20 | 21 | client = openai.Client(api_key=f"{FOLDER_ID}@{API_KEY_OR_IAM_KEY}", base_url="https://o2y.ai-cookbook.ru/v1") 22 | ``` 23 | 24 | *Вы можете использовать SDK на любом языке, в том числе js, go, и т.д.* 25 | 26 | Примеры: [python OpenAI SDK](./examples/example.py), [js](./examples/example.js), [langchain](./examples/langchain-example.py) 27 | 28 | ## Решение проблем 29 | 30 | Если у вас возникли проблемы при работе с этим приложением, **пожалуйста, создайте issue** в этом репозитории, он активно поддерживается. Оперативно по проблемам писать tg `@nongilgameshj` 31 | 32 | 33 | ### Дисклеймер 34 | 35 | Данный проект не является официальным продуктом Yandex Cloud. 
Поддерживается командой ai-cookbook.ru. 36 | -------------------------------------------------------------------------------- /YC-Dockerfile: -------------------------------------------------------------------------------- 1 | # Используем официальный образ Python 2 | FROM python:3.12-slim 3 | 4 | # Устанавливаем рабочую директорию 5 | WORKDIR /app 6 | 7 | # Копируем файлы в контейнер 8 | COPY . /app/ 9 | 10 | # Устанавливаем FastAPI и Uvicorn 11 | RUN pip install -r requirements.txt 12 | 13 | EXPOSE 8080 14 | 15 | RUN cp .env.example .env 16 | 17 | ENV LOG_TYPE=yc 18 | ENV LOG_LEVEL=INFO 19 | 20 | # healthcheck 21 | HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ 22 | CMD curl --fail http://localhost:8080/health || exit 1 23 | 24 | # Команда для запуска приложения 25 | CMD ["gunicorn", "main:app", "--workers", "1", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:8080"] 26 | 27 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-cookbook/openai-yandexgpt-adapter/975fd7af26df7250d497b3f6a7865ead894fd12f/app/__init__.py -------------------------------------------------------------------------------- /app/index.py: -------------------------------------------------------------------------------- 1 | from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse 2 | from fastapi import APIRouter, HTTPException, Request 3 | import os, sys, time, json 4 | from pydantic import ValidationError 5 | from app.my_logger import logger 6 | from dotenv import load_dotenv 7 | import asyncio 8 | import random 9 | import string 10 | import uuid 11 | from functools import wraps 12 | from typeguard import check_type 13 | 14 | from openai import BadRequestError 15 | from openai._exceptions import InternalServerError, APIStatusError 16 | 17 | from 
app.yandex.models import ( 18 | CompletionRequest as YaCompletionRequest, 19 | CompletionResponse as YaCompletionResponse, 20 | TextEmbeddingRequest as YaTextEmbeddingRequest, 21 | TextEmbeddingResponse as YaTextEmbeddingResponse, 22 | FewShotTextClassificationRequest as YaFewShotTextClassificationRequest, 23 | FewShotTextClassificationResponse as YaFewShotTextClassificationResponse, 24 | TunedTextClassificationRequest as YaTunedTextClassificationRequest, 25 | TunedTextClassificationResponse as YaTunedTextClassificationResponse, 26 | GetModelsResponse as YaGetModelsResponse, 27 | YaCompletionRequestWithClassificatiors, 28 | 29 | ToolResult as YaToolResult, 30 | ToolCall as YaToolCall, 31 | ToolCallList as YaToolCallList, 32 | Message as YaChatCompletionMessage, 33 | CompletionOptions as YaCompletionOptions 34 | ) 35 | 36 | from openai.types.chat import ( 37 | ChatCompletion as OpenAIChatCompletion, 38 | ChatCompletionChunk as OpenAIChatCompletionChunk, 39 | ChatCompletionMessage as OpenAIChatCompletionMessage, 40 | ) 41 | from openai.types.embedding import Embedding as OpenAIEmbedding 42 | from openai.types.embedding_model import EmbeddingModel as OpenAIEmbeddingModel 43 | from openai.types.embedding_create_params import EmbeddingCreateParams as OpenAIEmbeddingCreateParams 44 | from openai.types.create_embedding_response import CreateEmbeddingResponse as OpenAICreateEmbeddingResponse 45 | from openai.types.chat.completion_create_params import CompletionCreateParams as OpenAICompletionCreateParams 46 | from openai.types.chat.completion_create_params import ( 47 | CompletionCreateParamsNonStreaming as OpenAICompletionCreateParamsNonStreaming, 48 | CompletionCreateParamsStreaming as OpenAICompletionCreateParamsStreaming 49 | ) 50 | from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam 51 | from openai import BadRequestError 52 | from app.yandex.completions import ( 53 | 
_adapt_openai_to_yc_completions, 54 | generate_yandexgpt_response 55 | ) 56 | from app.yandex.embeddings import ( 57 | _adapt_openai_to_yc_embeddings, 58 | generate_yandexgpt_embeddings_response_batch 59 | ) 60 | 61 | load_dotenv() 62 | 63 | GITHUB_SHA = os.getenv("GITHUB_SHA", "unknown_version") 64 | GITHUB_REF = os.getenv("GITHUB_REF", "unknown_branch") 65 | 66 | logger.configure(extra={ 67 | "GITHUB_SHA": GITHUB_SHA, 68 | "GITHUB_REF": GITHUB_REF 69 | }) 70 | logger.info("Index module initiated.") 71 | 72 | index = APIRouter() 73 | 74 | def handle_error(e, request_id): 75 | 76 | if isinstance(e, TypeError): 77 | logger.error(f"Ошибка типа: {str(e)}") 78 | return JSONResponse(status_code=422, content={ 79 | "error": { 80 | "message": str(e), 81 | "type": "type_error", 82 | "param": None, 83 | "code": 422 84 | } 85 | }) 86 | 87 | elif isinstance(e, ValidationError): 88 | logger.error(f"Ошибка валидации: {str(e)}") 89 | return JSONResponse(status_code=422, content={ 90 | "error": { 91 | "message": str(e), 92 | "type": "validation_error", 93 | "param": None, 94 | "code": 422 95 | } 96 | }) 97 | 98 | elif isinstance(e, HTTPException): 99 | logger.error(f"HTTP ошибка: {str(e)}") 100 | return JSONResponse(status_code=e.status_code, content={ 101 | "error": { 102 | "message": str(e), 103 | "type": "http_error", 104 | "param": None, 105 | "code": e.status_code 106 | } 107 | }) 108 | 109 | elif isinstance(e, APIStatusError): 110 | logger.error(f"OpenAI API ошибка: {str(e)}") 111 | return JSONResponse(status_code=e.status_code, content={ 112 | "error": { 113 | "message": e.message, 114 | "type": type(e).__name__, 115 | "param": None, 116 | "code": e.status_code 117 | } 118 | }) 119 | 120 | elif isinstance(e, InternalServerError): 121 | logger.error(f"InternalServerError ошибка: {str(e)}") 122 | return JSONResponse(status_code=e.status_code, content={ 123 | "error": { 124 | "message": e.message, 125 | "type": type(e).__name__, 126 | "param": None, 127 | "code": 
e.status_code 128 | } 129 | }) 130 | 131 | else: 132 | logger.critical(f"Неожиданная ошибка: {str(e)}") 133 | return JSONResponse(status_code=500, content={ 134 | "error": { 135 | "message": f"An unexpected error occurred. {request_id=}", 136 | "type": "unexpected_error", 137 | "param": None, 138 | "code": 500 139 | } 140 | }) 141 | 142 | def handle_request(func): 143 | @wraps(func) 144 | async def wrapper(request: Request, *args, **kwargs): 145 | request_id = uuid.uuid4() 146 | 147 | # Проверка наличия заголовка Authorization 148 | if "Authorization" not in request.headers: 149 | logger.error("Отсутствует заголовок Authorization") 150 | return JSONResponse(status_code=401, content={"error": "Authorization header is required"}) 151 | 152 | with logger.contextualize(request_id=request_id): 153 | try: 154 | return await func(request, *args, **kwargs) 155 | except Exception as e: 156 | return handle_error(e, request_id) 157 | 158 | return wrapper 159 | 160 | @index.post("/v1/chat/completions") 161 | @handle_request 162 | async def completion(request: Request): 163 | logger.debug(f"Получен запрос на генерацию в формате OpenAI. 
{request.method=}\n{request.url=}\n{request.headers=}\n{request.client.host=}\n{request.client.port=}") 164 | 165 | folder_id, yandex_api_key = _decode_openai_api_key(request) 166 | 167 | logger.info("Генерация текста в Foundational Models", extra={"folder_id": folder_id}) 168 | 169 | oai_completion_request: OpenAICompletionCreateParams = await request.json() 170 | 171 | # TODO add validation 172 | #check_type(oai_completion_request, OpenAICompletionCreateParams) 173 | 174 | logger.debug(f"Data: {oai_completion_request}") 175 | 176 | yc_completion_request: YaCompletionRequestWithClassificatiors = await _adapt_openai_to_yc_completions(oai_completion_request, folder_id) 177 | 178 | return await generate_yandexgpt_response(yc_completion_request, folder_id, yandex_api_key) 179 | 180 | @index.post("/v1/embeddings") 181 | @handle_request 182 | async def embeddings(request: Request): 183 | logger.debug(f"Получен запрос на эмбеддинг текста в формате OpenAI. {request.method=}\n{request.url=}\n{request.headers=}\n{request.client.host=}\n{request.client.port=}") 184 | 185 | folder_id, yandex_api_key = _decode_openai_api_key(request) 186 | 187 | logger.info("Генерация эмбеддинга в Foundational Models", extra={"folder_id": folder_id}) 188 | 189 | body = await request.json() 190 | logger.debug(f"Body: {body}") 191 | 192 | oai_text_embedding_request: OpenAIEmbeddingCreateParams = body 193 | 194 | yc_text_embedding_requests: list[YaTextEmbeddingRequest] = await _adapt_openai_to_yc_embeddings(oai_text_embedding_request, folder_id) 195 | 196 | return await generate_yandexgpt_embeddings_response_batch(yc_text_embedding_requests, folder_id, yandex_api_key) 197 | 198 | def _decode_openai_api_key(request): 199 | openai_api_key = request.headers.get("Authorization", "").split("Bearer ")[-1].strip() 200 | 201 | if not openai_api_key: 202 | logger.error("Пустой API ключ") 203 | raise HTTPException(status_code=401, detail="Invalid API key provided") 204 | 205 | logger.debug(f"OpenAI 
Api-key: {openai_api_key}") 206 | 207 | try: 208 | folder_id, yandex_api_key = openai_api_key.split("@") 209 | 210 | if not folder_id or not yandex_api_key: 211 | raise ValueError("Пустой folder_id или yandex_api_key") 212 | 213 | except ValueError as e: 214 | logger.error(f"Ошибка при разборе API ключа: {str(e)}") 215 | raise HTTPException( 216 | status_code=401, 217 | detail="Invalid API key format. Expected format: 'folder_id@yandex_api_key'" 218 | ) 219 | 220 | logger.debug(f"Folder ID: {folder_id}\nYandex Api-key: {yandex_api_key}") 221 | return folder_id, yandex_api_key 222 | 223 | 224 | ########################################################### 225 | # Checkers 226 | ########################################################### 227 | 228 | @index.get("/") 229 | def root(): 230 | return {"status": "Hello from Foundational Models Team! check .../docs for more info"} 231 | 232 | @index.get("/health") 233 | def health_check(): 234 | return {"status": "healthy"} 235 | 236 | @index.get("/readyz") 237 | def readiness_probe(): 238 | return {"status": "ready"} 239 | 240 | @index.get("/livez") 241 | def liveness_probe(): 242 | return {"status": "alive"} 243 | 244 | @index.get("/badge") 245 | def get_badge(): 246 | return RedirectResponse(f"https://img.shields.io/badge/status-healthy-green") 247 | 248 | @index.get("/badge-sha") 249 | def get_badge_sha(): 250 | return RedirectResponse(f"https://img.shields.io/badge/sha-{GITHUB_SHA}-blue") 251 | 252 | @index.get("/badge-ref") 253 | def get_badge_ref(): 254 | ref = GITHUB_REF.split('/')[-1] 255 | return RedirectResponse(f"https://img.shields.io/badge/ref-{ref}-blue") 256 | 257 | @index.route("/", methods=["GET", "POST", "PUT", "DELETE", "PATCH"]) 258 | def method_not_allowed(path): 259 | return HTTPException(status_code=405, detail="Method Not Allowed") -------------------------------------------------------------------------------- /app/my_logger.py: 
-------------------------------------------------------------------------------- 1 | from loguru import logger 2 | from app.yandex.yc_log_handler import ycLogHandler 3 | import os, sys, json 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | # Уровень логирования 9 | LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") 10 | log_type = os.getenv("LOG_TYPE", "stdout") 11 | 12 | # Настраиваем логирование 13 | if log_type == "dev": 14 | pass 15 | else: 16 | logger.remove() 17 | 18 | # Логи записываются в файл 19 | if log_type == "volume": 20 | logger.add("logs/debug.log", format="{time} {level} {message}", level=LOG_LEVEL, rotation="100 MB") 21 | 22 | # Логи выводятся в консоль 23 | elif log_type == "stdout": 24 | logger.add(sys.stdout, format="{time} {level} {message}", level=LOG_LEVEL) 25 | 26 | # Логи отправляются в Yandex Cloud Logging 27 | elif log_type == "yc": 28 | # Добавляем ycLogHandler 29 | logger.add(ycLogHandler, level=LOG_LEVEL) 30 | 31 | # Экспортируем настроенный logger 32 | __all__ = ["logger"] -------------------------------------------------------------------------------- /app/yandex/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-cookbook/openai-yandexgpt-adapter/975fd7af26df7250d497b3f6a7865ead894fd12f/app/yandex/__init__.py -------------------------------------------------------------------------------- /app/yandex/classification.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-cookbook/openai-yandexgpt-adapter/975fd7af26df7250d497b3f6a7865ead894fd12f/app/yandex/classification.py -------------------------------------------------------------------------------- /app/yandex/completions.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException 2 | from fastapi.responses import StreamingResponse 3 | from app.yandex.models import ( 
4 | CompletionRequest as YaCompletionRequest, 5 | CompletionResponse as YaCompletionResponse, 6 | TextEmbeddingRequest as YaTextEmbeddingRequest, 7 | TextEmbeddingResponse as YaTextEmbeddingResponse, 8 | FewShotTextClassificationRequest as YaFewShotTextClassificationRequest, 9 | FewShotTextClassificationResponse as YaFewShotTextClassificationResponse, 10 | TunedTextClassificationRequest as YaTunedTextClassificationRequest, 11 | TunedTextClassificationResponse as YaTunedTextClassificationResponse, 12 | GetModelsResponse as YaGetModelsResponse, 13 | YaCompletionRequestWithClassificatiors, 14 | 15 | ToolResult as YaToolResult, 16 | FunctionResult as YaFunctionResult, 17 | ToolCall as YaToolCall, 18 | ToolCallList as YaToolCallList, 19 | Message as YaChatCompletionMessage, 20 | CompletionOptions as YaCompletionOptions 21 | ) 22 | 23 | from openai.types.chat import ( 24 | ChatCompletion as OpenAIChatCompletion, 25 | ChatCompletionChunk as OpenAIChatCompletionChunk, 26 | ChatCompletionMessage as OpenAIChatCompletionMessage, 27 | ) 28 | from openai.types.embedding import Embedding as OpenAIEmbedding 29 | from openai.types.embedding_model import EmbeddingModel as OpenAIEmbeddingModel 30 | from openai.types.embedding_create_params import EmbeddingCreateParams as OpenAIEmbeddingCreateParams 31 | from openai.types.create_embedding_response import CreateEmbeddingResponse as OpenAICreateEmbeddingResponse 32 | from openai.types.chat.completion_create_params import CompletionCreateParams as OpenAICompletionCreateParams 33 | from openai.types.chat.completion_create_params import ( 34 | CompletionCreateParamsNonStreaming as OpenAICompletionCreateParamsNonStreaming, 35 | CompletionCreateParamsStreaming as OpenAICompletionCreateParamsStreaming 36 | ) 37 | from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam 38 | from openai import BadRequestError 39 | from openai._exceptions import InternalServerError, 
APIStatusError 40 | 41 | from app.my_logger import logger 42 | from dotenv import load_dotenv 43 | import os 44 | import json 45 | import httpx 46 | import time 47 | import random 48 | import string 49 | from tenacity import retry, stop_after_attempt, wait_exponential 50 | from typeguard import check_type 51 | from dataclasses import dataclass 52 | from typing import Optional 53 | 54 | load_dotenv() 55 | 56 | YC_COMPLETIONS_MODEL_MAP = os.getenv("YC_COMPLETIONS_MODEL_MAP", "gpt-4o:yandexgpt/latest,gpt-4o-mini:yandexgpt-lite/latest,gpt-3.5:yandexgpt/latest,gpt-3.5-turbo:yandexgpt/latest,gpt-5:yandexgpt/latest") 57 | YC_LOG_POLICY = os.getenv("YC_FOMO_LOG_POLICY", "True").lower() == "true" 58 | YC_SERVICE_URL = os.getenv("YC_SERVICE_URL", "https://llm.api.cloud.yandex.net") 59 | YC_COMPLETION_RETRIES = os.getenv("YC_COMPLETION_RETRIES", "True").lower() == "true" 60 | 61 | try: 62 | completions_model_map = {k: v for k, v in [item.split(":") for item in YC_COMPLETIONS_MODEL_MAP.split(",")]} 63 | except Exception as e: 64 | logger.error(f"Error parsing YC_COMPLETIONS_MODEL_MAP: {e}") 65 | raise e 66 | 67 | UNSUPPORTED_PARAMETERS = { 68 | # "messages", 69 | # "model", 70 | # "stream", 71 | "audio", 72 | "frequency_penalty", 73 | "function_call", 74 | "functions", 75 | "logit_bias", 76 | "logprobs", 77 | # "max_completion_tokens", 78 | # "max_tokens", 79 | "metadata", 80 | "modalities", 81 | "n", 82 | "parallel_tool_calls", 83 | "prediction", 84 | "presence_penalty", 85 | "response_format", 86 | "seed", 87 | "service_tier", 88 | # "stop", 89 | "store", 90 | "stream_options", 91 | # "temperature", 92 | "tool_choice", 93 | # "tools", 94 | "top_logprobs", 95 | "top_p", 96 | "user" 97 | } 98 | 99 | async def send_request(url: str, headers: dict, body: str, timeout: int = 60): 100 | if YC_COMPLETION_RETRIES: 101 | #if False: 102 | return await send_request_with_retry(url, headers, body, timeout) 103 | else: 104 | return await send_request_without_retry(url, headers, body, 
timeout) 105 | 106 | async def send_request_without_retry(url, headers, body, timeout): 107 | async with httpx.AsyncClient() as client: 108 | response = await client.post(url, headers=headers, content=body, timeout=timeout) 109 | return response 110 | 111 | @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) 112 | async def send_request_with_retry(url, headers, body, timeout): 113 | async with httpx.AsyncClient() as client: 114 | response = await client.post(url, headers=headers, content=body, timeout=timeout) 115 | return response 116 | 117 | async def _adapt_openai_to_yc_completions( 118 | oai_completion_request: OpenAICompletionCreateParams, 119 | folder_id: str 120 | ) -> YaCompletionRequestWithClassificatiors: 121 | 122 | logger.debug(f"Transforming OpenAI completion request to Yandex GPT request. OaiCompletionRequest: {oai_completion_request}, folder_id: {folder_id}") 123 | 124 | logger.info("Модель для генерации текста в Foundational Models", extra={ 125 | "model": str(oai_completion_request.get("model")), 126 | "using_tools": str(bool(oai_completion_request.get("tools"))), 127 | "folder_id": folder_id 128 | }) 129 | 130 | model_uri = _get_completions_model_uri(oai_completion_request.get("model"), folder_id) 131 | 132 | _log_warning_on_unsupported_parameters(oai_completion_request, UNSUPPORTED_PARAMETERS) 133 | 134 | if model_uri.startswith("cls://"): 135 | pass 136 | else: 137 | yandex_parameters = YaCompletionOptions( 138 | stream=oai_completion_request.get("stream"), 139 | temperature=oai_completion_request.get("temperature"), 140 | maxTokens=_get_max_tokens(oai_completion_request) 141 | ) 142 | yandex_messages = _adapt_messages(oai_completion_request.get("messages")) 143 | yandex_tools = oai_completion_request.get("tools") 144 | 145 | yandex_request = YaCompletionRequest( 146 | modelUri=model_uri, 147 | messages=yandex_messages, 148 | completionOptions=yandex_parameters, 149 | tools=yandex_tools 150 | ) 151 | 152 | 
logger.debug(f"Transformed Yandex request: {yandex_request}") 153 | return yandex_request 154 | 155 | 156 | async def generate_yandexgpt_response( 157 | yc_completion_request: YaCompletionRequestWithClassificatiors, 158 | folder_id: str, 159 | yandex_api_key: str 160 | ) -> YaCompletionResponse | YaTunedTextClassificationResponse | YaFewShotTextClassificationResponse: 161 | logger.debug(f"Sending Yandex completion request to Yandex GPT. Yandex completion request: {yc_completion_request}, folder_id: {folder_id}, Api-key: {yandex_api_key}") 162 | 163 | if isinstance(yc_completion_request, YaCompletionRequest): 164 | logger.debug("Choosing completion response") 165 | return await generate_yandexgpt_completion_response(yc_completion_request, folder_id, yandex_api_key) 166 | elif isinstance(yc_completion_request, YaTunedTextClassificationRequest): 167 | logger.debug("Choosing tuned text classification response") 168 | pass 169 | # return tuned text classification response 170 | elif isinstance(yc_completion_request, YaFewShotTextClassificationRequest): 171 | logger.debug("Choosing few shot text classification response") 172 | pass 173 | # return few shot text classification response 174 | 175 | 176 | async def generate_yandexgpt_completion_response( 177 | yc_completion_request: YaCompletionRequest, 178 | folder_id: str, 179 | yandex_api_key: str 180 | ) -> YaCompletionResponse: 181 | if yc_completion_request.completionOptions.stream: 182 | return StreamingResponse(generate_yandexgpt_completion_response_streaming_with_tools(yc_completion_request, folder_id, yandex_api_key), media_type="text/event-stream") 183 | else: 184 | return await generate_yandexgpt_completion_response_non_streaming(yc_completion_request, folder_id, yandex_api_key) 185 | 186 | async def generate_yandexgpt_completion_response_non_streaming( 187 | yc_completion_request: YaCompletionRequest, 188 | folder_id: str, 189 | yandex_api_key: str 190 | ) -> OpenAIChatCompletion: 191 | logger.debug("Generating 
non streaming completion response") 192 | 193 | url = f"{YC_SERVICE_URL}/foundationModels/v1/completion" 194 | headers = { 195 | "Content-Type": "application/json", 196 | "Authorization": f"Bearer {yandex_api_key}" if yandex_api_key.startswith('t1') else f"Api-Key {yandex_api_key}", 197 | 'x-folder-id': folder_id, 198 | 'x-data-logging-enabled': str(YC_LOG_POLICY) 199 | } 200 | body = yc_completion_request.model_dump_json() 201 | 202 | logger.debug(f"Отправка запроса на {url} с заголовками: {headers} и данными: {body}") 203 | response: httpx.Response = await send_request(url, headers, body, 60) 204 | 205 | if response.status_code == 200: 206 | logger.debug(f"Получен ответ от Yandex GPT: {response.text}, {response.status_code}, {response.headers}") 207 | 208 | result = response.json().get('result') 209 | yandex_completion_response = YaCompletionResponse(**result) 210 | 211 | final_result: OpenAIChatCompletion = _transform_to_openai_response_format(yandex_completion_response, yc_completion_request, response.headers) 212 | 213 | logger.debug(f"Final result: {final_result}") 214 | _log_success_on_completion(final_result, yandex_completion_response, folder_id, yc_completion_request) 215 | 216 | return final_result 217 | else: 218 | logger.error(f"Error generating completion response: {response.status_code}, {response.text}, {response.headers}") 219 | logger.info(f"Error generating completion response", extra={ 220 | "folder_id": folder_id, 221 | "modelUri": yc_completion_request.modelUri, 222 | "model": yc_completion_request.modelUri.split("/")[-2:], 223 | "error_code": response.status_code, 224 | "error_message": response.text 225 | }) 226 | 227 | # TODO: map errors into OpenAI format with more details/codes/errors 228 | if str(response.status_code).startswith("4"): 229 | raise APIStatusError(message=response.text, response=response, body=response.text) 230 | elif str(response.status_code).startswith("5"): 231 | raise InternalServerError(message=response.text, 
response=response, body=response.text) 232 | else: 233 | raise HTTPException(status_code=response.status_code, detail=response.text) 234 | 235 | async def generate_yandexgpt_completion_response_streaming_with_tools( 236 | yc_completion_request: YaCompletionRequest, 237 | folder_id: str, 238 | yandex_api_key: str 239 | ): 240 | headers = _prepare_request_headers(yandex_api_key, folder_id) 241 | request_body = yc_completion_request.model_dump_json() 242 | 243 | logger.debug(f"Отправка стрим-запроса: URL={YC_SERVICE_URL}, headers={headers}") 244 | 245 | try: 246 | async with httpx.AsyncClient() as client: 247 | async with client.stream('POST', 248 | f"{YC_SERVICE_URL}/foundationModels/v1/completion", 249 | headers=headers, 250 | content=request_body, 251 | timeout=30.0) as response: 252 | 253 | await _validate_response(response) 254 | 255 | accumulated_text = "" 256 | last_chunk = None 257 | async for chunk in _process_response_stream(response, yc_completion_request): 258 | yield chunk 259 | if isinstance(chunk, str) and "data: [DONE]" in chunk: 260 | if last_chunk and isinstance(last_chunk, YaCompletionResponse): 261 | _log_success_on_streaming_completion( 262 | last_chunk, 263 | folder_id, 264 | yc_completion_request 265 | ) 266 | else: 267 | last_chunk = chunk 268 | 269 | except httpx.TimeoutException: 270 | logger.error("Таймаут при получении ответа от YandexGPT") 271 | raise HTTPException(status_code=504, detail="Gateway Timeout") 272 | except Exception as e: 273 | logger.error(f"Ошибка при стриминге ответа: {str(e)}") 274 | raise HTTPException(status_code=500, detail=str(e)) 275 | 276 | def _log_success_on_streaming_completion( 277 | yandex_completion_response: YaCompletionResponse, 278 | folder_id: str, 279 | yc_completion_request: YaCompletionRequest 280 | ): 281 | """Логирует успешное завершение стримингового запроса.""" 282 | logger.success("Стриминговая генерация текста в Foundational Models завершена", 283 | extra={ 284 | "folder_id": folder_id, 285 | 
"modelUri": yc_completion_request.modelUri, 286 | "modelPrefix": yc_completion_request.modelUri.split(":")[0], 287 | "model": yc_completion_request.modelUri.split("/")[-2:], 288 | "streaming": yc_completion_request.completionOptions.stream, 289 | "input_text_tokens": yandex_completion_response.usage.inputTextTokens, 290 | "completion_tokens": yandex_completion_response.usage.completionTokens, 291 | "total_tokens": yandex_completion_response.usage.totalTokens, 292 | "model_version": yandex_completion_response.modelVersion, 293 | "is_toolResult": str(bool(yandex_completion_response.alternatives[0].message.toolResultList)), 294 | "yandex_status": str(yandex_completion_response.alternatives[0].status) 295 | }) 296 | 297 | async def _process_response_stream(response: httpx.Response, yc_completion_request: YaCompletionRequest): 298 | """Обрабатывает поток ответов от API.""" 299 | accumulated_text = "" 300 | 301 | async for line in response.aiter_lines(): 302 | if not line: 303 | continue 304 | 305 | try: 306 | chunk = _parse_response_chunk(line, accumulated_text) 307 | if chunk.is_tool_call: 308 | yield _format_tool_call_response(chunk, yc_completion_request) 309 | yield "data: [DONE]\n\n" 310 | return 311 | 312 | accumulated_text = chunk.accumulated_text 313 | yield _format_text_chunk_response(chunk) 314 | 315 | if chunk.is_complete: 316 | yield "data: [DONE]\n\n" 317 | return 318 | 319 | except json.JSONDecodeError: 320 | logger.error(f"Ошибка парсинга JSON из строки: {line}") 321 | continue 322 | 323 | @dataclass 324 | class ResponseChunk: 325 | content: str 326 | is_complete: bool 327 | is_tool_call: bool 328 | accumulated_text: str 329 | response_obj: YaCompletionResponse 330 | 331 | def _parse_response_chunk(line: str, accumulated_text: str) -> ResponseChunk: 332 | """Парсит чанк ответа.""" 333 | json_data = json.loads(line) 334 | response_obj = YaCompletionResponse(**json_data['result']) 335 | 336 | is_tool_call = (response_obj.alternatives[0].status == 337 | 
"ALTERNATIVE_STATUS_TOOL_CALLS") 338 | 339 | if is_tool_call: 340 | return ResponseChunk( 341 | content="", 342 | is_complete=True, 343 | is_tool_call=True, 344 | accumulated_text=accumulated_text, 345 | response_obj=response_obj 346 | ) 347 | 348 | new_text = response_obj.alternatives[0].message.text 349 | content = new_text[len(accumulated_text):] 350 | 351 | return ResponseChunk( 352 | content=content, 353 | is_complete=response_obj.alternatives[0].status == "ALTERNATIVE_STATUS_COMPLETE", 354 | is_tool_call=False, 355 | accumulated_text=new_text, 356 | response_obj=response_obj 357 | ) 358 | 359 | def _format_text_chunk_response(chunk: ResponseChunk) -> str: 360 | """Форматирует текстовый чанк в формат SSE.""" 361 | response_data = { 362 | "id": _generate_completion_id(), 363 | "object": "chat.completion.chunk", 364 | "created": int(time.time()), 365 | "model": "yandexgpt-latest", 366 | "system_fingerprint": _generate_fingerprint(), 367 | "choices": [{ 368 | "index": 0, 369 | "delta": { 370 | "role": "assistant", 371 | "content": chunk.content 372 | }, 373 | "logprobs": None, 374 | "finish_reason": "stop" if chunk.is_complete else None 375 | }] 376 | } 377 | 378 | return f"data: {json.dumps(response_data, ensure_ascii=False)}\n\n" 379 | 380 | def _generate_completion_id(): 381 | return ''.join(random.choices(string.ascii_letters + string.digits, k=24)) 382 | 383 | def _generate_fingerprint(): 384 | return ''.join(random.choices(string.ascii_letters + string.digits, k=24)) 385 | 386 | def _format_tool_call_response(chunk: ResponseChunk, yc_completion_request: YaCompletionRequest) -> str: 387 | """Форматирует ответ с вызовом инструмента в формат SSE.""" 388 | response_data = _transform_to_openai_response_format( 389 | chunk.response_obj, 390 | yc_completion_request, 391 | {} # headers не используются для tool calls 392 | ) 393 | return f"data: {json.dumps(response_data.model_dump(), ensure_ascii=False)}\n\n" 394 | 395 | def _prepare_request_headers(api_key: str, 
folder_id: str) -> dict: 396 | """Подготавливает заголовки запроса.""" 397 | auth_value = f"Bearer {api_key}" if api_key.startswith('t1') else f"Api-Key {api_key}" 398 | return { 399 | "Content-Type": "application/json", 400 | "Authorization": auth_value, 401 | 'x-folder-id': folder_id, 402 | 'x-data-logging-enabled': str(YC_LOG_POLICY) 403 | } 404 | 405 | async def _validate_response(response: httpx.Response): 406 | """Проверяет корректность ответа.""" 407 | if response.status_code != 200: 408 | error_msg = f"Ошибка API: {response.text}" 409 | logger.error(error_msg) 410 | raise HTTPException(status_code=response.status_code, detail=error_msg) 411 | 412 | def _log_success_on_completion(final_result: OpenAIChatCompletion, yandex_completion_response: YaCompletionResponse, folder_id: str, yc_completion_request: YaCompletionRequest): 413 | logger.success("Генерация текста в Foundational Models завершена", 414 | extra={ 415 | "folder_id": folder_id, 416 | "modelUri": yc_completion_request.modelUri, 417 | "modelPrefix": yc_completion_request.modelUri.split(":")[0], 418 | "model": yc_completion_request.modelUri.split("/")[-2:], 419 | "streaming": yc_completion_request.completionOptions.stream, 420 | "input_text_tokens": yandex_completion_response.usage.inputTextTokens, 421 | "completion_tokens": yandex_completion_response.usage.completionTokens, 422 | "total_tokens": yandex_completion_response.usage.totalTokens, 423 | "model_version": yandex_completion_response.modelVersion, 424 | "is_toolResult": str(bool(yandex_completion_response.alternatives[0].message.toolResultList)), 425 | "yandex_status": str(yandex_completion_response.alternatives[0].status), 426 | "openai_status": final_result.choices[0].finish_reason, 427 | "openai_id": final_result.id, 428 | "openai_created": final_result.created, 429 | "openai_model": final_result.model, 430 | "openai_system_fingerprint": final_result.system_fingerprint 431 | }) 432 | 433 | def _transform_to_openai_response_format( 434 | 
yandex_response: YaCompletionResponse, 435 | yc_completion_request: YaCompletionRequest, 436 | headers: dict 437 | ) -> OpenAIChatCompletion: 438 | """ 439 | Преобразует ответ Yandex GPT в формат ответа OpenAI. 440 | 441 | Args: 442 | yandex_response (YaCompletionResponse): Ответ от Yandex GPT. 443 | yc_completion_request (YaCompletionRequest): Исходный запрос к Yandex GPT. 444 | headers (dict): Заголовки запроса. 445 | 446 | Returns: 447 | OpenAIChatCompletion: Ответ в формате OpenAI. 448 | """ 449 | try: 450 | request_id = headers.get('x-request-id', _generate_fallback_id()) 451 | is_tool_call = _check_if_tool_call(yandex_response) 452 | 453 | if is_tool_call: 454 | tool_calls = _extract_tool_calls(yandex_response) 455 | finish_reason = "tool_calls" 456 | content = None 457 | else: 458 | tool_calls = None 459 | finish_reason = "stop" 460 | content = _extract_content(yandex_response) 461 | 462 | openai_response = OpenAIChatCompletion( 463 | id=_generate_completion_id(), 464 | object="chat.completion", 465 | created=int(time.time()), 466 | model=_construct_model_name(yc_completion_request, yandex_response), 467 | system_fingerprint=request_id, 468 | choices=[ 469 | { 470 | "index": 0, 471 | "message": { 472 | "role": "assistant", 473 | "content": content, 474 | "tool_calls": tool_calls 475 | }, 476 | "logprobs": None, 477 | "finish_reason": finish_reason 478 | } 479 | ], 480 | usage={ 481 | "prompt_tokens": yandex_response.usage.inputTextTokens, 482 | "completion_tokens": yandex_response.usage.completionTokens, 483 | "total_tokens": yandex_response.usage.totalTokens 484 | } 485 | ) 486 | 487 | logger.debug(f"Формирование ответа в формате OpenAI завершено, ответ: {openai_response}") 488 | return openai_response 489 | 490 | except Exception as e: 491 | logger.error(f"Ошибка при преобразовании ответа: {e}") 492 | raise HTTPException(status_code=500, detail="Ошибка при обработке ответа от Yandex GPT") 493 | 494 | 495 | def _generate_completion_id(length: int = 24) -> 
str: 496 | """Генерирует уникальный идентификатор для завершения.""" 497 | return ''.join(random.choices(string.ascii_letters + string.digits, k=length)) 498 | 499 | 500 | def _generate_fallback_id() -> str: 501 | """Генерирует запасной идентификатор, если отсутствует в заголовках.""" 502 | return _generate_completion_id() 503 | 504 | 505 | def _check_if_tool_call(yandex_response: YaCompletionResponse) -> bool: 506 | """Определяет, содержит ли ответ вызовы инструментов.""" 507 | return yandex_response.alternatives[0].status == "ALTERNATIVE_STATUS_TOOL_CALLS" 508 | 509 | 510 | def _extract_tool_calls(yandex_response: YaCompletionResponse) -> Optional[list]: 511 | """Извлекает информацию о вызовах инструментов из ответа.""" 512 | try: 513 | return [ 514 | { 515 | "id": f"call_{_generate_completion_id()}", 516 | "type": "function", 517 | "function": { 518 | "name": tool_call.functionCall.name, 519 | "arguments": json.dumps(tool_call.functionCall.arguments) 520 | } 521 | } 522 | for tool_call in yandex_response.alternatives[0].message.toolCallList.toolCalls 523 | ] 524 | except AttributeError as e: 525 | logger.error(f"Ошибка при извлечении вызовов инструментов: {e}") 526 | return None 527 | 528 | 529 | def _extract_content(yandex_response: YaCompletionResponse) -> Optional[str]: 530 | """Извлекает текстовое содержимое из ответа.""" 531 | try: 532 | return yandex_response.alternatives[0].message.text 533 | except AttributeError as e: 534 | logger.error(f"Ошибка при извлечении содержимого: {e}") 535 | return None 536 | 537 | 538 | def _construct_model_name( 539 | yc_completion_request: YaCompletionRequest, 540 | yandex_response: YaCompletionResponse 541 | ) -> str: 542 | """Формирует имя модели в формате OpenAI.""" 543 | model_base = yc_completion_request.modelUri.split('/')[-2] 544 | model_version = yandex_response.modelVersion 545 | return f"{model_base}-by-{model_version}" 546 | 547 | def _get_completions_model_uri(model: str, folder_id: str) -> str: 548 | """ 549 | 
1. map model to yc model 550 | 2. check clf mode, raise clf 551 | 3. construct yc model uri 552 | """ 553 | logger.debug(f"Model: {model}, folder_id: {folder_id}") 554 | 555 | # map model to yc model 556 | if model in completions_model_map: 557 | model = completions_model_map[model] 558 | 559 | if model.startswith(("gpt://", "ds://")): 560 | model_uri = model 561 | elif model.startswith("cls://"): 562 | raise HTTPException(status_code=400, detail="Classifier mode is not supported yet") 563 | model_uri = model 564 | else: 565 | model_uri = f"gpt://{folder_id}/{model}" 566 | 567 | logger.debug(f"Model URI: {model_uri}") 568 | return model_uri 569 | 570 | def _adapt_messages(messages: list[OpenAIChatCompletionMessageParam]) -> list[YaChatCompletionMessage]: 571 | logger.debug(f"Messages: {messages}") 572 | 573 | messages_transformed = [] 574 | called_functions = {} 575 | 576 | i = 0 577 | while i < len(messages): 578 | logger.debug(f"Processing message {i+1} of {len(messages)}") 579 | message = messages[i] 580 | 581 | try: 582 | 583 | if message.get('role') == 'function' or (message.get('role') == 'assistant' and message.get('function_call')): 584 | _raise_deprecated_function_call_error() 585 | 586 | if message.get('role') == 'tool': 587 | toolResults, i = _collect_tool_results(messages, i, called_functions) 588 | messages_transformed.append(YaChatCompletionMessage( 589 | role="assistant", 590 | toolResultList={ 591 | "toolResults": toolResults 592 | } 593 | )) 594 | elif message.get('role') == 'assistant' and message.get('tool_calls'): 595 | toolCalls = _process_tool_calls(message, called_functions) 596 | messages_transformed.append(YaChatCompletionMessage( 597 | role="assistant", 598 | toolCallList={ 599 | "toolCalls": toolCalls 600 | } 601 | )) 602 | else: 603 | content = _get_content_as_string(message.get('content')) 604 | yc_message = YaChatCompletionMessage( 605 | role=message.get('role'), 606 | text=content 607 | ) 608 | messages_transformed.append(yc_message) 
609 | 610 | except Exception as e: 611 | logger.error(f"Ошибка при обработке сообщения: {message}, ошибка: {e}") 612 | raise e 613 | 614 | i += 1 615 | 616 | logger.debug(f"Преобразование сообщений в формат Yandex GPT завершено, результат: {messages_transformed}") 617 | return messages_transformed 618 | 619 | def _collect_tool_results(messages: list[OpenAIChatCompletionMessageParam], start_index: int, called_functions: dict): 620 | logger.debug(f"Collecting tool results from messages: {messages}, start_index: {start_index}, called_functions: {called_functions}") 621 | toolResults = [] 622 | i = start_index 623 | while i < len(messages) and messages[i].get('role') == 'tool': 624 | message = messages[i] 625 | name = called_functions.get(message.get('tool_call_id')) 626 | toolResults.append(YaToolResult( 627 | functionResult=YaFunctionResult( 628 | name=name, 629 | content=message.get('content') 630 | ) 631 | )) 632 | i += 1 633 | return toolResults, i - 1 634 | 635 | def _process_tool_calls(message: OpenAIChatCompletionMessageParam, called_functions: dict): 636 | logger.debug(f"Processing tool calls from message: {message}, called_functions: {called_functions}") 637 | toolCalls = [] 638 | for tool_call in message.get('tool_calls'): 639 | try: 640 | name = tool_call.get('function').get('name') 641 | arguments = json.loads(tool_call.get('function').get('arguments')) 642 | toolCalls.append(YaToolCall( 643 | functionCall={ 644 | "name": name, 645 | "arguments": arguments 646 | } 647 | )) 648 | called_functions[tool_call.get('id')] = name 649 | except Exception as e: 650 | logger.error(f"Ошибка при извлечении name и arguments из tool_call: {tool_call}, ошибка: {e}") 651 | return toolCalls 652 | 653 | def _get_content_as_string(content): 654 | if content and not isinstance(content, str): 655 | return str(content) 656 | return content 657 | 658 | def _raise_deprecated_function_call_error(): 659 | raise HTTPException(status_code=400, detail="Function calling is deprecated 
and not supported by OpenAI API to Yandex GPT Adapter. Use tool calling instead.") 660 | 661 | def _log_warning_on_unsupported_parameters(parameters: OpenAICompletionCreateParams, unsupported_parameters: set[str]): 662 | input_parameters = set(parameters.keys()) 663 | unsupported_parameters_in_input = input_parameters.intersection(unsupported_parameters) 664 | 665 | if unsupported_parameters_in_input: 666 | logger.warning(f"Unsupported parameters in input: {unsupported_parameters_in_input}") 667 | 668 | def _get_max_tokens(completion_request: OpenAICompletionCreateParams) -> str | None: 669 | """ 670 | Извлекает и обрабатывает параметр максимального количества токенов из запроса. 671 | 672 | Args: 673 | completion_request: Параметры запроса в формате OpenAI 674 | 675 | Returns: 676 | str | None: Строковое представление max_tokens или None, если не задано 677 | """ 678 | max_completion_tokens = completion_request.get("max_completion_tokens") 679 | max_tokens = completion_request.get("max_tokens") 680 | 681 | # Приоритет отдается max_completion_tokens, если он задан 682 | final_max_tokens = max_completion_tokens or max_tokens 683 | 684 | return str(final_max_tokens) if final_max_tokens is not None else None -------------------------------------------------------------------------------- /app/yandex/embeddings.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException 2 | from fastapi.responses import StreamingResponse 3 | from app.yandex.models import ( 4 | CompletionRequest as YaCompletionRequest, 5 | CompletionResponse as YaCompletionResponse, 6 | TextEmbeddingRequest as YaTextEmbeddingRequest, 7 | TextEmbeddingResponse as YaTextEmbeddingResponse, 8 | FewShotTextClassificationRequest as YaFewShotTextClassificationRequest, 9 | FewShotTextClassificationResponse as YaFewShotTextClassificationResponse, 10 | TunedTextClassificationRequest as YaTunedTextClassificationRequest, 11 | 
TunedTextClassificationResponse as YaTunedTextClassificationResponse, 12 | GetModelsResponse as YaGetModelsResponse, 13 | YaCompletionRequestWithClassificatiors, 14 | 15 | ToolResult as YaToolResult, 16 | FunctionResult as YaFunctionResult, 17 | ToolCall as YaToolCall, 18 | ToolCallList as YaToolCallList, 19 | Message as YaChatCompletionMessage, 20 | CompletionOptions as YaCompletionOptions 21 | ) 22 | 23 | from openai.types.chat import ( 24 | ChatCompletion as OpenAIChatCompletion, 25 | ChatCompletionChunk as OpenAIChatCompletionChunk, 26 | ChatCompletionMessage as OpenAIChatCompletionMessage, 27 | ) 28 | from openai.types.embedding import Embedding as OpenAIEmbedding 29 | from openai.types.embedding_model import EmbeddingModel as OpenAIEmbeddingModel 30 | from openai.types.embedding_create_params import EmbeddingCreateParams as OpenAIEmbeddingCreateParams 31 | from openai.types.create_embedding_response import CreateEmbeddingResponse as OpenAICreateEmbeddingResponse 32 | from openai.types.chat.completion_create_params import CompletionCreateParams as OpenAICompletionCreateParams 33 | from openai.types.chat.completion_create_params import ( 34 | CompletionCreateParamsNonStreaming as OpenAICompletionCreateParamsNonStreaming, 35 | CompletionCreateParamsStreaming as OpenAICompletionCreateParamsStreaming 36 | ) 37 | from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam as OpenAIChatCompletionMessageParam 38 | from openai import BadRequestError 39 | from openai._exceptions import InternalServerError, APIStatusError 40 | 41 | from app.my_logger import logger 42 | from dotenv import load_dotenv 43 | import os 44 | import json 45 | import httpx 46 | import time 47 | import random 48 | import string 49 | from tenacity import retry, stop_after_attempt, wait_exponential 50 | from typeguard import check_type 51 | from dataclasses import dataclass 52 | from typing import Optional 53 | import asyncio 54 | from asyncio import Queue 55 | 56 | 
load_dotenv() 57 | 58 | YC_EMBEDDINGS_MODEL_MAP = os.getenv("YC_EMBEDDINGS_MODEL_MAP", "text-embedding-3-large:text-search-doc/latest,text-embedding-3-small:text-search-doc/latest,text-embedding-ada-002:text-search-doc/latest") 59 | YC_LOG_POLICY = os.getenv("YC_FOMO_LOG_POLICY", "True").lower() == "true" 60 | YC_SERVICE_URL = os.getenv("YC_SERVICE_URL", "https://llm.api.cloud.yandex.net") 61 | YC_EMBEDDINGS_RETRIES = os.getenv("YC_EMBEDDINGS_RETRIES", "True").lower() == "true" 62 | YC_EMBEDDINGS_RATE_LIMIT = int(os.getenv("YC_EMBEDDINGS_RATE_LIMIT", "1")) 63 | YC_EMBEDDINGS_TIME_WINDOW = int(os.getenv("YC_EMBEDDINGS_TIME_WINDOW", "1")) 64 | YC_EMBEDDINGS_MAX_RETRIES = int(os.getenv("YC_EMBEDDINGS_MAX_RETRIES", "1")) 65 | YC_EMBEDDINGS_BACKOFF_FACTOR = int(os.getenv("YC_EMBEDDINGS_BACKOFF_FACTOR", "1")) 66 | 67 | try: 68 | embeddings_model_map = {k: v for k, v in [item.split(":") for item in YC_EMBEDDINGS_MODEL_MAP.split(",")]} 69 | except Exception as e: 70 | logger.error(f"Error parsing YC_EMBEDDINGS_MODEL_MAP: {e}") 71 | raise e 72 | 73 | UNSUPPORTED_PARAMETERS = { 74 | "dimensions", 75 | "encoding_format", 76 | "user" 77 | } 78 | 79 | async def send_request(url: str, headers: dict, body: str, timeout: int = 60): 80 | if YC_EMBEDDINGS_RETRIES: 81 | #if False: 82 | return await send_request_with_retry(url, headers, body, timeout) 83 | else: 84 | return await send_request_without_retry(url, headers, body, timeout) 85 | 86 | async def send_request_without_retry(url, headers, body, timeout): 87 | async with httpx.AsyncClient() as client: 88 | response = await client.post(url, headers=headers, content=body, timeout=timeout) 89 | return response 90 | 91 | @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) 92 | async def send_request_with_retry(url, headers, body, timeout): 93 | async with httpx.AsyncClient() as client: 94 | response = await client.post(url, headers=headers, content=body, timeout=timeout) 95 | return response 96 | 97 
def _prepare_request_headers(api_key: str, folder_id: str) -> dict:
    """Build HTTP headers for a Yandex FoMo request.

    Keys starting with 't1' are treated as IAM tokens (Bearer auth);
    anything else is sent as an Api-Key.
    """
    auth_value = f"Bearer {api_key}" if api_key.startswith('t1') else f"Api-Key {api_key}"
    return {
        "Content-Type": "application/json",
        "Authorization": auth_value,
        'x-folder-id': folder_id,
        'x-data-logging-enabled': str(YC_LOG_POLICY)
    }

async def _validate_response(response: httpx.Response):
    """Raise HTTPException (mirroring the upstream status code) on any non-200 reply."""
    if response.status_code != 200:
        error_msg = f"Ошибка API: {response.text}"
        logger.error(error_msg)
        raise HTTPException(status_code=response.status_code, detail=error_msg)

def _construct_model_name(
    yc_text_embedding_request: YaTextEmbeddingRequest,
    yandex_response: YaTextEmbeddingResponse
) -> str:
    """Build an OpenAI-style model name, e.g. "text-search-doc-by-<version>"."""
    # modelUri looks like "emb://<folder>/<model>/<tag>"; [-2] picks the model segment.
    model_base = yc_text_embedding_request.modelUri.split('/')[-2]
    model_version = yandex_response.modelVersion
    return f"{model_base}-by-{model_version}"

def _log_warning_on_unsupported_parameters(parameters: OpenAIEmbeddingCreateParams, unsupported_parameters: set[str]):
    """Warn (once per request) about OpenAI parameters this adapter silently ignores."""
    input_parameters = set(parameters.keys())
    unsupported_parameters_in_input = input_parameters.intersection(unsupported_parameters)

    if unsupported_parameters_in_input:
        logger.warning(f"Unsupported parameters in input: {unsupported_parameters_in_input}")

async def _adapt_openai_to_yc_embeddings(oai_text_embedding_request: OpenAIEmbeddingCreateParams, folder_id: str) -> list[YaTextEmbeddingRequest]:
    """Convert one OpenAI embeddings request into a list of per-text Yandex requests.

    Yandex's textEmbedding endpoint accepts a single text per call, so a batched
    OpenAI "input" (str or list[str]) fans out to one YaTextEmbeddingRequest each.
    """
    logger.debug(f"Transforming OpenAI embeddings request to Yandex GPT request_S. OaiCompletionRequest: {oai_text_embedding_request}, folder_id: {folder_id}")

    logger.info("Модель для генерации эмбеддингов в Foundational Models", extra={
        "model": str(oai_text_embedding_request.get("model")),
        "folder_id": folder_id
    })

    model_uri: str = _get_embeddings_model_uri(oai_text_embedding_request.get("model"), folder_id)

    _log_warning_on_unsupported_parameters(oai_text_embedding_request, UNSUPPORTED_PARAMETERS)

    input_texts: list[str] = oai_text_embedding_request.get("input")

    # OpenAI allows a bare string as "input"; normalize to a one-element list.
    if isinstance(input_texts, str):
        input_texts = [input_texts]

    _warning_on_enourmos_amount_of_texts(input_texts)

    yandex_requests = [YaTextEmbeddingRequest(
        modelUri=model_uri,
        text=text
    ) for text in input_texts]

    logger.debug(f"Transformed Yandex request: {str(yandex_requests)[:100]}")
    return yandex_requests

def _warning_on_enourmos_amount_of_texts(input_texts: list[str]):
    # (sic: "enourmos") Flag unusually large batches; each text is a separate upstream call.
    if len(input_texts) > 1000:
        logger.warning(f"Large amount of texts: {len(input_texts)}")

async def generate_yandexgpt_embeddings_response_batch(
    yc_text_embedding_requests: list[YaTextEmbeddingRequest],
    folder_id: str,
    yandex_api_key: str
) -> OpenAICreateEmbeddingResponse:
    """Execute a batch of Yandex embedding requests and assemble one OpenAI response.

    Requests are fed through an asyncio queue to RATE_LIMIT concurrent workers;
    HTTP 429 replies are re-queued with exponential backoff up to MAX_RETRIES.
    Raises HTTPException on empty input (400), overall timeout (504), or when
    no embedding was produced (500).
    """
    logger.debug(
        f"Sending Yandex embeddings request to Yandex GPT. "
        f"Yandex embeddings request: {str(yc_text_embedding_requests)[:100]}, "
        f"folder_id: {folder_id}, Api-key: {yandex_api_key}"
    )

    if not yc_text_embedding_requests:
        raise HTTPException(status_code=400, detail="Empty request list")

    url = f"{YC_SERVICE_URL}/foundationModels/v1/textEmbedding"
    headers = _prepare_request_headers(yandex_api_key, folder_id)

    all_embeddings = []
    all_token_counts = 0
    last_response = None

    RATE_LIMIT = YC_EMBEDDINGS_RATE_LIMIT
    TIME_WINDOW = YC_EMBEDDINGS_TIME_WINDOW
    MAX_RETRIES = YC_EMBEDDINGS_MAX_RETRIES
    BACKOFF_FACTOR = YC_EMBEDDINGS_BACKOFF_FACTOR

    queue = Queue()
    retry_queue = Queue()
    for request in yc_text_embedding_requests:
        queue.put_nowait((request, 0))  # (request, retry_count)

    async def worker():
        # NOTE(review): embeddings are appended in completion order with
        # index=len(all_embeddings); with RATE_LIMIT > 1 the output order is not
        # guaranteed to match the input order — confirm whether callers rely on it.
        nonlocal all_embeddings, all_token_counts, last_response
        while not queue.empty() or not retry_queue.empty():
            try:
                # Drain the retry queue first so backed-off requests are not starved.
                if not retry_queue.empty():
                    request, retry_count = await retry_queue.get()
                else:
                    request, retry_count = await queue.get()

                try:
                    body = request.model_dump_json()
                    response = await send_request(url, headers, body, timeout=119)
                    await _validate_response(response)

                    result = response.json()
                    logger.debug(f"Yandex response: {str(result)[:100]}")

                    yandex_response = YaTextEmbeddingResponse(**result)
                    last_response = yandex_response
                    all_token_counts += int(yandex_response.numTokens)

                    embedding = OpenAIEmbedding(
                        embedding=yandex_response.embedding,
                        index=len(all_embeddings),
                        object="embedding"
                    )
                    all_embeddings.append(embedding)

                except HTTPException as e:
                    if e.status_code == 429 and retry_count < MAX_RETRIES:
                        # Rate limited: re-queue the request with an increased delay.
                        await asyncio.sleep(TIME_WINDOW * BACKOFF_FACTOR ** retry_count)
                        await retry_queue.put((request, retry_count + 1))
                        logger.warning(f"Rate limit hit, retrying request (attempt {retry_count + 1})")
                    else:
                        logger.error(f"Error during processing: {e}")
                        raise
                except Exception as e:
                    logger.error(f"Error during processing: {e}")
                    raise

            finally:
                # NOTE(review): task_done() is acked against whichever queue is
                # non-empty *now*, not the queue the item was taken from; if the
                # other queue's state changed meanwhile, the join() accounting can
                # go wrong — confirm against the intended queue semantics.
                if not retry_queue.empty():
                    retry_queue.task_done()
                else:
                    queue.task_done()

    try:
        # asyncio.timeout requires Python 3.11+.
        async with asyncio.timeout(120):  # 2 minutes max overall
            workers = []
            for _ in range(RATE_LIMIT):
                worker_coroutine = worker()
                workers.append(asyncio.create_task(worker_coroutine))

            # Wait until both queues are fully processed.
            await queue.join()
            await retry_queue.join()

            # Cancel the now-idle workers…
            for w in workers:
                w.cancel()

            # …and wait for the cancellations to settle.
            await asyncio.gather(*workers, return_exceptions=True)

    except TimeoutError:
        for w in workers:
            w.cancel()
        await asyncio.gather(*workers, return_exceptions=True)
        raise HTTPException(status_code=504, detail="Request timeout")

    if not all_embeddings:
        raise HTTPException(status_code=500, detail="No embeddings were generated")

    final = OpenAICreateEmbeddingResponse(
        data=all_embeddings,
        model=_construct_model_name(yc_text_embedding_requests[0], last_response),
        object="list",
        usage={
            "prompt_tokens": all_token_counts,
            "total_tokens": all_token_counts
        }
    )

    await _log_success_on_embeddings(final, folder_id, yc_text_embedding_requests, last_response)
    return final

async def _log_success_on_embeddings(final: OpenAICreateEmbeddingResponse, folder_id: str, yc_text_embedding_requests: list[YaTextEmbeddingRequest], last_response: YaTextEmbeddingResponse):
    """Emit a structured success record for the whole embeddings batch."""
    # All requests in a batch share one modelUri; the first is representative.
    yc_text_embedding_request = yc_text_embedding_requests[0]

    logger.success("Генерация текста в Foundational Models завершена",
                   extra={
                       "folder_id": folder_id,
                       "modelUri": yc_text_embedding_request.modelUri,
                       "modelPrefix": yc_text_embedding_request.modelUri.split(":")[0],
                       "model": yc_text_embedding_request.modelUri.split("/")[-2:],
                       "total_texts": len(yc_text_embedding_requests),
                       "total_tokens": final.usage.total_tokens,
                       "model_version": last_response.modelVersion,
                       "openai_model": final.model,
                   })

def _get_embeddings_model_uri(model: str, folder_id: str) -> str:
    """
    1. map model to yc model
    2. check clf mode, raise clf
    3. construct yc model uri
    """
    logger.debug(f"Model: {model}, folder_id: {folder_id}")

    # map model to yc model
    if model in embeddings_model_map:
        model = embeddings_model_map[model]

    # Already a full URI — pass through unchanged; otherwise prefix with folder.
    if model.startswith(("emb://")):
        model_uri = model
    else:
        model_uri = f"emb://{folder_id}/{model}"

    logger.debug(f"Model URI: {model_uri}")
    return model_uri
-------------------------------------------------------------------------------- /app/yandex/models.py: --------------------------------------------------------------------------------
from typing import List, Optional, Union, Dict, Any, Literal
from pydantic import BaseModel, Field, model_validator
from enum import Enum

# Request Models
class CompletionOptions(BaseModel):
    """Generation options of a Yandex FoMo completion request."""
    # NOTE(review): Optional[...] without "= None" is still a *required* field in
    # pydantic v2 — confirm callers always supply stream and maxTokens.
    stream: Optional[bool]
    temperature: Optional[float] = Field(default=0.3, ge=0, le=1.0)
    maxTokens: Optional[str]

class FunctionCall(BaseModel):
    # Function name plus already-parsed arguments (not a JSON string, unlike OpenAI).
    name: str
    arguments: Dict[str, Any]

class ToolCall(BaseModel):
    functionCall: FunctionCall

class ToolCallList(BaseModel):
    toolCalls: List[ToolCall]

class FunctionResult(BaseModel):
    name: str
    content: str

class ToolResult(BaseModel):
    functionResult: FunctionResult

class ToolResultList(BaseModel):
    toolResults: List[ToolResult]

class Message(BaseModel):
    """One chat message; exactly one payload variant must be set (see validator)."""
    role: Literal['system', 'assistant', 'user']
    text: Optional[str] = None
    toolCallList: Optional[ToolCallList] = None
    toolResultList: Optional[ToolResultList] = None

    @model_validator(mode='after')
    def check_only_one_field(cls, values):
        # Enforce the mutual exclusivity of text / toolCallList / toolResultList.
        fields = ['text', 'toolCallList', 'toolResultList']
        filled_fields = [field for field in fields if getattr(values, field) is not None]
        if len(filled_fields) != 1:
            raise ValueError("Only one of 'text', 'toolCallList', or 'toolResultList' must be provided.")
        return values

class FunctionTool(BaseModel):
    """Declaration of a callable tool exposed to the model."""
    name: str
    description: str = Field(default="")
    parameters: Dict[str, Any] = Field(default_factory=dict)

    @model_validator(mode='after')
    def set_empty_defaults(cls, values):
        # Normalize explicit None (as sent by some clients) to empty values.
        if values.description is None:
            values.description = ""
        if values.parameters is None:
            values.parameters = {}
        return values

class Tool(BaseModel):
    function: FunctionTool

class CompletionRequest(BaseModel):
    """Body of POST /foundationModels/v1/completion."""
    modelUri: str
    completionOptions: CompletionOptions
    messages: List[Message]
    tools: Optional[List[Tool]] = None

# Response Models
class AlternativeStatus(str, Enum):
    """Completion state of a generated alternative, as reported by Yandex."""
    UNSPECIFIED = "ALTERNATIVE_STATUS_UNSPECIFIED"
    PARTIAL = "ALTERNATIVE_STATUS_PARTIAL"
    TRUNCATED_FINAL = "ALTERNATIVE_STATUS_TRUNCATED_FINAL"
    FINAL = "ALTERNATIVE_STATUS_FINAL"
    CONTENT_FILTER = "ALTERNATIVE_STATUS_CONTENT_FILTER"
    TOOL_CALLS = "ALTERNATIVE_STATUS_TOOL_CALLS"

class Alternative(BaseModel):
    message: Message
    status: AlternativeStatus

class ContentUsage(BaseModel):
    # Token counts arrive as strings in the Yandex API, hence str not int.
    inputTextTokens: str
    completionTokens: str
    totalTokens: str

class CompletionResponse(BaseModel):
    alternatives: List[Alternative]
    usage: ContentUsage
    modelVersion: str

class AdapterCompletionRequest(BaseModel):
    """Internal envelope bundling a Yandex request with its credentials and id."""
    yaCompletionRequest: CompletionRequest
    folderId: str
    apiKey: str
    id: str

# Error for unsupported parameters in the OpenAI format
class _UnsupportedParameterError(Exception):
    """Exception raised for unsupported parameters in OpenAI format."""
    def __init__(self, parameter: str):
        self.parameter = parameter
        self.message = f"Unsupported parameter: {parameter}"
        super().__init__(self.message)

# Embedding Models
class TextEmbeddingRequest(BaseModel):
    modelUri: str
    text: str

class TextEmbeddingResponse(BaseModel):
    embedding: List[float]
    numTokens: str
    modelVersion: str

# Tuned Classification
class TunedTextClassificationRequest(BaseModel):
    modelUri: str
    text: str

class TunedClassificationLabel(BaseModel):
    label: str
    confidence: str

class TunedTextClassificationResponse(BaseModel):
    predictions: List[TunedClassificationLabel]
    modelVersion: str

# Few Shot Classification
class ClassificationSample(BaseModel):
    # One in-context example: a text and its expected label.
    text: str
    label: str

class FewShotTextClassificationRequest(BaseModel):
    modelUri: str
    taskDescription: str
    labels: List[str]
    text: str
    samples: Optional[List[ClassificationSample]] = None

class ClassificationLabel(BaseModel):
    label: str
    confidence: str

class FewShotTextClassificationResponse(BaseModel):
    predictions: List[ClassificationLabel]
    modelVersion: str

# Other
class GetModelsResponse(BaseModel):
    models: List[str]
    mappedModels: Dict[str, str]

# (sic: "Classificatiors") Union accepted by endpoints that may dispatch to a classifier.
YaCompletionRequestWithClassificatiors = Union[CompletionRequest, TunedTextClassificationRequest, FewShotTextClassificationRequest]
-------------------------------------------------------------------------------- /app/yandex/yc_log_handler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pythonjsonlogger import jsonlogger 3 | import dotenv 4 | import os 5 | import re 6 | 7 | dotenv.load_dotenv() 8 | 9 | YANDEX_API_KEY = os.getenv("YANDEX_API_KEY") 10 | 11 | def obfuscate_message(message: str): 12 | """Obfuscate sensitive information.""" 13 | result = re.sub(r'(Api-key|Api-Key|Bearer|OAuth|OPENAI_API_KEY:|Ключ:|ключ:) [A-Za-z0-9_\-@]+', "***API_KEY_OBFUSCATED***", message) 14 | return result 15 | 16 | class YcLoggingFormatter(jsonlogger.JsonFormatter): 17 | def add_fields(self, log_record, record, message_dict): 18 | record.message = obfuscate_message(record.getMessage()) 19 | 20 | super(YcLoggingFormatter, self).add_fields(log_record, record, message_dict) 21 | log_record['logger'] = record.name 22 | log_level = record.levelname 23 | if log_level == "WARNING": 24 | log_level = "WARN" 25 | elif log_level == "CRITICAL": 26 | log_level = "FATAL" 27 | elif log_level == "SUCCESS": 28 | log_level = "INFO" 29 | log_record['level'] = log_level 30 | 31 | ycLogHandler = logging.StreamHandler() 32 | ycLogHandler.setFormatter(YcLoggingFormatter('%(message)s %(level)s %(logger)s')) 33 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | openai_yandexgpt_adapter: 5 | build: 6 | context: . 
7 | dockerfile: Dockerfile 8 | container_name: openai-yandexgpt-adapter 9 | ports: 10 | - "127.0.0.1:9041:9041" 11 | restart: unless-stopped 12 | healthcheck: 13 | test: ["CMD-SHELL", "curl -f http://localhost:9041/health || exit 1"] 14 | interval: 30s 15 | timeout: 10s 16 | retries: 3 17 | start_period: 5s 18 | environment: 19 | LOG_TYPE: volume 20 | LOG_LEVEL: DEBUG 21 | volumes: 22 | - type: bind 23 | source: ./logs 24 | target: /app/logs 25 | networks: 26 | - adapter_network 27 | logging: 28 | driver: "json-file" 29 | options: 30 | max-size: "10m" 31 | max-file: "3" 32 | 33 | networks: 34 | adapter_network: 35 | driver: bridge 36 | 37 | -------------------------------------------------------------------------------- /etc/promo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 8 | 9 | 10 | Yandex CloudAI StudioOpenAI2YandextranslatorLangChain,LlamaIndexFlowisen8nLangflowOther usefulinstruments forDevsReady-to-go productsOpenAI SDKPlugins topopular products -------------------------------------------------------------------------------- /examples/example.js: -------------------------------------------------------------------------------- 1 | const OpenAI = require('openai'); 2 | 3 | const FOLDER_ID = ""; 4 | const YANDEX_API_KEY = ""; 5 | const key = `${FOLDER_ID}@${YANDEX_API_KEY}`; 6 | 7 | const proxyUrl = "https://o2y.ai-cookbook.ru"; 8 | 9 | // создайте клиент OpenAI с измененным base_url 10 | const openai = new OpenAI({ 11 | apiKey: key, 12 | baseURL: `${proxyUrl}/v1/`, 13 | }); 14 | 15 | async function generateTextOai(systemPrompt, userPrompt, maxTokens = 2000, temperature = 0.1, model = "yandexgpt/latest") { 16 | const response = await openai.chat.completions.create({ 17 | messages: [ 18 | { 19 | role: "system", 20 | content: systemPrompt, 21 | }, 22 | { 23 | role: "user", 24 | content: userPrompt, 25 | }, 26 | ], 27 | model: model, 28 | //max_tokens: maxTokens, 29 | //temperature: temperature, 30 | 
}); 31 | 32 | const generatedText = response.choices[0].message.content; 33 | return generatedText; 34 | } 35 | 36 | async function getEmbedding(text, model = "text-search-doc/latest") { 37 | const response = await openai.embeddings.create({ 38 | input: [text], 39 | model: model, 40 | }); 41 | return response.data[0].embedding; // Возвращаем эмбеддинг 42 | } 43 | 44 | async function getEmbeddingSyncBatch(texts, model = "text-search-doc/latest") { 45 | const response = await openai.embeddings.create({ 46 | input: texts, 47 | model: model, 48 | }); 49 | return response.data; // Возвращаем эмбеддинг 50 | } 51 | 52 | 53 | async function main() { 54 | // Поддерживаемые форматы моделей 55 | const model = 'yandexgpt/latest'; 56 | // или `gpt://${FOLDER_ID}/yandexgpt/latest` 57 | // или `ds://${MODEL_ID}` 58 | // Для эмбеддингов 'text-search-doc/latest' 59 | // или `emb://${FOLDER_ID}/text-search-doc/latest` 60 | // или `ds://${MODEL_ID}` 61 | 62 | const generatedText = await generateTextOai("You are a helpful assistant.", "What is the meaning of life? 
Answer in one word."); 63 | console.log(generatedText); 64 | 65 | const embedding = await getEmbedding("Hello Yandex!"); 66 | console.log(embedding.slice(0, 3), '...'); 67 | } 68 | 69 | main(); 70 | -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | import openai 2 | 3 | # укажите кредиты Yandex CLoud 4 | FOLDER_ID = "" 5 | YANDEX_API_KEY = "" 6 | key = f"{FOLDER_ID}@{YANDEX_API_KEY}" 7 | 8 | # задайте адрес вашей прокси 9 | proxy_url = "https://o2y.ai-cookbook.ru" 10 | 11 | # создайте клиент OpenAI с измененным base_url 12 | oai = openai.Client(api_key=key, base_url=f"{proxy_url}/v1/") 13 | 14 | def generate_text_oai(system_prompt, user_prompt, max_tokens=2000, temperature=0.1, model=f"yandexgpt/latest"): 15 | response = oai.chat.completions.create( 16 | messages=[ 17 | { 18 | "role": "system", 19 | "content": system_prompt, 20 | }, 21 | { 22 | "role": "user", 23 | "content": user_prompt, 24 | } 25 | ], 26 | model=model, 27 | #max_tokens=max_tokens, 28 | #temperature=0.1, 29 | ) 30 | 31 | generated_text = response.choices[0].message.content 32 | return generated_text 33 | 34 | def get_embedding(text, model=f"text-search-doc/latest"): 35 | return oai.embeddings.create(input = [text], model=model).data[0].embedding 36 | 37 | def get_embedding_sync_batch(texts, model=f"text-search-doc/latest"): 38 | return oai.embeddings.create(input = texts, model=model).data 39 | 40 | if __name__ == "__main__": 41 | # Поддерживаемые форматы моделей 42 | model = 'yandexgpt/latest' 43 | # или f'gpt://{FOLDER_ID}/yandexgpt/latest' 44 | # или f'ds://{MODEL_ID}' 45 | # Для эмбеддингов 'text-search-doc/latest' 46 | # или 'emb://{FOLDER_ID}/text-search-doc/latest' 47 | # или 'ds://{MODEL_ID}' 48 | 49 | print(generate_text_oai("You are a helpful assistant.", "What is the meaning of life? 
Answer in one word.")) 50 | print(get_embedding("Hello Yandex!")[:3], '...') -------------------------------------------------------------------------------- /examples/langchain-example.py: -------------------------------------------------------------------------------- 1 | # pip install langcain langchain-openai 2 | 3 | from langchain_openai import ChatOpenAI 4 | from dotenv import load_dotenv 5 | import os 6 | 7 | load_dotenv() 8 | 9 | FOLDER_ID = os.getenv("FOLDER_ID") 10 | YANDEX_API_KEY = os.getenv("YANDEX_API_KEY") 11 | PROXY_URL = "https://o2y.ai-cookbook.ru" 12 | 13 | base_url = f"{PROXY_URL}/v1" 14 | api_key = f"{FOLDER_ID}@{YANDEX_API_KEY}" 15 | 16 | llm = ChatOpenAI( 17 | api_key=api_key, 18 | base_url=base_url, 19 | model="yandexgpt/latest", 20 | ) 21 | print(llm.invoke("Hello, world!")) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from app.index import index 3 | import os 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | GITHUB_SHA = os.getenv("GITHUB_SHA", "unknown_version") 9 | GITHUB_REF = os.getenv("GITHUB_REF", "unknown_branch") 10 | 11 | app = FastAPI( 12 | title="OpenAI SDK Adapter", 13 | description="Adapter from OpenAI SDK to Yandex Cloud FoMo API, [full docs here!](https://ai-cookbook.ru/docs/adapter/)", 14 | version=f"{GITHUB_SHA=} - {GITHUB_REF=}" 15 | ) 16 | 17 | app.include_router(index) 18 | 19 | if __name__ == "__main__": 20 | import uvicorn 21 | 22 | uvicorn.run("main:app", host="0.0.0.0", port=9041, reload=True) 23 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.7.0 3 | certifi==2024.12.14 4 | click==8.1.7 5 | distro==1.9.0 6 | fastapi==0.115.6 7 | gunicorn==23.0.0 8 | h11==0.14.0 9 | 
httpcore==1.0.7 10 | httpx==0.28.1 11 | idna==3.10 12 | iniconfig==2.0.0 13 | jiter==0.8.2 14 | loguru==0.7.3 15 | openai==1.57.4 16 | packaging==24.2 17 | pluggy==1.5.0 18 | pydantic==2.10.3 19 | pydantic_core==2.27.1 20 | pytest==8.3.4 21 | python-dotenv==1.0.1 22 | python-json-logger==3.2.1 23 | python-multipart==0.0.19 24 | sniffio==1.3.1 25 | starlette==0.41.3 26 | tenacity==9.0.0 27 | tqdm==4.67.1 28 | typeguard==4.4.1 29 | typing_extensions==4.12.2 30 | uvicorn==0.34.0 31 | -------------------------------------------------------------------------------- /start_app.sh: -------------------------------------------------------------------------------- 1 | # start_app.sh 2 | nohup gunicorn main:app --workers 1 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:9041 & 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ai-cookbook/openai-yandexgpt-adapter/975fd7af26df7250d497b3f6a7865ead894fd12f/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_e2e.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import openai 3 | import time, json, os 4 | from dotenv import load_dotenv 5 | from loguru import logger 6 | 7 | logger.add("logs/test.log") 8 | 9 | load_dotenv('.testenv') 10 | 11 | FOLDER_ID = os.getenv("FOLDER_ID", "") 12 | API_KEY = os.getenv("YANDEX_API_KEY", "") 13 | #PROXY_URL = "https://d5det46m4e43042pnnfj.apigw.yandexcloud.net" 14 | PROXY_URL = "http://localhost:9041" 15 | #PROXY_URL = "https://bbafv6hdkrihhcvh9u78.containers.yandexcloud.net" 16 | 17 | system_prompt = "Answer with only one word to my question" 18 | user_prompt = "What is the meaning of life?" 19 | emb_prompt = "Hello Yandex!" 
20 | ds_model_id = "bt120qtlha5a2aisl2ih" 21 | 22 | # Configure the OpenAI client to use the proxy server 23 | oai = openai.Client(api_key=f"{FOLDER_ID}@{API_KEY}", base_url=f"{PROXY_URL}/v1/") 24 | 25 | @pytest.mark.parametrize("system_prompt, user_prompt, model", [ 26 | (system_prompt, user_prompt, "gpt-4o"), 27 | (system_prompt, user_prompt, "gpt-4o-mini"), 28 | (system_prompt, user_prompt, "yandexgpt/latest"), 29 | (system_prompt, user_prompt, "yandexgpt-lite/latest"), 30 | (system_prompt, user_prompt, f"gpt://{FOLDER_ID}/yandexgpt/latest"), 31 | (system_prompt, user_prompt, f"gpt://{FOLDER_ID}/yandexgpt-lite/latest"), 32 | #(system_prompt, user_prompt, f"ds://{ds_model_id}"), 33 | ]) 34 | def test_completion_with_alternative_model(system_prompt, user_prompt, model): 35 | time.sleep(0.25) 36 | retries = 3 37 | 38 | for _ in range(retries): # Попробуем выполнить запрос до 3 раз 39 | response = oai.chat.completions.create( 40 | messages=[ 41 | { 42 | "role": "system", 43 | "content": system_prompt, 44 | }, 45 | { 46 | "role": "user", 47 | "content": user_prompt, 48 | } 49 | ], 50 | model=model, 51 | ) 52 | 53 | if response and hasattr(response, 'choices') and response.choices: 54 | content = response.choices[0].message.content 55 | if content is not None and content != "" and isinstance(content, str): 56 | break # Успешный ответ, выходим из цикла 57 | assert content is not None and content != "" and isinstance(content, str) 58 | 59 | @pytest.mark.parametrize("model", [ 60 | "gpt-4o", 61 | "gpt-4o-mini", 62 | "yandexgpt/latest", 63 | "yandexgpt-lite/latest", 64 | f"gpt://{FOLDER_ID}/yandexgpt/latest", 65 | f"gpt://{FOLDER_ID}/yandexgpt-lite/latest", 66 | #f"ds://{ds_model_id}", 67 | ]) 68 | def test_streaming_completion(model): 69 | time.sleep(0.5) # Allow some time for the server to be ready 70 | 71 | response = oai.chat.completions.create( 72 | messages=[ 73 | {"role": "system", "content": system_prompt}, 74 | {"role": "user", "content": user_prompt}, 75 | ], 76 
| model=model, 77 | stream=True, 78 | ) 79 | 80 | collected_chunks = [] 81 | collected_messages = [] 82 | 83 | for chunk in response: 84 | collected_chunks.append(chunk) # save the event response 85 | chunk_message = chunk.choices[0].delta.content # extract the message 86 | collected_messages.append(chunk_message) 87 | 88 | collected_messages = [m for m in collected_messages if m is not None] 89 | full_reply_content = ''.join(collected_messages) 90 | assert full_reply_content is not None and full_reply_content != "" and isinstance(full_reply_content, str) 91 | 92 | @pytest.mark.parametrize("text, model", [ 93 | (emb_prompt, "text-search-doc/latest"), 94 | (emb_prompt, "text-search-query/latest"), 95 | (emb_prompt, "text-embedding-3-large"), 96 | (emb_prompt, "text-embedding-3-small"), 97 | (emb_prompt, f"emb://{FOLDER_ID}/text-search-doc/latest"), 98 | (emb_prompt, f"emb://{FOLDER_ID}/text-search-query/latest"), 99 | ]) 100 | def test_embeddings_with_alternative_model(text, model): 101 | response = oai.embeddings.create(input = [text], model=model) 102 | 103 | vector = response.data[0].embedding 104 | assert len(vector) > 0 and isinstance(vector, list) 105 | assert isinstance(vector[0], float) 106 | 107 | @pytest.mark.parametrize("text, model", [ 108 | (emb_prompt, "text-search-doc/latest"), 109 | (emb_prompt, "text-search-query/latest") 110 | ]) 111 | def test_embeddings_batch_with_alternative_model(text, model): 112 | n = 33 113 | retries = 2 114 | for attempt in range(retries): 115 | response = oai.embeddings.create(input=[text] * n, model=model) 116 | if response and hasattr(response, 'data') and len(response.data) == n: 117 | break 118 | else: 119 | pytest.fail("Не удалось получить корректный ответ после нескольких попыток.") 120 | 121 | vector = response.data[0].embedding 122 | assert len(vector) > 0 and isinstance(vector, list) 123 | assert isinstance(vector[0], float) 124 | 125 | 126 | def test_completion_with_invalid_model(): 127 | try: 128 | response = 
oai.chat.completions.create( 129 | model="invalid-model", 130 | messages=[ 131 | {"role": "system", "content": system_prompt}, 132 | {"role": "user", "content": user_prompt} 133 | ] 134 | ) 135 | except Exception as e: 136 | assert e.status_code == 404 137 | assert isinstance(e, openai.APIStatusError) 138 | 139 | 140 | def test_completion_with_unvalid_parameters(): 141 | try: 142 | response = oai.chat.completions.create( 143 | model=1000, 144 | messages=[ 145 | {"role": "system", "content": system_prompt}, 146 | {"role": "user", "content": user_prompt} 147 | ] 148 | ) 149 | except Exception as e: 150 | assert e.status_code == 500 151 | #assert isinstance(e, openai.APIStatusError) 152 | 153 | 154 | def test_completion_with_invalid_parameters(): 155 | try: 156 | response = oai.chat.completions.create( 157 | model="yandexgpt/latest", 158 | messages=[ 159 | {"role": "system", "content": system_prompt}, 160 | {"role": "user", "content": user_prompt} 161 | ], 162 | temperature=2, 163 | max_tokens=10_000, 164 | ) 165 | except Exception as e: 166 | assert e.status_code == 422 167 | assert isinstance(e, openai.APIStatusError) 168 | 169 | 170 | def test_completion_with_additional_parameters(): 171 | 172 | response = oai.chat.completions.create( 173 | model="yandexgpt/latest", 174 | messages=[ 175 | {"role": "system", "content": system_prompt}, 176 | {"role": "user", "content": user_prompt} 177 | ], 178 | presence_penalty=2, 179 | seed=42, 180 | ) 181 | 182 | assert not (hasattr(response, 'error') and response.error) 183 | 184 | def test_completion_with_empty_message(): 185 | try: 186 | response = oai.chat.completions.create( 187 | model="yandexgpt/latest", 188 | messages=[ 189 | {"role": "system", "content": system_prompt}, 190 | {"role": "user", "content": ""} # Пустое сообщение 191 | ] 192 | ) 193 | raise 194 | except Exception as e: 195 | assert e.status_code == 400 # Ожидаем ошибку из-за пустого сообщения 196 | 197 | 198 | def test_completion_with_long_message(): 199 | 
long_message = "Привет " * 100000 # Очень длинное сообщение 200 | try: 201 | response = oai.chat.completions.create( 202 | model="yandexgpt/latest", 203 | messages=[ 204 | {"role": "system", "content": system_prompt}, 205 | {"role": "user", "content": long_message} 206 | ] 207 | ) 208 | raise 209 | except Exception as e: 210 | assert e.status_code == 400 # Ожидаем ошибку из-за слишком длинного сообщения 211 | 212 | 213 | def test_completion_with_correct_parameters(): 214 | response = oai.chat.completions.create( 215 | model="yandexgpt/latest", 216 | messages=[ 217 | {"role": "system", "content": system_prompt}, 218 | {"role": "user", "content": user_prompt} 219 | ], 220 | temperature=0.7, 221 | max_tokens=100, 222 | ) 223 | assert response and hasattr(response, 'choices') and response.choices 224 | content = response.choices[0].message.content 225 | assert content is not None and content != "" and isinstance(content, str) 226 | 227 | def test_completion_with_tools(): 228 | tools = [ 229 | { 230 | "type": "function", 231 | "function": { 232 | "name": "get_weather", 233 | "description": "Получить текущую погоду в указанном городе", 234 | "parameters": { 235 | "type": "object", 236 | "properties": { 237 | "city": { 238 | "type": "string", 239 | "description": "Название города" 240 | } 241 | }, 242 | "required": ["city"] 243 | } 244 | } 245 | } 246 | ] 247 | 248 | response = oai.chat.completions.create( 249 | model="yandexgpt/latest", 250 | messages=[ 251 | {"role": "system", "content": "Вы - помощник, который может узнавать погоду"}, 252 | {"role": "user", "content": "Какая погода в Москве?"} 253 | ], 254 | tools=tools, 255 | temperature=0, 256 | #tool_choice="auto" 257 | ) 258 | 259 | assert response and hasattr(response, 'choices') and response.choices 260 | choice = response.choices[0] 261 | 262 | # Проверяем, что модель запросила использование функции 263 | assert choice.message.tool_calls is not None 264 | assert len(choice.message.tool_calls) > 0 265 | 266 | 
tool_call = choice.message.tool_calls[0] 267 | assert tool_call.function.name == "get_weather" 268 | 269 | # Проверяем параметры вызова функции 270 | function_args = json.loads(tool_call.function.arguments) 271 | assert "city" in function_args 272 | assert function_args["city"] == "Москва" 273 | 274 | 275 | 276 | def test_completion_with_correct_parameters_with_max_temp(): 277 | response = oai.chat.completions.create( 278 | model="yandexgpt/latest", 279 | messages=[ 280 | {"role": "system", "content": system_prompt}, 281 | {"role": "user", "content": user_prompt} 282 | ], 283 | temperature=1, 284 | max_tokens=100, 285 | ) 286 | assert response and hasattr(response, 'choices') and response.choices 287 | content = response.choices[0].message.content 288 | assert content is not None and content != "" and isinstance(content, str) 289 | 290 | 291 | 292 | @pytest.mark.skip(reason="no way of currently testing this") 293 | def test_embeddings_with_invalid_parameters(): 294 | 295 | response = oai.embeddings.create(input = [emb_prompt], model='invalid-model') 296 | 297 | assert hasattr(response, 'error') and response.error 298 | 299 | 300 | def test_streaming_completion_with_tools_e2e(): 301 | tools = [ 302 | { 303 | "type": "function", 304 | "function": { 305 | "name": "get_weather", 306 | "description": "Получить текущую погоду в указанном городе", 307 | "parameters": { 308 | "type": "object", 309 | "properties": { 310 | "city": { 311 | "type": "string", 312 | "description": "Название города" 313 | } 314 | }, 315 | "required": ["city"] 316 | } 317 | } 318 | } 319 | ] 320 | 321 | response: list[openai.ChatCompletionChunk] = oai.chat.completions.create( 322 | model="yandexgpt/latest", 323 | messages=[ 324 | {"role": "system", "content": "Вы - помощник, который может узнавать погоду"}, 325 | {"role": "user", "content": "Какая погода в Москве?"} 326 | ], 327 | tools=tools, 328 | temperature=0, 329 | stream=True 330 | ) 331 | 332 | collected_chunks = [] 333 | 
collected_tool_calls = [] 334 | 335 | # Собираем все чанки 336 | for chunk in response: 337 | logger.info(chunk) 338 | collected_chunks.append(chunk) 339 | if chunk.choices[0]: 340 | if chunk.choices[0].delta: 341 | if chunk.choices[0].delta.tool_calls: 342 | collected_tool_calls.extend(chunk.choices[0].delta.tool_calls) 343 | else: 344 | if chunk.choices[0].message and chunk.choices[0].message.get('tool_calls'): 345 | collected_tool_calls.extend(chunk.choices[0].message.get('tool_calls')) 346 | 347 | 348 | # Проверяем что получили только один чанк 349 | assert len(collected_chunks) == 1, "Должен быть получен только один чанк с tool_calls" 350 | 351 | # Проверяем что finish_reason именно tool_calls 352 | assert collected_chunks[0].choices[0].finish_reason == "tool_calls" 353 | 354 | # Проверяем что получили ровно один tool_call 355 | assert len(collected_tool_calls) == 1, "Должен быть получен ровно один tool_call" 356 | 357 | # Проверяем содержимое tool_call 358 | tool_call = collected_tool_calls[0] 359 | assert tool_call.get('function').get('name') == "get_weather" 360 | 361 | # Проверяем параметры вызова функции 362 | function_args = json.loads(tool_call.get('function').get('arguments')) 363 | assert "city" in function_args 364 | assert function_args["city"].lower() == "москва" 365 | 366 | # Проверяем что content пустой, так как это tool_call 367 | assert collected_chunks[0].choices[0].delta is None 368 | 369 | 370 | def test_completion_with_multiple_tool_calls_and_responses(): 371 | tools = [ 372 | { 373 | "type": "function", 374 | "function": { 375 | "name": "get_weather", 376 | "description": "Получить текущую погоду в указанном городе", 377 | "parameters": { 378 | "type": "object", 379 | "properties": { 380 | "city": { 381 | "type": "string", 382 | "description": "Название города" 383 | } 384 | }, 385 | "required": ["city"] 386 | } 387 | } 388 | }, 389 | { 390 | "type": "function", 391 | "function": { 392 | "name": "get_time", 393 | "description": "Получить 
текущее время в указанном городе", 394 | "parameters": { 395 | "type": "object", 396 | "properties": { 397 | "city": { 398 | "type": "string", 399 | "description": "Название города" 400 | } 401 | }, 402 | "required": ["city"] 403 | } 404 | } 405 | } 406 | ] 407 | 408 | response = oai.chat.completions.create( 409 | model="yandexgpt/latest", 410 | messages=[ 411 | {"role": "system", "content": "Вы - помощник, который может узнавать погоду и время"}, 412 | {"role": "user", "content": "Какая погода и время в Москве?"}, 413 | { 414 | "role": "assistant", 415 | "content": None, 416 | "tool_calls": [ 417 | { 418 | "id": "call_1", 419 | "type": "function", 420 | "function": { 421 | "name": "get_weather", 422 | "arguments": '{"city": "Москва"}' 423 | } 424 | }, 425 | { 426 | "id": "call_2", 427 | "type": "function", 428 | "function": { 429 | "name": "get_time", 430 | "arguments": '{"city": "Москва"}' 431 | } 432 | } 433 | ] 434 | }, 435 | { 436 | "role": "tool", 437 | "content": "Температура 20°C, солнечно", 438 | "tool_call_id": "call_1" 439 | }, 440 | { 441 | "role": "tool", 442 | "content": "Текущее время: 14:00", 443 | "tool_call_id": "call_2" 444 | } 445 | ], 446 | tools=tools, 447 | temperature=0 448 | ) 449 | 450 | assert response and hasattr(response, 'choices') and response.choices 451 | content = response.choices[0].message.content 452 | assert content is not None and content != "" and isinstance(content, str) 453 | assert "20" in content and "14" in content 454 | 455 | def test_completion_with_multiple_tool_calls_and_responses_tools_are_the_same(): 456 | tools = [ 457 | { 458 | "type": "function", 459 | "function": { 460 | "name": "get_weather", 461 | "description": "Получить текущую погоду в указанном городе", 462 | "parameters": { 463 | "type": "object", 464 | "properties": { 465 | "city": { 466 | "type": "string", 467 | "description": "Название города" 468 | } 469 | }, 470 | "required": ["city"] 471 | } 472 | } 473 | }, 474 | { 475 | "type": "function", 476 | 
"function": { 477 | "name": "get_time", 478 | "description": "Получить текущее время в указанном городе", 479 | "parameters": { 480 | "type": "object", 481 | "properties": { 482 | "city": { 483 | "type": "string", 484 | "description": "Название города" 485 | } 486 | }, 487 | "required": ["city"] 488 | } 489 | } 490 | } 491 | ] 492 | 493 | response = oai.chat.completions.create( 494 | model="yandexgpt/latest", 495 | messages=[ 496 | {"role": "system", "content": "Вы - помощник, который может узнавать погоду и время"}, 497 | {"role": "user", "content": "Какая погода в Москве?"}, 498 | { 499 | "role": "assistant", 500 | "content": None, 501 | "tool_calls": [ 502 | { 503 | "id": "call_1", 504 | "type": "function", 505 | "function": { 506 | "name": "get_weather", 507 | "arguments": '{"city": "Москва"}' 508 | } 509 | }, 510 | { 511 | "id": "call_2", 512 | "type": "function", 513 | "function": { 514 | "name": "get_weather", 515 | "arguments": '{"city": "Москва"}' 516 | } 517 | } 518 | ] 519 | }, 520 | { 521 | "role": "tool", 522 | "content": "Температура 20°C, солнечно", 523 | "tool_call_id": "call_1" 524 | }, 525 | { 526 | "role": "tool", 527 | "content": "Температура 22°C, солнечно", 528 | "tool_call_id": "call_2" 529 | } 530 | ], 531 | tools=tools, 532 | temperature=0 533 | ) 534 | 535 | assert response and hasattr(response, 'choices') and response.choices 536 | content = response.choices[0].message.content 537 | assert content is not None and content != "" and isinstance(content, str) 538 | assert "20" in content or "22" in content 539 | 540 | def test_streaming_completion_with_tools_e2e_hard(): 541 | tools = [ 542 | { 543 | "type": "function", 544 | "function": { 545 | "name": "get_weather", 546 | "description": "Получить текущую погоду в указанном городе", 547 | "parameters": { 548 | "type": "object", 549 | "properties": { 550 | "city": { 551 | "type": "string", 552 | "description": "Название города" 553 | } 554 | }, 555 | "required": ["city"] 556 | } 557 | } 558 | 
}, 559 | { 560 | "type": "function", 561 | "function": { 562 | "name": "get_time", 563 | "description": "Получить текущее время в указанном городе", 564 | "parameters": { 565 | "type": "object", 566 | "properties": { 567 | "city": { 568 | "type": "string", 569 | "description": "Название города" 570 | } 571 | }, 572 | "required": ["city"] 573 | } 574 | } 575 | } 576 | ] 577 | 578 | response = oai.chat.completions.create( 579 | model="yandexgpt/latest", 580 | messages=[ 581 | {"role": "system", "content": "Вы - помощник, который может узнавать погоду и время"}, 582 | {"role": "user", "content": "Какая погода в Москве?"}, 583 | { 584 | "role": "assistant", 585 | "content": None, 586 | "tool_calls": [ 587 | { 588 | "id": "call_1", 589 | "type": "function", 590 | "function": { 591 | "name": "get_weather", 592 | "arguments": '{"city": "Москва"}' 593 | } 594 | }, 595 | { 596 | "id": "call_2", 597 | "type": "function", 598 | "function": { 599 | "name": "get_time", 600 | "arguments": '{"city": "Москва"}' 601 | } 602 | } 603 | ] 604 | }, 605 | { 606 | "role": "tool", 607 | "content": "Температура 20°C, солнечно", 608 | "tool_call_id": "call_1" 609 | }, 610 | { 611 | "role": "tool", 612 | "content": "Текущее время: 14:00", 613 | "tool_call_id": "call_2" 614 | } 615 | ], 616 | tools=tools, 617 | temperature=0, 618 | stream=True 619 | ) 620 | 621 | collected_chunks = [] 622 | collected_messages = [] 623 | 624 | for chunk in response: 625 | collected_chunks.append(chunk) # save the event response 626 | chunk_message = chunk.choices[0].delta.content # extract the message 627 | collected_messages.append(chunk_message) 628 | 629 | collected_messages = [m for m in collected_messages if m is not None] 630 | full_reply_content = ''.join(collected_messages) 631 | assert full_reply_content is not None and full_reply_content != "" and isinstance(full_reply_content, str) 632 | 633 | 634 | @pytest.mark.parametrize("model", [ 635 | "yandexgpt/latest", 636 | ]) 637 | def 
test_completion_with_tool_missing_description(model): 638 | """ 639 | Тестирует поведение API, когда инструмент имеет функцию без описания. 640 | """ 641 | tools = [ 642 | { 643 | "type": "function", 644 | "function": { 645 | "name": "get_weather", 646 | # "description" is intentionally omitted 647 | "parameters": { 648 | "type": "object", 649 | "properties": { 650 | "city": { 651 | "type": "string", 652 | "description": "Название города" 653 | } 654 | }, 655 | "required": ["city"] 656 | } 657 | } 658 | } 659 | ] 660 | 661 | try: 662 | response = oai.chat.completions.create( 663 | model=model, 664 | messages=[ 665 | {"role": "system", "content": "Вы - помощник, который может узнавать погоду"}, 666 | {"role": "user", "content": "Какая погода в Москве?"} 667 | ], 668 | tools=tools, 669 | temperature=0 670 | ) 671 | # Assert that the response does not contain errors related to missing description 672 | assert response and not hasattr(response, 'error'), "Response should not have an error despite missing description." 673 | except Exception as e: 674 | pytest.fail(f"API вызвал исключение при отсутствии описания функции: {e}") 675 | 676 | 677 | @pytest.mark.parametrize("model", [ 678 | "yandexgpt/latest", 679 | ]) 680 | def test_completion_with_tool_missing_parameters(model): 681 | """ 682 | Тестирует поведение API, когда инструмент имеет функцию без параметров. 
683 | """ 684 | tools = [ 685 | { 686 | "type": "function", 687 | "function": { 688 | "name": "get_time", 689 | "description": "Получить текущее время в указанном городе", 690 | # "parameters" is intentionally omitted 691 | } 692 | } 693 | ] 694 | 695 | try: 696 | response = oai.chat.completions.create( 697 | model=model, 698 | messages=[ 699 | {"role": "system", "content": "Вы - помощник, который может узнавать время"}, 700 | {"role": "user", "content": "Который час в Москве?"} 701 | ], 702 | tools=tools, 703 | temperature=0 704 | ) 705 | # Assert that the response does not contain errors related to missing parameters 706 | assert response and not hasattr(response, 'error'), "Response should not have an error despite missing parameters." 707 | except Exception as e: 708 | pytest.fail(f"API вызвал исключение при отсутствии параметров функции: {e}") 709 | 710 | -------------------------------------------------------------------------------- /tests/test_fastapi.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastapi.testclient import TestClient 3 | from app.index import index 4 | 5 | client = TestClient(index) 6 | 7 | def test_health_check(): 8 | response = client.get("/health") 9 | assert response.status_code == 200 10 | assert response.json() == {"status": "healthy"} 11 | 12 | def test_readiness_probe(): 13 | response = client.get("/readyz") 14 | assert response.status_code == 200 15 | assert response.json() == {"status": "ready"} 16 | 17 | def test_liveness_probe(): 18 | response = client.get("/livez") 19 | assert response.status_code == 200 20 | assert response.json() == {"status": "alive"} 21 | 22 | def test_root(): 23 | response = client.get("/") 24 | assert response.status_code == 200 25 | assert response.json() == {"status": "Hello from Foundational Models Team! 
check .../docs for more info"} 26 | 27 | @pytest.mark.skip(reason="idk") 28 | def test_badge(): 29 | response = client.get("/badge") 30 | assert response.status_code == 200 31 | assert response.headers["location"].startswith("https://img.shields.io/badge/status-healthy-green") 32 | 33 | @pytest.mark.skip(reason="idk") 34 | def test_badge_sha(): 35 | response = client.get("/badge-sha") 36 | assert response.status_code == 200 37 | assert "sha-" in response.headers["location"] 38 | 39 | @pytest.mark.skip(reason="idk") 40 | def test_badge_ref(): 41 | response = client.get("/badge-ref") 42 | assert response.status_code == 200 43 | assert "ref-" in response.headers["location"] 44 | 45 | @pytest.mark.skip(reason="idk") 46 | def test_non_existent_endpoint(): 47 | response = client.get("/non-existent-endpoint") 48 | assert response.status_code == 405 49 | assert response.json() == {"detail": "Method Not Allowed"} 50 | 51 | 52 | -------------------------------------------------------------------------------- /tests/test_langchain.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from langchain import LLMChain 3 | from langchain.prompts import PromptTemplate 4 | from langchain_core.tools import tool 5 | from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage 6 | from langchain_openai import ChatOpenAI 7 | from dotenv import load_dotenv 8 | import os 9 | 10 | @pytest.fixture 11 | def llm_setup(): 12 | load_dotenv('.testenv') 13 | 14 | FOLDER = os.getenv("FOLDER_ID") 15 | API_KEY = os.getenv("YANDEX_API_KEY") 16 | 17 | OPENAI_API_KEY = f"{FOLDER}@{API_KEY}" 18 | base_url = f"http://localhost:9041/v1/" 19 | 20 | llm = ChatOpenAI( 21 | api_key=OPENAI_API_KEY, 22 | base_url=base_url, 23 | model="yandexgpt/latest", 24 | temperature=0 25 | ) 26 | return llm 27 | 28 | @tool 29 | def add(a: int, b: int) -> int: 30 | """Adds a and b.""" 31 | return a + b 32 | 33 | @tool 34 | def multiply(a: int, b: 
int) -> int: 35 | """Multiplies a and b.""" 36 | return a * b 37 | 38 | def test_simple_addition(llm_setup): 39 | messages = [HumanMessage("Сколько будет 2 + 2?")] 40 | ai_msg = llm_setup.invoke(messages) 41 | assert isinstance(ai_msg.content, str) 42 | assert "4" in ai_msg.content 43 | 44 | def test_calculation_flow(llm_setup): 45 | tools = [add, multiply] 46 | llm_with_tools = llm_setup.bind_tools(tools) 47 | messages = [] 48 | 49 | # Первый запрос: 3 * 12 50 | messages.append(HumanMessage("What is 3 * 12? Use tools!")) 51 | ai_msg = llm_with_tools.invoke(messages) 52 | messages.append(ai_msg) 53 | 54 | for tool_call in ai_msg.tool_calls: 55 | selected_tool = {"add": add, "multiply": multiply}[tool_call["name"].lower()] 56 | tool_msg = selected_tool.invoke(tool_call) 57 | messages.append(tool_msg) 58 | 59 | messages.append(llm_with_tools.invoke(messages)) 60 | 61 | # Второй запрос: 11 + 49 62 | messages.append(HumanMessage("What is 11 + 49? Use tools!")) 63 | ai_msg = llm_with_tools.invoke(messages) 64 | messages.append(ai_msg) 65 | 66 | for tool_call in ai_msg.tool_calls: 67 | selected_tool = {"add": add, "multiply": multiply}[tool_call["name"].lower()] 68 | tool_msg = selected_tool.invoke(tool_call) 69 | messages.append(tool_msg) 70 | 71 | messages.append(llm_with_tools.invoke(messages)) 72 | 73 | # Сложение результатов 74 | messages.append(HumanMessage("Теперь сложи результаты!")) 75 | ai_msg = llm_with_tools.invoke(messages) 76 | messages.append(ai_msg) 77 | 78 | for tool_call in ai_msg.tool_calls: 79 | selected_tool = {"add": add, "multiply": multiply}[tool_call["name"].lower()] 80 | tool_msg = selected_tool.invoke(tool_call) 81 | messages.append(tool_msg) 82 | 83 | final_message = llm_with_tools.invoke(messages) 84 | messages.append(final_message) 85 | 86 | assert "96" in final_message.content -------------------------------------------------------------------------------- /tests/test_unit.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | from app.yandex.completions import _adapt_messages 3 | from app.yandex.models import Message as YaChatCompletionMessage 4 | from openai.types.chat.chat_completion_tool_message_param import ChatCompletionToolMessageParam 5 | from openai.types.chat.chat_completion_user_message_param import ChatCompletionUserMessageParam 6 | from openai.types.chat.chat_completion_system_message_param import ChatCompletionSystemMessageParam 7 | from openai.types.chat.chat_completion_function_message_param import ChatCompletionFunctionMessageParam 8 | from openai.types.chat.chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam 9 | from fastapi import HTTPException 10 | from app.yandex.completions import _get_completions_model_uri 11 | 12 | def test_adapt_messages(): 13 | # Создаем фиктивные данные для теста 14 | messages = [ 15 | ChatCompletionSystemMessageParam(role='system', content='You are a helpful assistant.'), 16 | ChatCompletionUserMessageParam(role='user', content='Hello!'), 17 | ChatCompletionAssistantMessageParam(role='assistant', content='Hi there!'), 18 | ChatCompletionAssistantMessageParam(role='assistant', tool_calls=[ 19 | {'id': '123', 'function': {'name': 'test_function', 'arguments': '{"arg1": "value1"}'}} 20 | ]), 21 | ChatCompletionToolMessageParam(role='tool', tool_call_id='123', content='Tool result') 22 | ] 23 | 24 | # Ожидаемый результат 25 | expected_result = [ 26 | YaChatCompletionMessage(**{"role": "system", "text": "You are a helpful assistant."}), 27 | YaChatCompletionMessage(**{"role": "user", "text": "Hello!"}), 28 | YaChatCompletionMessage(**{"role": "assistant", "text": "Hi there!"}), 29 | YaChatCompletionMessage(**{ 30 | "role": "assistant", 31 | "toolCallList": { 32 | "toolCalls": [ 33 | {"functionCall": {"name": "test_function", "arguments": {"arg1": "value1"}}} 34 | ] 35 | } 36 | }), 37 | YaChatCompletionMessage(**{ 38 
| "role": "assistant", 39 | "toolResultList": { 40 | "toolResults": [ 41 | {"functionResult": {"name": "test_function", "content": "Tool result"}} 42 | ] 43 | } 44 | }), 45 | ] 46 | 47 | # Запускаем тестируемую функцию 48 | result = _adapt_messages(messages) 49 | 50 | # Проверяем, что результат соответствует ожиданиям 51 | assert result == expected_result 52 | 53 | def test_adapt_messages_no_tools(): 54 | # Тест без вызова инструментов 55 | messages = [ 56 | ChatCompletionSystemMessageParam(role='system', content='You are a helpful assistant.'), 57 | ChatCompletionUserMessageParam(role='user', content='Hello!'), 58 | ChatCompletionAssistantMessageParam(role='assistant', content='Hi there!'), 59 | ] 60 | 61 | expected_result = [ 62 | YaChatCompletionMessage(**{"role": "system", "text": "You are a helpful assistant."}), 63 | YaChatCompletionMessage(**{"role": "user", "text": "Hello!"}), 64 | YaChatCompletionMessage(**{"role": "assistant", "text": "Hi there!"}), 65 | ] 66 | 67 | result = _adapt_messages(messages) 68 | assert result == expected_result 69 | 70 | def test_adapt_messages_sequential_tool_calls(): 71 | # Тест с последовательным вызовом инструментов 72 | messages = [ 73 | ChatCompletionSystemMessageParam(role='system', content='You are a helpful assistant.'), 74 | ChatCompletionUserMessageParam(role='user', content='Hello!'), 75 | ChatCompletionAssistantMessageParam(role='assistant', content='Hi there!'), 76 | ChatCompletionAssistantMessageParam(role='assistant', tool_calls=[ 77 | {'id': '123', 'function': {'name': 'test_function', 'arguments': '{"arg1": "value1"}'}} 78 | ]), 79 | ChatCompletionToolMessageParam(role='tool', tool_call_id='123', content='Tool result'), 80 | ChatCompletionAssistantMessageParam(role='assistant', tool_calls=[ 81 | {'id': '333', 'function': {'name': 'test_function', 'arguments': '{"arg1": "value2"}'}} 82 | ]), 83 | ChatCompletionToolMessageParam(role='tool', tool_call_id='333', content='Tool result 2') 84 | ] 85 | 86 | # Ожидаемый 
результат 87 | expected_result = [ 88 | YaChatCompletionMessage(**{"role": "system", "text": "You are a helpful assistant."}), 89 | YaChatCompletionMessage(**{"role": "user", "text": "Hello!"}), 90 | YaChatCompletionMessage(**{"role": "assistant", "text": "Hi there!"}), 91 | YaChatCompletionMessage(**{ 92 | "role": "assistant", 93 | "toolCallList": { 94 | "toolCalls": [ 95 | {"functionCall": {"name": "test_function", "arguments": {"arg1": "value1"}}} 96 | ] 97 | } 98 | }), 99 | YaChatCompletionMessage(**{ 100 | "role": "assistant", 101 | "toolResultList": { 102 | "toolResults": [ 103 | {"functionResult": {"name": "test_function", "content": "Tool result"}} 104 | ] 105 | } 106 | }), 107 | YaChatCompletionMessage(**{ 108 | "role": "assistant", 109 | "toolCallList": { 110 | "toolCalls": [ 111 | {"functionCall": {"name": "test_function", "arguments": {"arg1": "value2"}}} 112 | ] 113 | } 114 | }), 115 | YaChatCompletionMessage(**{ 116 | "role": "assistant", 117 | "toolResultList": { 118 | "toolResults": [ 119 | {"functionResult": {"name": "test_function", "content": "Tool result 2"}} 120 | ] 121 | } 122 | }), 123 | ] 124 | 125 | # Запускаем тестируемую функцию 126 | result = _adapt_messages(messages) 127 | 128 | # Проверяем, что результат соответствует ожиданиям 129 | assert result == expected_result 130 | 131 | def test_adapt_messages_sequential_tool_calls_diff_names(): 132 | # Тест с последовательным вызовом инструментов 133 | messages = [ 134 | ChatCompletionSystemMessageParam(role='system', content='You are a helpful assistant.'), 135 | ChatCompletionUserMessageParam(role='user', content='Hello!'), 136 | ChatCompletionAssistantMessageParam(role='assistant', content='Hi there!'), 137 | ChatCompletionAssistantMessageParam(role='assistant', tool_calls=[ 138 | {'id': '123', 'function': {'name': 'test_function', 'arguments': '{"arg1": "value1"}'}} 139 | ]), 140 | ChatCompletionToolMessageParam(role='tool', tool_call_id='123', content='Tool result'), 141 | 
ChatCompletionAssistantMessageParam(role='assistant', tool_calls=[ 142 | {'id': '333', 'function': {'name': 'TEST_function', 'arguments': '{"arg1": "value2"}'}} 143 | ]), 144 | ChatCompletionToolMessageParam(role='tool', tool_call_id='333', content='Tool result 2') 145 | ] 146 | 147 | # Ожидаемый результат 148 | expected_result = [ 149 | YaChatCompletionMessage(**{"role": "system", "text": "You are a helpful assistant."}), 150 | YaChatCompletionMessage(**{"role": "user", "text": "Hello!"}), 151 | YaChatCompletionMessage(**{"role": "assistant", "text": "Hi there!"}), 152 | YaChatCompletionMessage(**{ 153 | "role": "assistant", 154 | "toolCallList": { 155 | "toolCalls": [ 156 | {"functionCall": {"name": "test_function", "arguments": {"arg1": "value1"}}} 157 | ] 158 | } 159 | }), 160 | YaChatCompletionMessage(**{ 161 | "role": "assistant", 162 | "toolResultList": { 163 | "toolResults": [ 164 | {"functionResult": {"name": "test_function", "content": "Tool result"}} 165 | ] 166 | } 167 | }), 168 | YaChatCompletionMessage(**{ 169 | "role": "assistant", 170 | "toolCallList": { 171 | "toolCalls": [ 172 | {"functionCall": {"name": "TEST_function", "arguments": {"arg1": "value2"}}} 173 | ] 174 | } 175 | }), 176 | YaChatCompletionMessage(**{ 177 | "role": "assistant", 178 | "toolResultList": { 179 | "toolResults": [ 180 | {"functionResult": {"name": "TEST_function", "content": "Tool result 2"}} 181 | ] 182 | } 183 | }), 184 | ] 185 | 186 | # Запускаем тестируемую функцию 187 | result = _adapt_messages(messages) 188 | 189 | # Проверяем, что результат соответствует ожиданиям 190 | assert result == expected_result 191 | 192 | def test_adapt_messages_parallel_tool_calls(): 193 | # Тест с параллельным вызовом инструментов 194 | messages = [ 195 | ChatCompletionSystemMessageParam(role='system', content='You are a helpful assistant.'), 196 | ChatCompletionUserMessageParam(role='user', content='Hello!'), 197 | ChatCompletionAssistantMessageParam(role='assistant', content='Hi 
there!'), 198 | ChatCompletionAssistantMessageParam(role='assistant', tool_calls=[ 199 | {'id': '123', 'function': {'name': 'test_function', 'arguments': '{"arg1": "value1"}'}}, 200 | {'id': '111', 'function': {'name': 'test_function22', 'arguments': '{"arg1": "value3"}'}} 201 | ]), 202 | ChatCompletionToolMessageParam(role='tool', tool_call_id='123', content='Tool result'), 203 | ChatCompletionToolMessageParam(role='tool', tool_call_id='111', content='Tool result 3'), 204 | ChatCompletionAssistantMessageParam(role='assistant', tool_calls=[ 205 | {'id': '333', 'function': {'name': 'TEST_function', 'arguments': '{"arg1": "value2"}'}} 206 | ]), 207 | ChatCompletionToolMessageParam(role='tool', tool_call_id='333', content='Tool result 2') 208 | ] 209 | 210 | # Ожидаемый результат 211 | expected_result = [ 212 | YaChatCompletionMessage(**{"role": "system", "text": "You are a helpful assistant."}), 213 | YaChatCompletionMessage(**{"role": "user", "text": "Hello!"}), 214 | YaChatCompletionMessage(**{"role": "assistant", "text": "Hi there!"}), 215 | YaChatCompletionMessage(**{ 216 | "role": "assistant", 217 | "toolCallList": { 218 | "toolCalls": [ 219 | {"functionCall": {"name": "test_function", "arguments": {"arg1": "value1"}}}, 220 | {"functionCall": {"name": "test_function22", "arguments": {"arg1": "value3"}}} 221 | ] 222 | } 223 | }), 224 | YaChatCompletionMessage(**{ 225 | "role": "assistant", 226 | "toolResultList": { 227 | "toolResults": [ 228 | {"functionResult": {"name": "test_function", "content": "Tool result"}}, 229 | {"functionResult": {"name": "test_function22", "content": "Tool result 3"}} 230 | ] 231 | } 232 | }), 233 | YaChatCompletionMessage(**{ 234 | "role": "assistant", 235 | "toolCallList": { 236 | "toolCalls": [ 237 | {"functionCall": {"name": "TEST_function", "arguments": {"arg1": "value2"}}} 238 | ] 239 | } 240 | }), 241 | YaChatCompletionMessage(**{ 242 | "role": "assistant", 243 | "toolResultList": { 244 | "toolResults": [ 245 | 
{"functionResult": {"name": "TEST_function", "content": "Tool result 2"}} 246 | ] 247 | } 248 | }), 249 | ] 250 | 251 | # Запускаем тестируемую функцию 252 | result = _adapt_messages(messages) 253 | 254 | # Проверяем, что результат соответствует ожиданиям 255 | assert result == expected_result 256 | 257 | 258 | 259 | @pytest.mark.parametrize("model, folder_id, expected_uri", [ 260 | ("yandexgpt/latest", "folder123", "gpt://folder123/yandexgpt/latest"), 261 | ("gpt-4o", "folder123", "gpt://folder123/yandexgpt/latest"), 262 | ("gpt-3.5", "folder456", "gpt://folder456/yandexgpt/latest"), 263 | ("gpt://folder789/custom_model/latest", "folder789", "gpt://folder789/custom_model/latest"), 264 | ("ds://folder000/custom_model/rc", "folder000", "ds://folder000/custom_model/rc"), 265 | ]) 266 | def test_get_completions_model_uri(model, folder_id, expected_uri): 267 | assert _get_completions_model_uri(model, folder_id) == expected_uri 268 | 269 | -------------------------------------------------------------------------------- /vercel.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 2, 3 | "builds": [ 4 | { "src": "main.py", "use": "@vercel/python" } 5 | ], 6 | "routes": [ 7 | { "src": "/(.*)", "dest": "/main.py" } 8 | ], 9 | "env": { 10 | "APP_MODULE": "main:app" 11 | } 12 | } --------------------------------------------------------------------------------