├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── documentation_improvement.yaml │ ├── enhancement.yaml │ └── feature_request.yaml ├── mergify.yml └── workflows │ ├── Nightly_CI_main.yaml │ ├── build_dev_python_package.yaml │ ├── publish_dev_package.yaml │ ├── publish_release_image.yaml │ ├── publish_release_package.yaml │ ├── pylint.yaml │ └── unit_test_main.yaml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Makefile ├── OWNERS ├── README.md ├── cache_config_template.yml ├── codecov.yml ├── docs ├── .readthedocs.yaml ├── GPT-Cache-Multinode.png ├── GPTCache-Distributed-Search.png ├── GPTCache-Local-Search.png ├── GPTCache.png ├── GPTCacheStructure.png ├── Makefile ├── _exts │ ├── docgen2.py │ └── index_con.py ├── _templates │ ├── author.html │ ├── copyright.html │ ├── function.rst │ └── index.rst ├── bootcamp │ ├── assets │ │ ├── image_generation_gradio.png │ │ ├── speech_to_text_gradio.png │ │ └── vqa.png │ ├── langchain │ │ ├── baby_agi.ipynb │ │ ├── index.rst │ │ ├── qa_generation.ipynb │ │ ├── question_answering.ipynb │ │ └── sqlite.ipynb │ ├── llama_index │ │ ├── index.rst │ │ └── webpage_qa.ipynb │ ├── openai │ │ ├── chat.ipynb │ │ ├── image_generation.ipynb │ │ ├── index.rst │ │ ├── language_translate.ipynb │ │ ├── speech_to_text.ipynb │ │ ├── sql_translate.ipynb │ │ └── tweet_classifier.ipynb │ ├── replicate │ │ ├── index.rst │ │ └── visual_question_answering.ipynb │ ├── streamlit │ │ ├── gptcache-streamlit-audio │ │ │ ├── .streamlit │ │ │ │ └── config.toml │ │ │ ├── README.md │ │ │ ├── audio.py │ │ │ ├── example.png │ │ │ ├── local │ │ │ │ └── .cache │ │ │ └── requirements.txt │ │ └── gptcache-streamlit-image │ │ │ ├── README.md │ │ │ ├── example.png │ │ │ ├── imagen.py │ │ │ ├── local │ │ │ └── .cache │ │ │ └── requirements.txt │ ├── temperature │ │ ├── chat.ipynb │ │ ├── create_image.ipynb │ │ └── index.rst │ └── vertex │ │ ├── index.rst │ │ └── vertexai_caching.ipynb ├── conf.py ├── configure_it.md ├── contributing.md ├── feature.md ├── gptcache_live.pdf ├── horizontal-scaling-usage.md ├── index.rst ├── make.bat ├── references │ └── index.rst ├── release_note.md ├── requirements.txt ├── toc.bak └── usage.md ├── examples ├── README.md ├── adapter │ ├── api.py │ ├── langchain_llms.py │ └── openai_chatgpt.py ├── benchmark │ ├── benchmark_sqlite_faiss_onnx.py │ ├── mock_data.json │ ├── similiar_qqp.json.gz │ └── similiar_qqp_full.json.gz ├── context_process │ ├── selective_context.py │ └── summarization_context.py ├── data_manager │ ├── map_manager.py │ ├── scalar_store.py │ └── vector_store.py ├── embedding │ ├── default.py │ ├── onnx.py │ ├── paddlenlp.py │ └── random.py ├── eviction │ └── distributed_eviction.py ├── integrate │ ├── diffusers │ │ └── stable_diffusion.py │ ├── dolly │ │ └── basic_usage.py │ ├── langchain │ │ ├── langchain_llms_mock.py │ │ ├── langchain_prompt_openai.py │ │ ├── langchain_qa_chain.py │ │ └── langchain_similaritycache_openai.py │ ├── llama_cpp │ │ └── basic_usage.py │ ├── openai │ │ ├── basic_usage.py │ │ ├── create_image.py │ │ ├── qa.py │ │ ├── readme.py │ │ └── summarize.py │ ├── replicate │ │ └── vqa.py │ └── stability │ │ └── text_to_image.py ├── processor │ └── temperature_example.py ├── session │ └── session.py ├── similarity_evaluation │ ├── exact_match.py │ ├── onnx.py │ ├── search_distance.py │ └── sequence_match.py └── vqa_demo.py ├── gptcache ├── __init__.py ├── adapter │ ├── __init__.py │ ├── adapter.py │ ├── api.py │ ├── base.py │ ├── diffusers.py │ ├── dolly.py │ ├── langchain_models.py │ ├── llama_cpp.py │ ├── minigpt4.py │ 
├── openai.py │ ├── replicate.py │ └── stability_sdk.py ├── client.py ├── config.py ├── core.py ├── embedding │ ├── __init__.py │ ├── base.py │ ├── cohere.py │ ├── data2vec.py │ ├── fasttext.py │ ├── huggingface.py │ ├── langchain.py │ ├── onnx.py │ ├── openai.py │ ├── paddlenlp.py │ ├── rwkv.py │ ├── sbert.py │ ├── string.py │ ├── timm.py │ ├── uform.py │ └── vit.py ├── manager │ ├── __init__.py │ ├── data_manager.py │ ├── eviction │ │ ├── __init__.py │ │ ├── base.py │ │ ├── distributed_cache.py │ │ ├── manager.py │ │ ├── memory_cache.py │ │ └── redis_eviction.py │ ├── eviction_manager.py │ ├── factory.py │ ├── object_data │ │ ├── __init__.py │ │ ├── base.py │ │ ├── local_storage.py │ │ ├── manager.py │ │ └── s3_storage.py │ ├── scalar_data │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dynamo_storage.py │ │ ├── manager.py │ │ ├── mongo.py │ │ ├── redis_storage.py │ │ └── sql_storage.py │ └── vector_data │ │ ├── __init__.py │ │ ├── base.py │ │ ├── chroma.py │ │ ├── docarray_index.py │ │ ├── faiss.py │ │ ├── hnswlib_store.py │ │ ├── manager.py │ │ ├── milvus.py │ │ ├── pgvector.py │ │ ├── qdrant.py │ │ ├── redis_vectorstore.py │ │ ├── usearch.py │ │ └── weaviate.py ├── processor │ ├── __init__.py │ ├── check_hit.py │ ├── context │ │ ├── __init__.py │ │ ├── concat_context.py │ │ ├── context.py │ │ ├── selective_context.py │ │ └── summarization_context.py │ ├── post.py │ └── pre.py ├── report.py ├── session.py ├── similarity_evaluation │ ├── __init__.py │ ├── cohere_rerank.py │ ├── distance.py │ ├── exact_match.py │ ├── kreciprocal.py │ ├── np.py │ ├── onnx.py │ ├── sbert_crossencoder.py │ ├── sequence_match.py │ ├── similarity_evaluation.py │ └── time.py └── utils │ ├── __init__.py │ ├── cache_func.py │ ├── dependency_control.py │ ├── error.py │ ├── lazy_import.py │ ├── log.py │ ├── response.py │ ├── softmax.py │ ├── time.py │ └── token.py ├── gptcache_server ├── __init__.py ├── dockerfiles │ └── Dockerfile └── server.py ├── pylint.conf ├── requirements.txt ├── scripts ├── manage_conda_env.sh └── remove_example_cache.sh ├── setup.py └── tests ├── integration_tests ├── base │ └── client_base.py ├── common │ ├── common_func.py │ └── common_type.py ├── config │ └── log_config.py ├── examples │ ├── map │ │ └── test_example_map.py │ ├── sqlite_faiss_mock │ │ └── test_example_sqlite_faiss.py │ └── sqlite_faiss_onnx │ │ └── test_example_sqlite_faiss_onnx.py ├── processor │ └── pre │ │ └── test_pre_without_prompt.py ├── test_redis_onnx.py ├── test_sqlite_faiss_onnx.py ├── test_sqlite_milvus_sbert.py └── utils │ └── util_log.py ├── pytest.ini ├── requirements.txt └── unit_tests ├── adapter ├── test_adapter.py ├── test_api.py ├── test_diffusers.py ├── test_dolly.py ├── test_langchain_models.py ├── test_llama_cpp.py ├── test_openai.py ├── test_replicate.py └── test_stability.py ├── embedding ├── test_cohere.py ├── test_data2vec.py ├── test_embedding_openai.py ├── test_embedding_string.py ├── test_fasttext.py ├── test_huggingface.py ├── test_langchain.py ├── test_onnx.py ├── test_paddlenlp.py ├── test_rwkv.py ├── test_sbert.py ├── test_timm.py ├── test_uform.py └── test_vit.py ├── eviction ├── test_distributed_cache.py └── test_memory_cache.py ├── manager ├── test_base.py ├── test_chromadb.py ├── test_dynamo_storage.py ├── test_eviction.py ├── test_factory.py ├── test_local_index.py ├── test_map.py ├── test_milvusdb.py ├── test_mongo.py ├── test_object_storage.py ├── test_pgvector.py ├── test_qdrant.py ├── test_redis.py ├── test_redis_cache_storage.py ├── test_sql_scalar.py ├── test_usearch.py └── 
test_weaviate.py ├── processor ├── test_concat_context.py ├── test_context.py ├── test_post.py ├── test_pre.py ├── test_selective_context.py └── test_summarize_context.py ├── similarity_evaluation ├── test_cohere_rerank.py ├── test_evaluation_kreciprocal.py ├── test_evaluation_onnx.py ├── test_evaluation_sbert.py ├── test_evaluation_sequence.py ├── test_evaluation_string.py ├── test_evalution_time.py ├── test_np.py └── test_simple.py ├── test_client.py ├── test_core.py ├── test_session.py └── utils ├── test_error.py ├── test_log.py └── test_response.py /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: 🐞 Bug Report 2 | description: Create a bug report to help us improve GPTCache 3 | title: "[Bug]: " 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks for taking the time to fill out this bug report! Please fill the form in English! 9 | - type: textarea 10 | attributes: 11 | label: Current Behavior 12 | description: A concise description of what you're experiencing. 13 | placeholder: | 14 | When I do , happens and I see the error message attached below: 15 | ```...``` 16 | validations: 17 | required: true 18 | - type: textarea 19 | attributes: 20 | label: Expected Behavior 21 | description: A concise description of what you expected to happen. 22 | placeholder: When I do , should happen instead. 23 | validations: 24 | required: false 25 | - type: textarea 26 | attributes: 27 | label: Steps To Reproduce 28 | description: Steps to reproduce the behavior. 29 | placeholder: | 30 | 1. In this environment... 31 | 2. With this config... 32 | 3. Run '...' 33 | 4. See error... 34 | render: markdown 35 | validations: 36 | required: false 37 | - type: textarea 38 | attributes: 39 | label: Environment 40 | description: | 41 | Enter the Environment Details: 42 | render: markdown 43 | validations: 44 | required: false 45 | - type: textarea 46 | attributes: 47 | label: Anything else? 48 | description: | 49 | Links? References? Anything that will give us more context about the issue you are encountering! 50 | validations: 51 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation_improvement.yaml: -------------------------------------------------------------------------------- 1 | name: 📖 Documentation Improvement 2 | description: Suggest improvements to our documentation 3 | title: "[DOCS]: " 4 | labels: [Documentation] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this documentation improvement request! 10 | - type: textarea 11 | attributes: 12 | label: Documentation Link 13 | description: Add a link to the page which needs improvement (if relevant) 14 | validations: 15 | required: false 16 | - type: textarea 17 | attributes: 18 | label: Describe the problem 19 | description: Is the documentation missing? Or is it confusing? Why is it confusing? 20 | validations: 21 | required: false 22 | - type: textarea 23 | attributes: 24 | label: Describe the improvement 25 | description: A clear and concise description of the improvement. 26 | validations: 27 | required: false 28 | - type: textarea 29 | attributes: 30 | label: Anything else? 31 | description: | 32 | Links? References? Anything that will give us more context about the issue you are encountering! 
33 | validations: 34 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.yaml: -------------------------------------------------------------------------------- 1 | name: Enhancement Request 2 | description: As a developer, I want to make an enhancement for GPTCache 3 | title: "[Enhancement]:" 4 | labels: [kind/enhancement] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to request/suggest an enhancement for GPTCache! Please fill the form in English! 10 | - type: textarea 11 | attributes: 12 | label: What would you like to be added? 13 | description: A concise description of what you're expecting/suggesting. 14 | placeholder: | 15 | I would like to suggest/request a feature that's like... 16 | validations: 17 | required: false 18 | - type: textarea 19 | attributes: 20 | label: Why is this needed? 21 | description: A concise description of the reason/motivation 22 | validations: 23 | required: false 24 | - type: textarea 25 | attributes: 26 | label: Anything else? 27 | description: | 28 | Links? References? Anything that will give us more context about this! 29 | validations: 30 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature Request 2 | description: As a user, I want to request a feature for GPTCache 3 | title: "[Feature]:" 4 | labels: [kind/feature] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to request a feature for GPTCache! Please fill the form in English! 10 | - type: textarea 11 | attributes: 12 | label: Is your feature request related to a problem? Please describe. 13 | description: A concise description of the problem you are facing or the motivation behind this feature request. 14 | placeholder: | 15 | I faced a problem due to which ... 16 | validations: 17 | required: false 18 | - type: textarea 19 | attributes: 20 | label: Describe the solution you'd like. 21 | description: A concise description of the solution for the issue. 22 | validations: 23 | required: false 24 | - type: textarea 25 | attributes: 26 | label: Describe an alternate solution. 27 | description: Is there any other approach to solve the problem? 28 | validations: 29 | required: false 30 | - type: textarea 31 | attributes: 32 | label: Anything else? (Additional Context) 33 | description: | 34 | Links? References? Anything that will give us more context about this! 
35 | validations: 36 | required: false -------------------------------------------------------------------------------- /.github/workflows/Nightly_CI_main.yaml: -------------------------------------------------------------------------------- 1 | name: Nightly CI 2 | 3 | on: 4 | schedule: 5 | # * is a special character in YAML so you have to quote this string 6 | # ┌───────────── minute (0 - 59) 7 | # │ ┌───────────── hour (0 - 23) 8 | # │ │ ┌───────────── day of the month (1 - 31) 9 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 10 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 11 | # │ │ │ │ │ 12 | # │ │ │ │ │ 13 | # │ │ │ │ │ 14 | - cron: "0 22 * * *" 15 | pull_request: 16 | branches: 17 | - main 18 | - dev 19 | paths: 20 | - '.github/workflows/Nightly_CI_main.yaml' 21 | 22 | jobs: 23 | nightly-CI-gpt-cache-cli-main: 24 | runs-on: ubuntu-20.04 25 | strategy: 26 | fail-fast: false 27 | services: 28 | postgres: 29 | image: ankane/pgvector 30 | ports: 31 | - 5432:5432 32 | env: 33 | POSTGRES_PASSWORD: postgres 34 | options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 35 | steps: 36 | - uses: actions/checkout@main 37 | 38 | - name: Set up Python 3.8 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: 3.8 42 | cache: pip 43 | 44 | - name: Install dependency 45 | shell: bash 46 | run: | 47 | pip install -r requirements.txt 48 | 49 | - name: Build and install 50 | shell: bash 51 | run: | 52 | python setup.py install 53 | 54 | - name: Install test dependency 55 | shell: bash 56 | working-directory: tests 57 | run: | 58 | pip install -r requirements.txt 59 | 60 | - name: Download the `en_core_web_sm` model 61 | shell: bash 62 | working-directory: tests 63 | run: | 64 | python3 -m spacy download en_core_web_sm 65 | 66 | - name: Nightly CI Tests 67 | timeout-minutes: 30 68 | shell: bash 69 | working-directory: tests 70 | run: | 71 | export IS_CI=true 72 | export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python 73 | python3 -m pytest ./ --tags L2 74 | -------------------------------------------------------------------------------- /.github/workflows/build_dev_python_package.yaml: -------------------------------------------------------------------------------- 1 | name: Build dev package 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - dev 7 | paths: 8 | - 'examples/**' 9 | - 'gptcache/**' 10 | - 'tests/**' 11 | - '!**.md' 12 | - '.github/workflows/**' 13 | pull_request: 14 | branches: 15 | - main 16 | - dev 17 | paths: 18 | - 'examples/**' 19 | - 'gptcache/**' 20 | - 'tests/**' 21 | - '!**.md' 22 | - '.github/workflows/**' 23 | workflow_dispatch: 24 | 25 | jobs: 26 | build-pypi: 27 | runs-on: ubuntu-20.04 28 | steps: 29 | - uses: actions/checkout@master 30 | - name: Setup Python 31 | uses: actions/setup-python@v4 32 | with: 33 | python-version: 3.8 34 | # - name: Get history and tags for SCM versioning 35 | # run: | 36 | # git fetch --prune --unshallow 37 | # git fetch --depth=1 origin +refs/tags/*:refs/tags/* 38 | - name: Install pypa/build 39 | run: >- 40 | python -m 41 | pip install 42 | build 43 | --user 44 | - name: Build a binary wheel and a source tarball 45 | run: >- 46 | python -m 47 | build 48 | --sdist 49 | --wheel 50 | --outdir dist/ 51 | . 
52 | - name: Archive production artifacts 53 | uses: actions/upload-artifact@v3 54 | with: 55 | name: dist 56 | path: | 57 | dist 58 | !dist/**/*.md 59 | -------------------------------------------------------------------------------- /.github/workflows/publish_dev_package.yaml: -------------------------------------------------------------------------------- 1 | name: Publish package to TestPyPI 2 | # on: 3 | # push: 4 | # branches: 5 | # - 'main' 6 | # paths: 7 | # - 'gptcache/**' 8 | # - '!**.md' 9 | # - '.github/workflows/publish_dev_package.yaml' 10 | # pull_request: 11 | # branches: 12 | # - main 13 | # paths: 14 | # - '.github/workflows/publish_dev_package.yaml' 15 | on: 16 | workflow_dispatch: 17 | 18 | schedule: 19 | # * is a special character in YAML so you have to quote this string 20 | # ┌───────────── minute (0 - 59) 21 | # │ ┌───────────── hour (0 - 23) 22 | # │ │ ┌───────────── day of the month (1 - 31) 23 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 24 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 25 | # │ │ │ │ │ 26 | # │ │ │ │ │ 27 | # │ │ │ │ │ 28 | - cron: '30 9 * * 1,4' 29 | 30 | 31 | jobs: 32 | build-and-publish-dev: 33 | runs-on: ubuntu-20.04 34 | steps: 35 | - uses: actions/checkout@master 36 | - name: Setup Python 37 | uses: actions/setup-python@v4 38 | with: 39 | python-version: 3.8 40 | # - name: Get history and tags for SCM versioning 41 | # run: | 42 | # git fetch --prune --unshallow 43 | # git fetch --depth=1 origin +refs/tags/*:refs/tags/* 44 | - name: Install pypi/build 45 | run: >- 46 | python -m 47 | pip install 48 | build 49 | --user 50 | - name: Build a binary wheel and a source tarball 51 | run: >- 52 | python -m 53 | build 54 | --sdist 55 | --wheel 56 | --outdir dist/ 57 | . 58 | - name: Publish distribution 📦 to Test PyPI 59 | uses: pypa/gh-action-pypi-publish@master 60 | with: 61 | password: ${{ secrets.PYPI_TEST_TOKEN }} 62 | repository_url: https://test.pypi.org/legacy/ 63 | -------------------------------------------------------------------------------- /.github/workflows/publish_release_image.yaml: -------------------------------------------------------------------------------- 1 | name: Publish image to dockerhub 2 | on: 3 | release: 4 | types: [published] 5 | workflow_dispatch: 6 | 7 | jobs: 8 | build-and-publish-image: 9 | runs-on: ubuntu-20.04 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@master 13 | 14 | - name: Get version 15 | id: get_version 16 | run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//} 17 | 18 | - name: Docker Build 19 | shell: bash 20 | run: | 21 | docker build "./gptcache_server/dockerfiles" -t "zilliz/gptcache:${{ steps.get_version.outputs.VERSION }}" 22 | 23 | - name: Docker login 24 | if: success() 25 | uses: azure/docker-login@v1 26 | with: 27 | username: ${{ secrets.ZILLIZ_DOCKERHUB_USERNAME }} 28 | password: ${{ secrets.ZILLIZ_DOCKERHUB_PASSWORD }} 29 | 30 | - name: Docker Push 31 | if: success() 32 | continue-on-error: false 33 | shell: bash 34 | run: | 35 | docker push zilliz/gptcache:${{ steps.get_version.outputs.VERSION }} 36 | docker tag zilliz/gptcache:${{ steps.get_version.outputs.VERSION }} zilliz/gptcache:latest 37 | docker push zilliz/gptcache:latest -------------------------------------------------------------------------------- /.github/workflows/publish_release_package.yaml: -------------------------------------------------------------------------------- 1 | name: Publish package to PyPI 2 | # on: 3 | # push: 4 | # branches: 5 | # - 'main' 6 | # paths: 7 | 
# - 'gptcache/**' 8 | # - '!**.md' 9 | # - '.github/workflows/publish_dev_package.yaml' 10 | # pull_request: 11 | # branches: 12 | # - main 13 | # paths: 14 | # - '.github/workflows/publish_dev_package.yaml' 15 | on: 16 | workflow_dispatch: 17 | 18 | release: 19 | types: [published] 20 | # tags: 21 | # description: 'Test scenario tags' 22 | # required: false 23 | # type: boolean 24 | 25 | jobs: 26 | build-and-publish-release: 27 | runs-on: ubuntu-20.04 28 | steps: 29 | - uses: actions/checkout@master 30 | - name: Setup Python 31 | uses: actions/setup-python@v4 32 | with: 33 | python-version: 3.8 34 | # - name: Get history and tags for SCM versioning 35 | # run: | 36 | # git fetch --prune --unshallow 37 | # git fetch --depth=1 origin +refs/tags/*:refs/tags/* 38 | - name: Install pypi/build 39 | run: >- 40 | python -m 41 | pip install 42 | build 43 | --user 44 | - name: Build a binary wheel and a source tarball 45 | run: >- 46 | python -m 47 | build 48 | --sdist 49 | --wheel 50 | --outdir dist/ 51 | . 52 | - name: Publish distribution 📦 to Test PyPI 53 | uses: pypa/gh-action-pypi-publish@master 54 | with: 55 | password: ${{ secrets.PYPI_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/pylint.yaml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - dev 7 | # file paths to consider in the event 8 | paths: 9 | - 'examples/**' 10 | - 'gptcache/**' 11 | - 'tests/**' 12 | - 'docs/**' 13 | - '!**.md' 14 | - '.github/workflows/**' 15 | pull_request: 16 | branches: 17 | - main 18 | - dev 19 | # file paths to consider in the event 20 | paths: 21 | - 'examples/**' 22 | - 'gptcache/**' 23 | - 'tests/**' 24 | - 'docs/**' 25 | - '!**.md' 26 | - '.github/workflows/**' 27 | workflow_dispatch: 28 | 29 | jobs: 30 | pylint: 31 | runs-on: ubuntu-20.04 32 | steps: 33 | - uses: actions/checkout@v3.0.0 34 | - name: Setup Python 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: 3.8 38 | - name: Python pylint 39 | run: | 40 | pip install pylint==2.10.2 41 | make pylint_check 42 | - name: Make the readthedoc html 43 | shell: bash 44 | working-directory: docs 45 | run: | 46 | pip install -r requirements.txt 47 | make html 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Zilliz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | @pip install -r requirements.txt 3 | @python setup.py install 4 | 5 | pip_upgrade: 6 | @python -m pip install --upgrade pip 7 | 8 | package: 9 | @python setup.py sdist bdist_wheel 10 | 11 | upload: 12 | @python -m twine upload dist/* 13 | 14 | upload_test: 15 | @python -m twine upload --repository-url https://test.pypi.org/legacy/ dist/* 16 | 17 | remove_example_cache: 18 | @bash ./scripts/remove_example_cache.sh 19 | 20 | create_conda_env: 21 | @bash ./scripts/manage_conda_env.sh create 22 | 23 | remove_conda_env: 24 | @bash ./scripts/manage_conda_env.sh remove 25 | 26 | pylint_check: 27 | pylint --rcfile=pylint.conf --output-format=colorized gptcache 28 | 29 | pytest: 30 | pytest tests/ -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | filters: 2 | ".*": 3 | reviewers: 4 | - SimFG 5 | - xiaofan-luan 6 | - cxie 7 | approvers: 8 | - SimFG 9 | - xiaofan-luan 10 | - cxie 11 | -------------------------------------------------------------------------------- /cache_config_template.yml: -------------------------------------------------------------------------------- 1 | # For `model_src`, `evaluation`, `post_function`, `pre_function`, 2 | # `storage_config` options, Check README for more. 3 | 4 | embedding: 5 | onnx 6 | embedding_config: 7 | # Set model kws here including `model`, `api_key` if needed 8 | storage_config: 9 | data_dir: 10 | gptcache_data 11 | manager: 12 | sqlite,faiss 13 | vector_params: 14 | # Set vector storage related params here 15 | evaluation: 16 | distance 17 | evaluation_config: 18 | # Set evaluation metric kws here 19 | pre_function: 20 | get_prompt 21 | post_function: 22 | first 23 | config: 24 | similarity_threshold: 0.8 25 | # Set other config here 26 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # Configuration File for CodeCov 2 | codecov: 3 | require_ci_to_pass: no 4 | notify: 5 | require_ci_to_pass: no 6 | wait_for_ci: false 7 | 8 | coverage: 9 | precision: 2 10 | round: down 11 | range: "70...100" 12 | 13 | status: 14 | project: 15 | default: 16 | target: 90% 17 | threshold: 0% #Allow the coverage to drop by threshold%, and posting a success status. 
18 | patch: 19 | default: 20 | target: 90% #target of patch diff 21 | threshold: 0% 22 | if_ci_failed: error #success, failure, error, ignore 23 | 24 | comment: 25 | layout: "reach, diff, flags, files" 26 | behavior: default 27 | require_changes: false 28 | branches: # branch names that can post comment 29 | - main 30 | - dev 31 | 32 | ignore: 33 | - "LICENSES" 34 | - ".git" 35 | - "*.yml" 36 | - "*.md" 37 | - "**/minigpt4.py" 38 | -------------------------------------------------------------------------------- /docs/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | python: 4 | version: 3.8 5 | system_packages: true -------------------------------------------------------------------------------- /docs/GPT-Cache-Multinode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPT-Cache-Multinode.png -------------------------------------------------------------------------------- /docs/GPTCache-Distributed-Search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCache-Distributed-Search.png -------------------------------------------------------------------------------- /docs/GPTCache-Local-Search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCache-Local-Search.png -------------------------------------------------------------------------------- /docs/GPTCache.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCache.png -------------------------------------------------------------------------------- /docs/GPTCacheStructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCacheStructure.png -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SPHINXAUTOBUILD ?= sphinx-autobuild 9 | SOURCEDIR = . 10 | BUILDDIR = _build 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/_exts/index_con.py: -------------------------------------------------------------------------------- 1 | from m2r2 import convert 2 | import os 3 | 4 | class IndexCon: 5 | 6 | def __init__(self, source, output): 7 | self.source = source 8 | self.output = output 9 | self.preprocess() 10 | 11 | def preprocess(self): 12 | with open(self.source, 'r') as f: 13 | 14 | # remove the CI link from the file 15 | lines = f.readlines() 16 | lines = [line for line in lines if '[CI]' not in line] 17 | 18 | # change local links to the ones related to the _build/html directory and extension to .html 19 | lines = [line.replace('](docs/', '](') for line in lines] 20 | lines = [line.replace('.md)', '.html)') for line in lines] 21 | 22 | # get the raw text within the
<details> tag 23 | start_details_tag = [line for line in lines if '<details>' in line] 24 | summary_tag = [line for line in lines if '</summary>' in line] 25 | end_details_tag = [line for line in lines if '</details>
' in line] 26 | start_details = lines.index(start_details_tag[0]) 27 | summary_line = lines.index(summary_tag[0]) 28 | end_details = lines.index(end_details_tag[0]) 29 | 30 | before = convert(''.join(lines[:start_details-1])) 31 | end = convert(''.join(lines[end_details+1:])) 32 | 33 | collapse_rst = lines[summary_line+1:end_details] 34 | collapse_rst = [ "**" + x.split("# ")[1][:-1] + "**\n" if '# ' in x else x for x in collapse_rst] 35 | 36 | # print(collapse_rst) 37 | 38 | collapse_rst = convert(''.join(collapse_rst)) 39 | collapse_rst = collapse_rst.split("\n") 40 | collapse_rst = [ ' ' + x for x in collapse_rst] 41 | 42 | collapse_rst = [f'\n.. collapse:: Click to SHOW examples\n'] + collapse_rst 43 | os.remove(self.output) 44 | 45 | with open(self.output, 'a') as f: 46 | f.write(before) 47 | f.write('\n'.join(collapse_rst)) 48 | f.write(end) 49 | f.write('\n\n') 50 | 51 | with open('toc.bak', 'r') as t: 52 | f.write(t.read()) 53 | 54 | if __name__ == '__main__': 55 | index = IndexCon('../../README.md') 56 | 57 | -------------------------------------------------------------------------------- /docs/_templates/author.html: -------------------------------------------------------------------------------- 1 | By Zilliz Inc. -------------------------------------------------------------------------------- /docs/_templates/copyright.html: -------------------------------------------------------------------------------- 1 |

© Copyright 2023, Zilliz Inc.

-------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ module_name | cap }} 2 | {{ module_name | title_bar }} 3 | 4 | .. contents:: Index 5 | 6 | {% for func in funcs -%} 7 | {{func[0]}} 8 | {{ func[0] | section_bar }} 9 | .. automodule:: {{func[1]}} 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | {% endfor %} -------------------------------------------------------------------------------- /docs/_templates/index.rst: -------------------------------------------------------------------------------- 1 | 🥸 API References 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | {% for module in modules %} 9 | {{ module }} 10 | {%- endfor -%} -------------------------------------------------------------------------------- /docs/bootcamp/assets/image_generation_gradio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/assets/image_generation_gradio.png -------------------------------------------------------------------------------- /docs/bootcamp/assets/speech_to_text_gradio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/assets/speech_to_text_gradio.png -------------------------------------------------------------------------------- /docs/bootcamp/assets/vqa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/assets/vqa.png -------------------------------------------------------------------------------- /docs/bootcamp/langchain/index.rst: -------------------------------------------------------------------------------- 1 | LangChain 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | qa_generation 10 | question_answering 11 | sqlite 12 | baby_agi -------------------------------------------------------------------------------- /docs/bootcamp/llama_index/index.rst: -------------------------------------------------------------------------------- 1 | Llama Index 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | webpage_qa -------------------------------------------------------------------------------- /docs/bootcamp/openai/index.rst: -------------------------------------------------------------------------------- 1 | OpenAI 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | chat 10 | image_generation 11 | sql_translate 12 | tweet_classifier 13 | image_generation 14 | speech_to_text 15 | -------------------------------------------------------------------------------- /docs/bootcamp/replicate/index.rst: -------------------------------------------------------------------------------- 1 | Replicate 2 | ================= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | visual_question_answering -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | base="dark" 3 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/README.md: -------------------------------------------------------------------------------- 1 | # GPTCache Demo for OpenAI Audio Transcription 2 | This project demonstrates how [GPTcache](https://github.com/zilliztech/GPTCache) can be used to save costs when using OpenAI’s audio transcription API. It provides a simple Streamlit app that allows users to input an audio file and see the corresponding transcribed text. The app uses a cache to store previously generated transcriptions and reuses them for the same audio file, thus avoiding making duplicate API calls. 3 | 4 | ## Requirements 5 | * Python 3.6 or later 6 | * Dependencies listed in requirements.txt 7 | * OpenAI API key 8 | ## Usage 9 | 1. Clone the repository to your local machine 10 | Install the required packages: pip install -r requirements.txt 11 | 2. Run the app: streamlit run audio.py 12 | 3. Open the app in your browser at http://localhost:8501 13 | 4. Enter your OpenAI API key and upload an audio file to transcribe, then click “generate” to wait for the transcribed text to appear. 14 | If a cache hit occurred, you should see a message like “cache” at the bottom of the transcribed text. 15 | 16 |

17 | example 18 |

19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-audio/example.png -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/local/.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-audio/local/.cache -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/requirements.txt: -------------------------------------------------------------------------------- 1 | gptcache 2 | pillow 3 | streamlit 4 | torch 5 | faiss-cpu 6 | torchaudio 7 | transformers 8 | sqlalchemy 9 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/README.md: -------------------------------------------------------------------------------- 1 | # GPTCache Demo for OpenAI Image Generation 2 | This project demonstrates how [GPTcache](https://github.com/zilliztech/GPTCache) can be used to save costs when using OpenAI’s DALL-E API. It provides a simple Streamlit app that allows users to input a prompt and see the corresponding DALL-E output image. The app uses a cache to store previously generated images and reuses them for the same prompt, thus avoiding making duplicate API calls. There is an online [demo](https://gptcache-openai-image.streamlit.app/) hosted for preview. 3 | 4 | ## Requirements 5 | * Python 3.6 or later 6 | * Dependencies listed in requirements.txt 7 | * OpenAI API key 8 | ## Usage 9 | 1. Clone the repository to your local machine 10 | Install the required packages: pip install -r requirements.txt 11 | 2. Run the app: streamlit run imagen.py 12 | 3. Open the app in your browser at http://localhost:8501 13 | 4. Enter your OpenAI key and prompt then click “generate” to 14 | wait for the DALL-E output image to appear. 15 | If a cache hit occurred, you should see a message like “cache” at the bottom of the image. 16 | 17 |

18 | example 19 |

20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-image/example.png -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/local/.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-image/local/.cache -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/requirements.txt: -------------------------------------------------------------------------------- 1 | gptcache 2 | pillow 3 | streamlit 4 | onnxruntime 5 | faiss-cpu 6 | transformers 7 | sqlalchemy 8 | -------------------------------------------------------------------------------- /docs/bootcamp/temperature/index.rst: -------------------------------------------------------------------------------- 1 | Temperature 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | chat 10 | create_image -------------------------------------------------------------------------------- /docs/bootcamp/vertex/index.rst: -------------------------------------------------------------------------------- 1 | Vertex 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | vertexai_caching -------------------------------------------------------------------------------- /docs/feature.md: -------------------------------------------------------------------------------- 1 | # Feature 2 | 3 | - Support the openai chat completion normal and stream request 4 | - Get top_k similar search results, it can be set when creating the data manager 5 | - Support the cache chain, see: `Cache#next_cache` 6 | 7 | ```python 8 | bak_cache = Cache() 9 | bak_cache.init() 10 | cache.init(next_cache=bak_cache) 11 | ``` 12 | 13 | - Whether to completely skip the current cache, that is, do not search the cache or save the Chat GPT results, see: `Cache#cache_enable_func` 14 | - In the cache initialization phase, no cache search is performed, but save the result returned by the chat gpt to cache, see: `cache_skip=True` in `create` request 15 | 16 | ```python 17 | openai.ChatCompletion.create( 18 | model="gpt-3.5-turbo", 19 | messages=mock_messages, 20 | cache_skip=True, 21 | ) 22 | ``` 23 | 24 | - Like Lego bricks, custom assemble all modules, including: 25 | - Adapter: The user interface to adapt different LLM model requests to the GPTCache protocol 26 | - Pre-processor: Extracts the key information from the request and preprocess 27 | - Context Buffer: Maintains session context 28 | - Encoder: Embed the text into a dense vector for similarity search 29 | - Cache manager: which includes searching, saving, or evicting data 30 | - Ranker: Evaluate similarity by judging the quality of cached answers 31 | - Post-processor: Determine which cached answers to the user, and generate the response -------------------------------------------------------------------------------- /docs/gptcache_live.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/gptcache_live.pdf -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/references/index.rst: -------------------------------------------------------------------------------- 1 | 🥸 API References 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | gptcache 10 | processor 11 | embedding 12 | utils 13 | adapter 14 | manager 15 | similarity_evaluation -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | urllib3<2.0 3 | pyqt5<5.13 4 | pyqtwebengine<5.13 5 | nbsphinx 6 | autodoc_pydantic 7 | myst_nb 8 | sphinx-autobuild 9 | sphinx_book_theme 10 | sphinx-panels 11 | sphinx_copybutton 12 | m2r2 13 | sphinx_toolbox 14 | -------------------------------------------------------------------------------- /docs/toc.bak: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 1 3 | :caption: Getting Started 4 | :name: getting-started 5 | :hidden: 6 | 7 | usage.md 8 | feature.md 9 | configure_it.md 10 | release_note.md 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | :caption: Bootcamp 15 | :name: bootcamp 16 | :hidden: 17 | 18 | bootcamp/langchain/index 19 | bootcamp/llama_index/index 20 | bootcamp/openai/index 21 | bootcamp/replicate/index 22 | bootcamp/temperature/index 23 | bootcamp/vertex/index 24 | 25 | .. toctree:: 26 | :maxdepth: 1 27 | :caption: References 28 | :name: references 29 | :hidden: 30 | 31 | references/index 32 | 33 | .. 
toctree:: 34 | :maxdepth: 1 35 | :caption: Contributing 36 | :name: contributing 37 | :hidden: 38 | 39 | contributing.md -------------------------------------------------------------------------------- /examples/adapter/api.py: -------------------------------------------------------------------------------- 1 | from gptcache import cache, Config, Cache 2 | from gptcache.adapter.api import put, get, init_similar_cache 3 | from gptcache.processor.post import nop 4 | from gptcache.processor.pre import get_prompt 5 | 6 | 7 | def run_basic(): 8 | cache.init(pre_embedding_func=get_prompt) 9 | put("hello", "foo") 10 | print(get("hello")) 11 | # output: foo 12 | 13 | 14 | def run_similar_match(): 15 | inner_cache = Cache() 16 | init_similar_cache( 17 | cache_obj=inner_cache, post_func=nop, config=Config(similarity_threshold=0) 18 | ) 19 | 20 | put("hello1", "foo1", cache_obj=inner_cache) 21 | put("hello2", "foo2", cache_obj=inner_cache) 22 | put("hello3", "foo3", cache_obj=inner_cache) 23 | 24 | messages = get("hello", cache_obj=inner_cache, top_k=3) 25 | print(messages) 26 | # output: ['foo1', 'foo2', 'foo3'] 27 | 28 | 29 | if __name__ == "__main__": 30 | run_basic() 31 | run_similar_match() 32 | -------------------------------------------------------------------------------- /examples/adapter/langchain_llms.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from langchain import Cohere 4 | from langchain.llms import OpenAI 5 | from langchain.chat_models import ChatOpenAI 6 | from langchain.schema import HumanMessage 7 | 8 | from gptcache.adapter.langchain_models import LangChainLLMs 9 | from gptcache import cache 10 | from gptcache.processor.pre import get_prompt 11 | 12 | from gptcache.adapter.langchain_models import LangChainChat 13 | 14 | OpenAI.api_key = os.getenv("OPENAI_API_KEY") 15 | Cohere.cohere_api_key = os.getenv("COHERE_API_KEY") 16 | 17 | 18 | def run_llm(): 19 | cache.init( 20 | pre_embedding_func=get_prompt, 21 | ) 22 | 23 | question = "what is chatgpt" 24 | 25 | langchain_openai = OpenAI(model_name="text-ada-001") 26 | llm = LangChainLLMs(llm=langchain_openai) 27 | answer = llm(prompt=question) 28 | print(answer) 29 | 30 | # TODO install cohere auto 31 | langchain_cohere = Cohere() 32 | llm = LangChainLLMs(llm=langchain_cohere) 33 | answer = llm(prompt=question) 34 | print(answer) 35 | 36 | 37 | def get_msg(data, **_): 38 | return data.get("messages")[-1].content 39 | 40 | 41 | def run_chat_model(): 42 | cache.init( 43 | pre_embedding_func=get_msg, 44 | ) 45 | 46 | chat = LangChainChat(chat=ChatOpenAI(temperature=0)) 47 | answer = chat( 48 | messages=[ 49 | HumanMessage( 50 | content="Translate this sentence from English to Chinese. I love programming." 
51 | ) 52 | ] 53 | ) 54 | print(answer) 55 | 56 | 57 | if __name__ == "__main__": 58 | run_llm() 59 | run_chat_model() 60 | -------------------------------------------------------------------------------- /examples/adapter/openai_chatgpt.py: -------------------------------------------------------------------------------- 1 | from gptcache import cache 2 | from gptcache.adapter import openai 3 | 4 | cache.init() 5 | cache.set_openai_key() 6 | 7 | question = 'what is github' 8 | answer = openai.ChatCompletion.create( 9 | model='gpt-3.5-turbo', 10 | messages=[ 11 | { 12 | 'role': 'user', 13 | 'content': question 14 | } 15 | ], 16 | ) 17 | print(answer) 18 | -------------------------------------------------------------------------------- /examples/benchmark/similiar_qqp.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/examples/benchmark/similiar_qqp.json.gz -------------------------------------------------------------------------------- /examples/benchmark/similiar_qqp_full.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/examples/benchmark/similiar_qqp_full.json.gz -------------------------------------------------------------------------------- /examples/data_manager/map_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.manager import get_data_manager 4 | from gptcache.adapter import openai 5 | from gptcache import cache 6 | 7 | 8 | def run(): 9 | dir_name, _ = os.path.split(os.path.abspath(__file__)) 10 | data_file = dir_name + '/data_map.txt' 11 | data_manager = get_data_manager(data_path=data_file, max_size=10) 12 | cache.init(data_manager=data_manager) 13 | cache.set_openai_key() 14 | 15 | answer = openai.ChatCompletion.create( 16 | model='gpt-3.5-turbo', 17 | messages=[ 18 | {'role': 'user', 'content': 'what is chatgpt'} 19 | ], 20 | ) 21 | print(answer) 22 | 23 | 24 | if __name__ == '__main__': 25 | run() 26 | -------------------------------------------------------------------------------- /examples/data_manager/scalar_store.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from gptcache import cache 6 | from gptcache.adapter import openai 7 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 8 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 9 | 10 | d = 8 11 | 12 | # Change the embdding function to your own 13 | def mock_embeddings(data, **kwargs): 14 | return np.random.random((d, )).astype('float32') 15 | 16 | 17 | def run(): 18 | scalar_stores = [ 19 | CacheBase('sqlite', sql_url='sqlite:///./sqlite.db'), 20 | CacheBase('postgresql', sql_url='postgresql+psycopg2://postgres:123456@127.0.0.1:5432/postgres'), 21 | CacheBase('mysql', sql_url='mysql+pymysql://root:123456@127.0.0.1:3306/mysql'), 22 | CacheBase('mariadb', sql_url='mariadb+pymysql://root:123456@127.0.0.1:3307/mysql'), 23 | CacheBase('sqlserver', sql_url='ssql+pyodbc://sa:Strongpsw_123@127.0.0.1:1434/msdb?driver=ODBC+Driver+17+for+SQL+Server'), 24 | CacheBase('oracle', sql_url='oracle+cx_oracle://oracle:123456@127.0.0.1:1521/?service_name=helowin&encoding=UTF-8&nencoding=UTF-8'), 25 | CacheBase('dynamo'), 26 | ] 27 | 28 | for scalar_store in 
scalar_stores: 29 | if os.path.exists('faiss.index'): 30 | os.remove('faiss.index') 31 | vector_base = VectorBase('faiss', dimension=d) 32 | data_manager = get_data_manager(scalar_store, vector_base) 33 | cache.init(embedding_func=mock_embeddings, 34 | data_manager=data_manager, 35 | similarity_evaluation=SearchDistanceEvaluation(), 36 | ) 37 | cache.set_openai_key() 38 | 39 | answer = openai.ChatCompletion.create( 40 | model='gpt-3.5-turbo', 41 | messages=[ 42 | {'role': 'user', 'content': 'what is chatgpt'} 43 | ], 44 | ) 45 | print('answer:', answer) 46 | 47 | answer = openai.ChatCompletion.create( 48 | model='gpt-3.5-turbo', 49 | messages=[ 50 | {'role': 'user', 'content': 'what is chatgpt'} 51 | ], 52 | ) 53 | print('answer cached:', answer) 54 | 55 | 56 | if __name__ == '__main__': 57 | run() 58 | -------------------------------------------------------------------------------- /examples/data_manager/vector_store.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache import cache 4 | from gptcache.adapter import openai 5 | from gptcache.manager import CacheBase, VectorBase, get_data_manager 6 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 7 | 8 | d = 8 9 | 10 | 11 | def mock_embeddings(data, **kwargs): 12 | return np.random.random((d, )).astype('float32') 13 | 14 | 15 | def run(): 16 | vector_stores = [ 17 | 'faiss', 18 | 'milvus', 19 | 'chromadb', 20 | 'docarray', 21 | 'redis', 22 | 'weaviate', 23 | ] 24 | for vector_store in vector_stores: 25 | cache_base = CacheBase('sqlite') 26 | vector_base = VectorBase(vector_store, dimension=d) 27 | data_manager = get_data_manager(cache_base, vector_base) 28 | 29 | cache.init( 30 | embedding_func=mock_embeddings, 31 | data_manager=data_manager, 32 | similarity_evaluation=SearchDistanceEvaluation(), 33 | ) 34 | cache.set_openai_key() 35 | 36 | answer = openai.ChatCompletion.create( 37 | model='gpt-3.5-turbo', 38 | messages=[{'role': 'user', 'content': 'what is chatgpt'}], 39 | ) 40 | print(answer) 41 | 42 | 43 | if __name__ == '__main__': 44 | run() 45 | -------------------------------------------------------------------------------- /examples/embedding/default.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.embedding.string import to_embeddings as string_embedding 4 | 5 | 6 | def run(): 7 | cache.init(embedding_func=string_embedding) 8 | cache.set_openai_key() 9 | 10 | answer = openai.ChatCompletion.create( 11 | model='gpt-3.5-turbo', 12 | messages=[ 13 | {'role': 'user', 'content': 'what is chatgpt'} 14 | ], 15 | ) 16 | print(answer) 17 | 18 | 19 | if __name__ == '__main__': 20 | run() 21 | -------------------------------------------------------------------------------- /examples/embedding/onnx.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager.factory import get_data_manager 4 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 5 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 6 | from gptcache.embedding import Onnx 7 | 8 | 9 | def run(): 10 | onnx = Onnx() 11 | 12 | cache_base = CacheBase('sqlite') 13 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 14 | data_manager = get_data_manager(cache_base, vector_base) 15 | 16 | 
cache.init(embedding_func=onnx.to_embeddings, 17 | data_manager=data_manager, 18 | similarity_evaluation=SearchDistanceEvaluation(), 19 | ) 20 | cache.set_openai_key() 21 | 22 | answer = openai.ChatCompletion.create( 23 | model='gpt-3.5-turbo', 24 | messages=[ 25 | {'role': 'user', 'content': 'what is chatgpt'} 26 | ], 27 | ) 28 | print(answer) 29 | 30 | 31 | if __name__ == '__main__': 32 | run() 33 | -------------------------------------------------------------------------------- /examples/embedding/paddlenlp.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager.factory import get_data_manager 4 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 5 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 6 | from gptcache.embedding import PaddleNLP 7 | 8 | 9 | def run(): 10 | paddlenlp = PaddleNLP() 11 | 12 | cache_base = CacheBase('sqlite') 13 | vector_base = VectorBase('faiss', dimension=paddlenlp.dimension) 14 | data_manager = get_data_manager(cache_base, vector_base) 15 | 16 | cache.init(embedding_func=paddlenlp.to_embeddings, 17 | data_manager=data_manager, 18 | similarity_evaluation=SearchDistanceEvaluation(), 19 | ) 20 | cache.set_openai_key() 21 | 22 | answer = openai.ChatCompletion.create( 23 | model='gpt-3.5-turbo', 24 | messages=[ 25 | {'role': 'user', 'content': 'what is chatgpt'} 26 | ], 27 | ) 28 | print(answer) 29 | 30 | 31 | if __name__ == '__main__': 32 | run() 33 | -------------------------------------------------------------------------------- /examples/embedding/random.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 4 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 5 | import numpy as np 6 | 7 | 8 | d = 8 9 | 10 | 11 | def mock_embeddings(data, **kwargs): 12 | return np.random.random((d, )).astype('float32') 13 | 14 | 15 | def run(): 16 | cache_base = CacheBase('sqlite') 17 | vector_base = VectorBase('faiss', dimension=d) 18 | data_manager = get_data_manager(cache_base, vector_base) 19 | cache.init(embedding_func=mock_embeddings, 20 | data_manager=data_manager, 21 | similarity_evaluation=SearchDistanceEvaluation(), 22 | ) 23 | cache.set_openai_key() 24 | 25 | answer = openai.ChatCompletion.create( 26 | model='gpt-3.5-turbo', 27 | messages=[ 28 | {'role': 'user', 'content': 'what is chatgpt'} 29 | ], 30 | ) 31 | print(answer) 32 | 33 | 34 | if __name__ == '__main__': 35 | run() 36 | -------------------------------------------------------------------------------- /examples/integrate/diffusers/stable_diffusion.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | from PIL import ImageChops 5 | from diffusers import DPMSolverMultistepScheduler 6 | 7 | from gptcache.adapter.diffusers import StableDiffusionPipeline 8 | from gptcache.processor.pre import get_prompt 9 | from gptcache import cache 10 | 11 | from gptcache.embedding import Onnx 12 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 13 | from gptcache.manager import get_data_manager, CacheBase, VectorBase, ObjectBase 14 | 15 | 16 | # onnx = Onnx() 17 | # cache_base = CacheBase('sqlite') 18 | # vector_base = VectorBase('milvus', 
host='localhost', port='19530', collection_name='gptcache_image', dimension=onnx.dimension) 19 | # object_base = ObjectBase('local', path='./images') 20 | # data_manager = get_data_manager(cache_base, vector_base, object_base) 21 | 22 | cache.init( 23 | pre_embedding_func=get_prompt, 24 | # embedding_func=onnx.to_embeddings, 25 | # data_manager=data_manager, 26 | # similarity_evaluation=SearchDistanceEvaluation(), 27 | ) 28 | 29 | 30 | model_id = "stabilityai/stable-diffusion-2-1" 31 | 32 | # Use the DPMSolverMultistepScheduler (DPM-Solver++) scheduler here instead 33 | pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) 34 | pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) 35 | pipe = pipe.to("cuda") 36 | 37 | prompt = "a photo of an astronaut riding a horse on mars" 38 | start = time.time() 39 | image1 = pipe(prompt=prompt).images[0] 40 | print("First time generation:", time.time() - start) 41 | 42 | start = time.time() 43 | image2 = pipe(prompt=prompt).images[0] 44 | print("Second time generation:", time.time() - start) 45 | 46 | # Compare generated images 47 | diff = ImageChops.difference(image1, image2) 48 | assert not diff.getbbox(), "Got different images." -------------------------------------------------------------------------------- /examples/integrate/dolly/basic_usage.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | from transformers import pipeline 4 | from gptcache.processor.pre import get_inputs 5 | from gptcache.manager import manager_factory 6 | from gptcache import Cache 7 | from gptcache.embedding import Onnx 8 | from gptcache.adapter.dolly import Dolly 9 | 10 | 11 | def dolly_base_usage(): 12 | onnx = Onnx() 13 | m = manager_factory("sqlite,faiss,local", data_dir="./dolly", vector_params={"dimension": onnx.dimension}) 14 | llm_cache = Cache() 15 | llm_cache.init( 16 | pre_embedding_func=get_inputs, 17 | data_manager=m, 18 | embedding_func=onnx.to_embeddings 19 | ) 20 | 21 | llm = Dolly.from_model(model="databricks/dolly-v2-3b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0) 22 | 23 | context = """George Washington (February 22, 1732[b] – December 14, 1799) was an American military officer, statesman, 24 | and Founding Father who served as the first president of the United States from 1789 to 1797.""" 25 | 26 | for _ in range(2): 27 | start_time = time.time() 28 | answer = llm(context, cache_obj=llm_cache) 29 | print("Time consuming: {:.2f}s".format(time.time() - start_time)) 30 | print(f"Received: {answer[0]['generated_text']}") 31 | print(f"Hit cache: {answer[0].get('gptcache', False)}") 32 | 33 | 34 | def dolly_from_hugggingface(): 35 | onnx = Onnx() 36 | m = manager_factory("sqlite,faiss,local", data_dir="./dolly_hg", vector_params={"dimension": onnx.dimension}) 37 | llm_cache = Cache() 38 | llm_cache.init( 39 | pre_embedding_func=get_inputs, 40 | data_manager=m, 41 | embedding_func=onnx.to_embeddings 42 | ) 43 | 44 | pipe = pipeline(model="databricks/dolly-v2-3b", torch_dtype=torch.bfloat16, 45 | trust_remote_code=True, device=0, return_full_text=True) 46 | llm = Dolly(pipe) 47 | 48 | context = """George Washington (February 22, 1732[b] – December 14, 1799) was an American military officer, statesman, 49 | and Founding Father who served as the first president of the United States from 1789 to 1797.""" 50 | 51 | for _ in range(2): 52 | start_time = time.time() 53 | answer = llm(context, cache_obj=llm_cache) 54 | 
print("Time consuming: {:.2f}s".format(time.time() - start_time)) 55 | print(f"Received: {answer[0]['generated_text']}") 56 | print(f"Hit cache: {answer[0].get('gptcache', False)}") 57 | 58 | 59 | if __name__ == '__main__': 60 | dolly_base_usage() 61 | dolly_from_hugggingface() 62 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_llms_mock.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from langchain import Cohere 4 | from langchain.llms import OpenAI 5 | 6 | from gptcache.adapter.langchain_models import LangChainLLMs 7 | from gptcache import cache, Cache 8 | from gptcache.processor.pre import get_prompt 9 | 10 | OpenAI.api_key = os.getenv("OPENAI_API_KEY") 11 | Cohere.cohere_api_key = os.getenv("COHERE_API_KEY") 12 | 13 | 14 | def run(): 15 | data_file = "data_map.txt" 16 | has_data = os.path.isfile(data_file) 17 | llm_cache = Cache() 18 | llm_cache.init( 19 | pre_embedding_func=get_prompt, 20 | ) 21 | 22 | if not has_data: 23 | for i in range(10): 24 | question = f"foo{i}" 25 | answer = f"receiver the foo {i}" 26 | cache.data_manager.save(question, answer, cache.embedding_func(question)) 27 | 28 | question = "foo0" 29 | 30 | langchain_openai = OpenAI(model_name="text-ada-001") 31 | llm = LangChainLLMs(llm=langchain_openai) 32 | answer = llm(prompt=question, cache_obj=llm_cache) 33 | print(answer) 34 | answer = llm(prompt=question, cache_obj=llm_cache) 35 | print(answer) 36 | 37 | # TODO install cohere auto 38 | langchain_cohere = Cohere() 39 | llm = LangChainLLMs(llm=langchain_cohere) 40 | answer = llm(prompt=question, cache_obj=llm_cache) 41 | print(answer) 42 | 43 | 44 | if __name__ == '__main__': 45 | run() 46 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_prompt_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import openai 4 | import time 5 | from langchain.llms import OpenAI 6 | from langchain import PromptTemplate, LLMChain 7 | 8 | from gptcache.adapter.langchain_models import LangChainLLMs 9 | from gptcache import Cache 10 | from gptcache.processor.pre import get_prompt 11 | 12 | openai.api_key = os.getenv("OPENAI_API_KEY") 13 | 14 | template = """Question: {question} 15 | 16 | Answer: Let's think step by step.""" 17 | 18 | prompt = PromptTemplate(template=template, input_variables=["question"]) 19 | 20 | llm = OpenAI() 21 | 22 | question = "What NFL team won the Super Bowl in the year Justin Bieber was born?" 
23 | 24 | llm_cache = Cache() 25 | llm_cache.init( 26 | pre_embedding_func=get_prompt, 27 | ) 28 | 29 | before = time.time() 30 | cached_llm = LangChainLLMs(llm=llm) 31 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 32 | print(answer) 33 | print("Read through Time Spent =", time.time() - before) 34 | 35 | before = time.time() 36 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 37 | print(answer) 38 | print("Cache Hit Time Spent =", time.time() - before) 39 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_qa_chain.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from langchain import OpenAI 4 | from langchain.chains.question_answering import load_qa_chain 5 | from langchain.schema import Document 6 | 7 | from gptcache import cache 8 | from gptcache.adapter.api import init_similar_cache 9 | from gptcache.adapter.langchain_models import LangChainLLMs 10 | 11 | 12 | def get_content_func(data, **_): 13 | return data.get("prompt").split("Question:")[-1] 14 | 15 | 16 | init_similar_cache(pre_func=get_content_func) 17 | cache.set_openai_key() 18 | 19 | mkt_qa = load_qa_chain(llm=LangChainLLMs(llm=OpenAI(temperature=0)), chain_type="stuff") 20 | 21 | msg = "What is Traditional marketing?" 22 | 23 | 24 | before = time.time() 25 | answer = mkt_qa.run(question=msg, input_documents=[Document(page_content="marketing is hello world")]) 26 | print(answer) 27 | print("Time Spent:", time.time() - before) 28 | 29 | before = time.time() 30 | answer = mkt_qa.run(question=msg, input_documents=[Document(page_content="marketing is hello world")]) 31 | print(answer) 32 | print("Time Spent:", time.time() - before) 33 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_similaritycache_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import openai 5 | from langchain import PromptTemplate 6 | from langchain.llms import OpenAI 7 | 8 | from gptcache import Cache 9 | from gptcache.adapter.api import init_similar_cache 10 | from gptcache.adapter.langchain_models import LangChainLLMs 11 | from gptcache.processor.pre import get_prompt 12 | 13 | openai.api_key = os.getenv("OPENAI_API_KEY") 14 | 15 | template = """Question: {question} 16 | 17 | Answer: Let's think step by step.""" 18 | 19 | prompt = PromptTemplate(template=template, input_variables=["question"]) 20 | 21 | llm = OpenAI() 22 | 23 | question = "What NFL team won the Super Bowl in the year Justin Bieber was born?" 24 | 25 | llm_cache = Cache() 26 | init_similar_cache(pre_func=get_prompt, cache_obj=llm_cache) 27 | 28 | 29 | before = time.time() 30 | cached_llm = LangChainLLMs(llm=llm) 31 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 32 | print(answer) 33 | print("Read through Time Spent =", time.time() - before) 34 | 35 | before = time.time() 36 | question = "What is the winner Super Bowl in the year Justin Bieber was born?" 
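# The reworded question above is intended to hit the entry stored for the original
# question, since init_similar_cache matches semantically rather than exactly. A sketch
# of adjusting how strict that match is, modelled on the commented-out Config line in
# examples/processor/temperature_example.py (the 0.9 value is illustrative only):
#
#     from gptcache import Config
#     llm_cache.config = Config(similarity_threshold=0.9)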
37 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 38 | print(answer) 39 | print("Cache Hit Time Spent =", time.time() - before) 40 | -------------------------------------------------------------------------------- /examples/integrate/llama_cpp/basic_usage.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache.adapter.llama_cpp import Llama 4 | from gptcache.manager import manager_factory 5 | from gptcache import Cache 6 | from gptcache.embedding import Onnx 7 | from gptcache.processor.pre import get_prompt 8 | 9 | 10 | def llama_cpp_base_usage(): 11 | onnx = Onnx() 12 | m = manager_factory("sqlite,faiss,local", data_dir="./llamacpp_basic", vector_params={"dimension": onnx.dimension}) 13 | llm_cache = Cache() 14 | llm_cache.init( 15 | pre_embedding_func=get_prompt, 16 | data_manager=m, 17 | embedding_func=onnx.to_embeddings 18 | ) 19 | llm = Llama("./ggml-model-q4_0.bin") 20 | for _ in range(2): 21 | start_time = time.time() 22 | answer = llm(prompt="Q: Name the planets in the solar system? A: ", stop=["Q:", "\n"], cache_obj=llm_cache) 23 | print("Time consuming: {:.2f}s".format(time.time() - start_time)) 24 | print(f"Received: {answer['choices'][0]['text']}") 25 | print(f"Hit cache: {answer.get('gptcache', False)}") 26 | 27 | 28 | def llama_cpp_stream_usage(): 29 | onnx = Onnx() 30 | m = manager_factory("sqlite,faiss,local", data_dir="./llamacpp_stream", vector_params={"dimension": onnx.dimension}) 31 | llm_cache = Cache() 32 | llm_cache.init( 33 | pre_embedding_func=get_prompt, 34 | data_manager=m, 35 | embedding_func=onnx.to_embeddings 36 | ) 37 | llm = Llama("./ggml-model-q4_0.bin") 38 | for _ in range(2): 39 | start_time = time.time() 40 | ret = llm(prompt="Q: Name the planets in the solar system? A: ", stop=["Q:", "\n"], stream=True, cache_obj=llm_cache) 41 | answer = '' 42 | for chunk in ret: 43 | answer += chunk['choices'][0]['text'] 44 | print("Time consuming: {:.2f}s".format(time.time() - start_time)) 45 | print(f"Received: {answer}") 46 | 47 | 48 | if __name__ == "__main__": 49 | llama_cpp_base_usage() 50 | llama_cpp_stream_usage() 51 | 52 | -------------------------------------------------------------------------------- /examples/integrate/replicate/vqa.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache 4 | from gptcache.adapter import replicate 5 | from gptcache.embedding import Timm, Onnx 6 | from gptcache.manager import get_data_manager, CacheBase, VectorBase, ObjectBase 7 | from gptcache.processor.pre import get_input_image_file_name 8 | from gptcache.similarity_evaluation.np import NumpyNormEvaluation 9 | 10 | timm = Timm('resnet18') 11 | onnx = Onnx() 12 | cache_base = CacheBase('sqlite') 13 | vector_base = VectorBase('faiss', dimension=timm.dimension) 14 | object_base = ObjectBase('local', path='./objects') 15 | data_manager = get_data_manager(cache_base, vector_base, object_base) 16 | 17 | cache.init( 18 | pre_embedding_func=get_input_image_file_name, 19 | data_manager=data_manager, 20 | embedding_func=timm.to_embeddings, 21 | similarity_evaluation=NumpyNormEvaluation(enable_normal=True, question_embedding_function=onnx.to_embeddings) 22 | ) 23 | 24 | 25 | image_path = '../../../docs/GPTCache.png' 26 | 27 | 28 | # run replicate clinet with gptcache 29 | start = time.time() 30 | question1 = "what is in the image?" 31 | question2 = "What can you see in the image?" 
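# How a hit is decided here: get_input_image_file_name keys the request by the image
# file, timm.to_embeddings turns the image into the stored vector, and
# NumpyNormEvaluation compares the two questions through onnx.to_embeddings. The second
# run below therefore asks a paraphrased question about the same image and is expected
# to be answered from the cache, which is what the two elapsed-time prints illustrate.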
32 | 33 | output = replicate.run( 34 | "andreasjansson/blip-2:4b32258c42e9efd4288bb9910bc532a69727f9acd26aa08e175713a0a857a608", 35 | input={ 36 | "image": open(image_path, 'rb'), 37 | "question": question1} 38 | ) 39 | end = time.time() 40 | print('Answer:', output) 41 | print('Time elapsed 1:', end - start) 42 | 43 | start = time.time() 44 | output = replicate.run( 45 | "andreasjansson/blip-2:4b32258c42e9efd4288bb9910bc532a69727f9acd26aa08e175713a0a857a608", 46 | input={ 47 | "image": open(image_path, 'rb'), 48 | "question": question2} 49 | ) 50 | end = time.time() 51 | print('Answer:', output) 52 | print('Time elapsed 2:', end - start) -------------------------------------------------------------------------------- /examples/integrate/stability/text_to_image.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import time 4 | 5 | from PIL import Image 6 | 7 | from gptcache import cache 8 | from gptcache.adapter.stability_sdk import StabilityInference, generation 9 | from gptcache.embedding import Onnx 10 | from gptcache.manager.factory import manager_factory 11 | from gptcache.processor.pre import get_prompt 12 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 13 | 14 | # init gptcache 15 | onnx = Onnx() 16 | data_manager = manager_factory('sqlite,faiss,local', 17 | data_dir='/', 18 | vector_params={'dimension': onnx.dimension}, 19 | object_params={'path': './images'} 20 | ) 21 | cache.init( 22 | pre_embedding_func=get_prompt, 23 | embedding_func=onnx.to_embeddings, 24 | data_manager=data_manager, 25 | similarity_evaluation=SearchDistanceEvaluation() 26 | ) 27 | 28 | # run with gptcache 29 | api_key = os.getenv('STABILITY_KEY', 'key-goes-here') 30 | 31 | stability_api = StabilityInference( 32 | key=os.environ['STABILITY_KEY'], # API Key reference. 33 | verbose=False, # Print debug messages. 34 | engine='stable-diffusion-xl-beta-v2-2-2', # Set the engine to use for generation. 
35 | ) 36 | 37 | start = time.time() 38 | answers = stability_api.generate( 39 | prompt='a cat sitting besides a dog', 40 | width=256, 41 | height=256 42 | ) 43 | 44 | for resp in answers: 45 | for artifact in resp.artifacts: 46 | if artifact.type == generation.ARTIFACT_IMAGE: 47 | img = Image.open(io.BytesIO(artifact.binary)) 48 | assert img.size == (256, 256) 49 | print('Time elapsed 1:', time.time() - start) 50 | 51 | start = time.time() 52 | answers = stability_api.generate( 53 | prompt='a dog and a dog sitting together', 54 | width=512, 55 | height=512 56 | ) 57 | 58 | for resp in answers: 59 | for artifact in resp.artifacts: 60 | if artifact.type == generation.ARTIFACT_IMAGE: 61 | img = Image.open(io.BytesIO(artifact.binary)) 62 | assert img.size == (512, 512) 63 | print('Time elapsed 2:', time.time() - start) -------------------------------------------------------------------------------- /examples/processor/temperature_example.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache 4 | from gptcache.adapter import openai 5 | from gptcache.embedding import Onnx 6 | from gptcache.manager import manager_factory 7 | from gptcache.processor.post import temperature_softmax 8 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 9 | 10 | cache.set_openai_key() 11 | 12 | # Init cache with vector store 13 | # if os.path.exists("faiss.index"): 14 | # os.remove("faiss.index") 15 | # if os.path.exists("sqlite.db"): 16 | # os.remove("sqlite.db") 17 | 18 | onnx = Onnx() 19 | data_manager = manager_factory("sqlite,faiss", vector_params={"dimension": onnx.dimension}) 20 | 21 | cache.init( 22 | embedding_func=onnx.to_embeddings, 23 | data_manager=data_manager, 24 | similarity_evaluation=SearchDistanceEvaluation(), 25 | post_process_messages_func=temperature_softmax 26 | ) 27 | # cache.config = Config(similarity_threshold=0.2) 28 | 29 | question = 'what is github' 30 | 31 | for _ in range(3): 32 | start = time.time() 33 | response = openai.ChatCompletion.create( 34 | model='gpt-3.5-turbo', 35 | temperature = 1.0, # Change temperature here 36 | messages=[{ 37 | 'role': 'user', 38 | 'content': question 39 | }], 40 | ) 41 | print(round(time.time() - start, 3)) 42 | print(response["choices"][0]["message"]["content"]) -------------------------------------------------------------------------------- /examples/session/session.py: -------------------------------------------------------------------------------- 1 | from gptcache import cache 2 | from gptcache.session import Session 3 | from gptcache.adapter import openai 4 | 5 | # init gptcache 6 | cache.init() 7 | cache.set_openai_key() 8 | 9 | 10 | def run_session(): 11 | session = Session() 12 | response = openai.ChatCompletion.create( 13 | model="gpt-3.5-turbo", 14 | messages=[ 15 | { 16 | "role": "user", 17 | "content": "what's github?" 
18 | }], 19 | session=session 20 | ) 21 | response_content = response["choices"][0]["message"]["content"] 22 | print(response_content) 23 | 24 | 25 | def run_custom_session(): 26 | def my_check_hit(cur_session_id, cache_session_ids, cache_questions, cache_answer): 27 | print(cur_session_id, cache_session_ids, cache_questions, cache_answer) 28 | if "GitHub" in cache_answer: 29 | return True 30 | return False 31 | session = Session(name="my-session", check_hit_func=my_check_hit) 32 | response = openai.ChatCompletion.create( 33 | model="gpt-3.5-turbo", 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": "what's github?" 38 | }], 39 | session=session 40 | ) 41 | response_content = response["choices"][0]["message"]["content"] 42 | print(response_content) 43 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/exact_match.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.similarity_evaluation.exact_match import ExactMatchEvaluation 4 | 5 | 6 | def run(): 7 | cache.init(similarity_evaluation=ExactMatchEvaluation()) 8 | cache.set_openai_key() 9 | 10 | answer = openai.ChatCompletion.create( 11 | model='gpt-3.5-turbo', 12 | messages=[ 13 | {'role': 'user', 'content': 'what is chatgpt'} 14 | ], 15 | ) 16 | print(answer) 17 | 18 | 19 | if __name__ == '__main__': 20 | run() 21 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/onnx.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 4 | from gptcache.embedding import Onnx as EmbeddingOnnx 5 | from gptcache.similarity_evaluation import OnnxModelEvaluation 6 | 7 | 8 | def run(): 9 | onnx = EmbeddingOnnx() 10 | evaluation_onnx = OnnxModelEvaluation() 11 | 12 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 13 | data_manager = get_data_manager('sqlite', vector_base) 14 | 15 | cache.init(embedding_func=onnx.to_embeddings, 16 | data_manager=data_manager, 17 | similarity_evaluation=evaluation_onnx, 18 | ) 19 | cache.set_openai_key() 20 | 21 | answer = openai.ChatCompletion.create( 22 | model='gpt-3.5-turbo', 23 | messages=[ 24 | {'role': 'user', 'content': 'what is chatgpt'} 25 | ], 26 | ) 27 | print(answer) 28 | 29 | 30 | if __name__ == '__main__': 31 | run() 32 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/search_distance.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, VectorBase 4 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 5 | from gptcache.embedding import Onnx 6 | 7 | 8 | def run(): 9 | onnx = Onnx() 10 | 11 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 12 | data_manager = get_data_manager('sqlite', vector_base) 13 | 14 | cache.init(embedding_func=onnx.to_embeddings, 15 | data_manager=data_manager, 16 | similarity_evaluation=SearchDistanceEvaluation(), 17 | ) 18 | cache.set_openai_key() 19 | 20 | answer = openai.ChatCompletion.create( 21 | model='gpt-3.5-turbo', 22 | messages=[ 23 | {'role': 'user', 'content': 'what is chatgpt'} 24 | ], 25 
| ) 26 | print(answer) 27 | 28 | 29 | if __name__ == '__main__': 30 | run() 31 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/sequence_match.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, VectorBase 4 | from gptcache.similarity_evaluation import SequenceMatchEvaluation 5 | from gptcache.processor.pre import concat_all_queries 6 | from gptcache.embedding import Onnx 7 | from gptcache import Config 8 | 9 | 10 | def run(): 11 | onnx = Onnx() 12 | 13 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 14 | data_manager = get_data_manager('sqlite', vector_base) 15 | 16 | cache.init(embedding_func=onnx.to_embeddings, 17 | pre_embedding_func=concat_all_queries, 18 | data_manager=data_manager, 19 | similarity_evaluation=SequenceMatchEvaluation([0.1, 0.2, 0.7], 'onnx'), 20 | config=Config(context_len=3, skip_list=['system', 'assistant']) 21 | ) 22 | cache.set_openai_key() 23 | 24 | answer = openai.ChatCompletion.create( 25 | model='gpt-3.5-turbo', 26 | messages=[ 27 | {'role': 'system', 'content': 'you are a helpful chatbot.'}, 28 | {'role': 'user', 'content': 'query1'}, 29 | {'role': 'assistant', 'content': 'answer1'}, 30 | {'role': 'user', 'content': 'query2'}, 31 | {'role': 'assistant', 'content': 'answer2'}, 32 | {'role': 'user', 'content': 'query3'}, 33 | {'role': 'assistant', 'content': 'answer3'} 34 | ] 35 | ) 36 | print(answer) 37 | 38 | 39 | if __name__ == '__main__': 40 | run() 41 | -------------------------------------------------------------------------------- /gptcache/__init__.py: -------------------------------------------------------------------------------- 1 | """gptcache version""" 2 | __version__ = "0.1.44" 3 | 4 | from gptcache.config import Config 5 | from gptcache.core import Cache 6 | from gptcache.core import cache 7 | -------------------------------------------------------------------------------- /gptcache/adapter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/gptcache/adapter/__init__.py -------------------------------------------------------------------------------- /gptcache/adapter/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from typing import Any, Dict, Callable, Optional 3 | 4 | 5 | class BaseCacheLLM(metaclass=ABCMeta): 6 | """Base LLM, When you have enhanced llm without using the original llm api, 7 | you can use this class as a proxy to use the ability of the cache. 8 | 9 | NOTE: Please make sure that the custom llm returns the same value as the original llm. 10 | 11 | For example, if you use the openai proxy, you perform delay statistics before sending the openai request, 12 | and then you package this part of the function, so you may have a separate package, which is different from openai. 13 | If the api request parameters and return results you wrap are the same as the original ones, 14 | then you can use this class to obtain cache-related capabilities. 15 | 16 | Example: 17 | .. 
code-block:: python 18 | 19 | import time 20 | 21 | import openai 22 | 23 | from gptcache import Cache 24 | from gptcache.adapter import openai as cache_openai 25 | 26 | 27 | def proxy_openai_chat_complete(*args, **kwargs): 28 | start_time = time.time() 29 | res = openai.ChatCompletion.create(*args, **kwargs) 30 | print("Consume Time Spent =", round((time.time() - start_time), 2)) 31 | return res 32 | 33 | 34 | llm_cache = Cache() 35 | 36 | cache_openai.ChatCompletion.llm = proxy_openai_chat_complete 37 | cache_openai.ChatCompletion.cache_args = {"cache_obj": llm_cache} 38 | 39 | cache_openai.ChatCompletion.create( 40 | model="gpt-3.5-turbo", 41 | messages=[ 42 | { 43 | "role": "user", 44 | "content": "What's GitHub?", 45 | } 46 | ], 47 | ) 48 | """ 49 | 50 | llm: Optional[Callable] = None 51 | """ 52 | On a cache miss, if that variable is set, it will be called; 53 | if not, it will call the original llm. 54 | """ 55 | 56 | cache_args: Dict[str, Any] = {} 57 | """ 58 | It can be used to set some cache-related public parameters. 59 | If you don't want to set the same parameters every time when using cache, say cache_obj, you can use it. 60 | """ 61 | 62 | @classmethod 63 | def fill_base_args(cls, **kwargs): 64 | """ Fill the base args to the cache args 65 | """ 66 | for key, value in cls.cache_args.items(): 67 | if key not in kwargs: 68 | kwargs[key] = value 69 | 70 | return kwargs 71 | -------------------------------------------------------------------------------- /gptcache/adapter/dolly.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from gptcache.adapter.adapter import adapt 4 | from gptcache.manager.scalar_data.base import Answer, DataType 5 | from gptcache.utils import import_huggingface, import_torch 6 | 7 | import_torch() 8 | import_huggingface() 9 | 10 | from transformers import pipeline # pylint: disable=wrong-import-position 11 | 12 | 13 | class Dolly: 14 | """Wrapper for Dolly (https://github.com/databrickslabs/dolly.git). 15 | 16 | Example using from_model: 17 | .. code-block:: python 18 | 19 | from gptcache import cache 20 | from gptcache.processor.pre import get_inputs 21 | cache.init(pre_embedding_func=get_inputs) 22 | 23 | from gptcache.adapter.dolly import Dolly 24 | dolly = Dolly.from_model( 25 | model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0 26 | ) 27 | 28 | Example passing pipeline in directly: 29 | .. 
code-block:: python 30 | 31 | import torch 32 | from transformers import pipeline 33 | from gptcache import cache 34 | from gptcache.processor.pre import get_inputs 35 | cache.init(pre_embedding_func=get_inputs) 36 | from gptcache.adapter.dolly import Dolly 37 | 38 | pipe = pipeline( 39 | model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0 40 | ) 41 | dolly = Dolly(pipe) 42 | """ 43 | 44 | def __init__(self, dolly_pipeline: Any): 45 | self._dolly_pipeline = dolly_pipeline 46 | 47 | @classmethod 48 | def from_model(cls, model: str, **kwargs): 49 | pipe = pipeline(model=model, **kwargs) 50 | return cls(pipe) 51 | 52 | def __call__(self, prompt: str, **kwargs): 53 | return adapt( 54 | self._dolly_pipeline, 55 | _cache_data_convert, 56 | _update_cache_callback, 57 | inputs=prompt, 58 | **kwargs 59 | ) 60 | 61 | 62 | def _cache_data_convert(cache_data): 63 | return [{"generated_text": cache_data, "gptcache": True}] 64 | 65 | 66 | def _update_cache_callback(llm_data, update_cache_func, *args, **kwargs): # pylint: disable=unused-argument 67 | update_cache_func(Answer(llm_data[0]["generated_text"], DataType.STR)) 68 | return llm_data 69 | -------------------------------------------------------------------------------- /gptcache/client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from gptcache.utils import import_httpx 5 | 6 | import_httpx() 7 | 8 | import httpx # pylint: disable=C0413 9 | 10 | 11 | _CLIENT_HEADER = {"Content-Type": "application/json", "Accept": "application/json"} 12 | 13 | 14 | class Client: 15 | """GPTCache client to send requests to GPTCache server. 16 | 17 | :param uri: the uri leads to the server, defaults to "http://localhost:8000". 18 | :type uri: str 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache import client 24 | 25 | client = Client(uri="http://localhost:8000") 26 | client.put("Hi", "Hi back") 27 | ans = client.get("Hi") 28 | """ 29 | 30 | def __init__(self, uri: str = "http://localhost:8000"): 31 | self._uri = uri 32 | 33 | async def _put(self, question: str, answer: str): 34 | async with httpx.AsyncClient() as client: 35 | data = { 36 | "prompt": question, 37 | "answer": answer, 38 | } 39 | 40 | response = await client.post( 41 | f"{self._uri}/put", headers=_CLIENT_HEADER, data=json.dumps(data) 42 | ) 43 | 44 | return response.status_code 45 | 46 | async def _get(self, question: str): 47 | async with httpx.AsyncClient() as client: 48 | data = { 49 | "prompt": question, 50 | } 51 | 52 | response = await client.post( 53 | f"{self._uri}/get", headers=_CLIENT_HEADER, data=json.dumps(data) 54 | ) 55 | 56 | return response.json().get("answer") 57 | 58 | def put(self, question: str, answer: str): 59 | """ 60 | :param question: the question to be put. 61 | :type question: str 62 | :param answer: the answer to the question to be put. 63 | :type answer: str 64 | :return: status code. 65 | """ 66 | return asyncio.run(self._put(question, answer)) 67 | 68 | def get(self, question: str): 69 | """ 70 | :param question: the question to get an answer. 71 | :type question: str 72 | :return: answer to the question. 
73 | """ 74 | return asyncio.run(self._get(question)) 75 | -------------------------------------------------------------------------------- /gptcache/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "OpenAI", 3 | "Huggingface", 4 | "SBERT", 5 | "Cohere", 6 | "Onnx", 7 | "FastText", 8 | "Data2VecAudio", 9 | "Timm", 10 | "ViT", 11 | "LangChain", 12 | "Rwkv", 13 | "PaddleNLP", 14 | "UForm", 15 | ] 16 | 17 | 18 | from gptcache.utils.lazy_import import LazyImport 19 | 20 | openai = LazyImport("openai", globals(), "gptcache.embedding.openai") 21 | huggingface = LazyImport("huggingface", globals(), "gptcache.embedding.huggingface") 22 | sbert = LazyImport("sbert", globals(), "gptcache.embedding.sbert") 23 | onnx = LazyImport("onnx", globals(), "gptcache.embedding.onnx") 24 | cohere = LazyImport("cohere", globals(), "gptcache.embedding.cohere") 25 | fasttext = LazyImport("fasttext", globals(), "gptcache.embedding.fasttext") 26 | data2vec = LazyImport("data2vec", globals(), "gptcache.embedding.data2vec") 27 | timm = LazyImport("timm", globals(), "gptcache.embedding.timm") 28 | vit = LazyImport("vit", globals(), "gptcache.embedding.vit") 29 | langchain = LazyImport("langchain", globals(), "gptcache.embedding.langchain") 30 | rwkv = LazyImport("rwkv", globals(), "gptcache.embedding.rwkv") 31 | paddlenlp = LazyImport("paddlenlp", globals(), "gptcache.embedding.paddlenlp") 32 | uform = LazyImport("uform", globals(), "gptcache.embedding.uform") 33 | 34 | 35 | def Cohere(model="large", api_key=None): 36 | return cohere.Cohere(model, api_key) 37 | 38 | 39 | def OpenAI(model="text-embedding-ada-002", api_key=None): 40 | return openai.OpenAI(model, api_key) 41 | 42 | 43 | def Huggingface(model="distilbert-base-uncased"): 44 | return huggingface.Huggingface(model) 45 | 46 | 47 | def SBERT(model="all-MiniLM-L6-v2"): 48 | return sbert.SBERT(model) 49 | 50 | 51 | def Onnx(model="GPTCache/paraphrase-albert-onnx"): 52 | return onnx.Onnx(model) 53 | 54 | 55 | def FastText(model="en", dim=None): 56 | return fasttext.FastText(model, dim) 57 | 58 | 59 | def Data2VecAudio(model="facebook/data2vec-audio-base-960h"): 60 | return data2vec.Data2VecAudio(model) 61 | 62 | 63 | def Timm(model="resnet50", device="default"): 64 | return timm.Timm(model, device) 65 | 66 | 67 | def ViT(model="google/vit-base-patch16-384"): 68 | return vit.ViT(model) 69 | 70 | 71 | def LangChain(embeddings, dimension=0): 72 | return langchain.LangChain(embeddings, dimension) 73 | 74 | 75 | def Rwkv(model="sgugger/rwkv-430M-pile"): 76 | return rwkv.Rwkv(model) 77 | 78 | 79 | def PaddleNLP(model="ernie-3.0-medium-zh"): 80 | return paddlenlp.PaddleNLP(model) 81 | 82 | 83 | def UForm(model="unum-cloud/uform-vl-multilingual", embedding_type="text"): 84 | return uform.UForm(model, embedding_type) 85 | -------------------------------------------------------------------------------- /gptcache/embedding/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseEmbedding(metaclass=ABCMeta): 5 | """ 6 | Base Embedding interface. 
7 | """ 8 | 9 | @abstractmethod 10 | def to_embeddings(self, data, **kwargs): 11 | pass 12 | 13 | @property 14 | @abstractmethod 15 | def dimension(self) -> int: 16 | return 0 17 | -------------------------------------------------------------------------------- /gptcache/embedding/cohere.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.utils import import_cohere 4 | from gptcache.embedding.base import BaseEmbedding 5 | 6 | import_cohere() 7 | 8 | import cohere # pylint: disable=C0413 9 | 10 | 11 | class Cohere(BaseEmbedding): 12 | """Generate text embedding for given text using Cohere. 13 | 14 | :param model: model name (size), defaults to 'large'. 15 | :type model: str 16 | :param api_key: Cohere API Key. 17 | :type api_key: str 18 | 19 | Example: 20 | .. code-block:: python 21 | 22 | from gptcache.embedding import Cohere 23 | 24 | test_sentence = 'Hello, world.' 25 | encoder = Cohere(model='small', api_key='your_cohere_key') 26 | embed = encoder.to_embeddings(test_sentence) 27 | """ 28 | 29 | def __init__(self, model: str = "large", api_key: str = None): 30 | self.co = cohere.Client(api_key) 31 | self.model = model 32 | 33 | if model in self.dim_dict(): 34 | self.__dimension = self.dim_dict()[model] 35 | else: 36 | self.__dimension = None 37 | 38 | def to_embeddings(self, data, **_): 39 | """Generate embedding given text input 40 | 41 | :param data: text in string. 42 | :type data: str 43 | 44 | :return: a text embedding in shape of (dim,). 45 | """ 46 | if not isinstance(data, list): 47 | data = [data] 48 | response = self.co.embed(texts=data, model=self.model) 49 | embeddings = response.embeddings 50 | return np.array(embeddings).astype("float32").squeeze(0) 51 | 52 | @property 53 | def dimension(self): 54 | """Embedding dimension. 55 | 56 | :return: embedding dimension 57 | """ 58 | if not self.__dimension: 59 | foo_emb = self.to_embeddings("foo") 60 | self.__dimension = len(foo_emb) 61 | return self.__dimension 62 | 63 | @staticmethod 64 | def dim_dict(): 65 | return {"large": 4096, "small": 1024} 66 | -------------------------------------------------------------------------------- /gptcache/embedding/data2vec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.utils import import_huggingface, import_torch, import_torchaudio 4 | from gptcache.embedding.base import BaseEmbedding 5 | 6 | import_torch() 7 | import_huggingface() 8 | import_torchaudio() 9 | 10 | import torch # pylint: disable=C0413 11 | import torchaudio # pylint: disable=C0413 12 | from transformers import Data2VecAudioModel, Wav2Vec2Processor # pylint: disable=C0413 13 | 14 | 15 | class Data2VecAudio(BaseEmbedding): 16 | """Generate audio embedding for given audio using pretrained models from Data2Vec. 17 | 18 | :param model: model name, defaults to 'facebook/data2vec-audio-base-960h'. 19 | :type model: str 20 | 21 | Example: 22 | .. 
code-block:: python 23 | 24 | from gptcache.embedding import Data2VecAudio 25 | 26 | audio_file = 'test.wav' 27 | encoder = Data2VecAudio(model='facebook/data2vec-audio-base-960h') 28 | embed = encoder.to_embeddings(audio_file) 29 | """ 30 | def __init__(self, model_name = "facebook/data2vec-audio-base-960h"): 31 | self.model = Data2VecAudioModel.from_pretrained(model_name) 32 | self.processor = Wav2Vec2Processor.from_pretrained(model_name) 33 | self.__dimension = self.model.config.hidden_size 34 | self.sr = self.processor.feature_extractor.sampling_rate 35 | 36 | def to_embeddings(self, data, **_): 37 | """Generate embedding given text input 38 | 39 | :param data: path to audio file. 40 | :type data: str 41 | 42 | :return: a text embedding in shape of (dim,). 43 | """ 44 | audio = self.load_audio(data, self.sr) 45 | inputs = self.processor(audio, sampling_rate=self.sr, return_tensors="pt") 46 | with torch.no_grad(): 47 | outputs = self.model(**inputs) 48 | last_hidden_states = outputs.last_hidden_state 49 | feat = last_hidden_states[:,-1,:].flatten().detach().cpu().numpy() 50 | return np.array(feat).astype("float32") 51 | 52 | def load_audio(self, audio_path, target_sr): 53 | waveform, sample_rate = torchaudio.load(audio_path) 54 | waveform = torch.mean(waveform, axis=0) 55 | transform = torchaudio.transforms.Resample(sample_rate, target_sr) 56 | waveform = transform(waveform) 57 | return waveform 58 | 59 | 60 | @property 61 | def dimension(self): 62 | """Embedding dimension. 63 | 64 | :return: embedding dimension 65 | """ 66 | return self.__dimension 67 | -------------------------------------------------------------------------------- /gptcache/embedding/fasttext.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | from gptcache.utils import import_fasttext 5 | from gptcache.embedding.base import BaseEmbedding 6 | 7 | import_fasttext() 8 | 9 | import fasttext.util # pylint: disable=C0413 10 | 11 | 12 | class FastText(BaseEmbedding): 13 | """Generate sentence embedding for given text using pretrained models of different languages from fastText. 14 | 15 | :param model: model name, defaults to 'en'. 16 | :type model: str 17 | :param dim: reduced dimension of embedding. If this parameter is not provided, the embedding dimension (300) will not change. 18 | :type dim: int 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.embedding import FastText 24 | 25 | test_sentence = 'Hello, world.' 26 | encoder = FastText(model='en', dim=100) 27 | embed = encoder.to_embeddings(test_sentence) 28 | """ 29 | 30 | def __init__(self, model: str = "en", dim: int = None): 31 | self.model_path = os.path.abspath(fasttext.util.download_model(model)) 32 | self.ft = fasttext.load_model(self.model_path) 33 | 34 | if dim: 35 | fasttext.util.reduce_model(self.ft, dim) 36 | self.__dimension = self.ft.get_dimension() 37 | 38 | def to_embeddings(self, data, **_): 39 | """Generate embedding given text input 40 | 41 | :param data: text in string. 42 | :type data: str 43 | 44 | :return: a text embedding in shape of (dim,). 45 | """ 46 | assert isinstance(data, str), "Only allow string as input." 47 | emb = self.ft.get_sentence_vector(data) 48 | return np.array(emb).astype("float32") 49 | 50 | @property 51 | def dimension(self): 52 | """Embedding dimension. 
53 | 54 | :return: embedding dimension 55 | """ 56 | return self.__dimension 57 | -------------------------------------------------------------------------------- /gptcache/embedding/langchain.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding.base import BaseEmbedding 4 | from gptcache.utils import import_langchain 5 | 6 | import_langchain() 7 | 8 | from langchain.embeddings.base import Embeddings # pylint: disable=C0413 9 | 10 | 11 | class LangChain(BaseEmbedding): 12 | """Generate text embedding for given text using LangChain 13 | 14 | :param embeddings: the LangChain Embeddings object. 15 | :type embeddings: Embeddings 16 | :param dimension: The vector dimension after embedding is calculated by calling embed once by default. 17 | If you confirm the dimension, you can assign a value to this parameter to reduce this request. 18 | :type dimension: int 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.embedding import LangChain 24 | from langchain.embeddings.openai import OpenAIEmbeddings 25 | 26 | test_sentence = 'Hello, world.' 27 | embeddings = OpenAIEmbeddings(model="your-embeddings-deployment-name") 28 | encoder = LangChain(embeddings=embeddings) 29 | embed = encoder.to_embeddings(test_sentence) 30 | """ 31 | 32 | def __init__(self, embeddings: Embeddings, dimension: int = 0): 33 | self._embeddings: Embeddings = embeddings 34 | self._dimension: int = ( 35 | dimension if dimension != 0 else len(self._embeddings.embed_query("foo")) 36 | ) 37 | 38 | def to_embeddings(self, data, **kwargs): 39 | vector_data = self._embeddings.embed_query(data) 40 | return np.array(vector_data).astype("float32") 41 | 42 | @property 43 | def dimension(self) -> int: 44 | return self._dimension 45 | -------------------------------------------------------------------------------- /gptcache/embedding/openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from gptcache.embedding.base import BaseEmbedding 6 | from gptcache.utils import import_openai 7 | 8 | import_openai() 9 | 10 | import openai # pylint: disable=C0413 11 | 12 | class OpenAI(BaseEmbedding): 13 | """Generate text embedding for given text using OpenAI. 14 | 15 | :param model: model name, defaults to 'text-embedding-ada-002'. 16 | :type model: str 17 | :param api_key: OpenAI API Key. When the parameter is not specified, it will load the key by default if it is available. 18 | :type api_key: str 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.embedding import OpenAI 24 | 25 | test_sentence = 'Hello, world.' 
26 | encoder = OpenAI(api_key='your_openai_key') 27 | embed = encoder.to_embeddings(test_sentence) 28 | """ 29 | 30 | def __init__(self, model: str = "text-embedding-ada-002", api_key: str = None, api_base: str = None): 31 | if not api_key: 32 | if openai.api_key: 33 | api_key = openai.api_key 34 | else: 35 | api_key = os.getenv("OPENAI_API_KEY") 36 | if not api_base: 37 | if openai.api_base: 38 | api_base = openai.api_base 39 | else: 40 | api_base = os.getenv("OPENAI_API_BASE") 41 | openai.api_key = api_key 42 | self.api_base = api_base # don't override all of openai as we may just want to override for say embeddings 43 | self.model = model 44 | if model in self.dim_dict(): 45 | self.__dimension = self.dim_dict()[model] 46 | else: 47 | self.__dimension = None 48 | 49 | def to_embeddings(self, data, **_): 50 | """Generate embedding given text input 51 | 52 | :param data: text in string. 53 | :type data: str 54 | 55 | :return: a text embedding in shape of (dim,). 56 | """ 57 | sentence_embeddings = openai.Embedding.create(model=self.model, input=data, api_base=self.api_base) 58 | return np.array(sentence_embeddings["data"][0]["embedding"]).astype("float32") 59 | 60 | @property 61 | def dimension(self): 62 | """Embedding dimension. 63 | 64 | :return: embedding dimension 65 | """ 66 | if not self.__dimension: 67 | foo_emb = self.to_embeddings("foo") 68 | self.__dimension = len(foo_emb) 69 | return self.__dimension 70 | 71 | @staticmethod 72 | def dim_dict(): 73 | return {"text-embedding-ada-002": 1536} 74 | -------------------------------------------------------------------------------- /gptcache/embedding/paddlenlp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding.base import BaseEmbedding 4 | from gptcache.utils import import_paddlenlp, import_paddle 5 | 6 | import_paddle() 7 | import_paddlenlp() 8 | 9 | 10 | import paddle # pylint: disable=C0413 11 | from paddlenlp.transformers import AutoModel, AutoTokenizer # pylint: disable=C0413 12 | 13 | class PaddleNLP(BaseEmbedding): 14 | """Generate sentence embedding for given text using pretrained models from PaddleNLP transformers. 15 | 16 | :param model: model name, defaults to 'ernie-3.0-medium-zh'. 17 | :type model: str 18 | 19 | Example: 20 | .. code-block:: python 21 | 22 | from gptcache.embedding import PaddleNLP 23 | 24 | test_sentence = 'Hello, world.' 25 | encoder = PaddleNLP(model='ernie-3.0-medium-zh') 26 | embed = encoder.to_embeddings(test_sentence) 27 | """ 28 | 29 | def __init__(self, model: str = "ernie-3.0-medium-zh"): 30 | self.model = AutoModel.from_pretrained(model) 31 | self.model.eval() 32 | 33 | self.tokenizer = AutoTokenizer.from_pretrained(model) 34 | if not self.tokenizer.pad_token: 35 | self.tokenizer.pad_token = "" 36 | self.__dimension = None 37 | 38 | def to_embeddings(self, data, **_): 39 | """Generate embedding given text input 40 | 41 | :param data: text in string. 42 | :type data: str 43 | 44 | :return: a text embedding in shape of (dim,). 
45 | """ 46 | if not isinstance(data, list): 47 | data = [data] 48 | inputs = self.tokenizer( 49 | data, padding=True, truncation=True, return_tensors="pd" 50 | ) 51 | outs = self.model(**inputs)[0] 52 | emb = self.post_proc(outs, inputs).squeeze(0).detach().numpy() 53 | return np.array(emb).astype("float32") 54 | 55 | def post_proc(self, token_embeddings, inputs): 56 | attention_mask = paddle.ones(inputs["token_type_ids"].shape) 57 | input_mask_expanded = ( 58 | attention_mask.unsqueeze(-1).expand(token_embeddings.shape).astype("float32") 59 | ) 60 | sentence_embs = paddle.sum( 61 | token_embeddings * input_mask_expanded, 1 62 | ) / paddle.clip(input_mask_expanded.sum(1), min=1e-9) 63 | return sentence_embs 64 | 65 | @property 66 | def dimension(self): 67 | """Embedding dimension. 68 | 69 | :return: embedding dimension 70 | """ 71 | if not self.__dimension: 72 | self.__dimension = len(self.to_embeddings("foo")) 73 | return self.__dimension 74 | 75 | -------------------------------------------------------------------------------- /gptcache/embedding/rwkv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding.base import BaseEmbedding 4 | from gptcache.utils import import_huggingface 5 | 6 | import_huggingface() 7 | 8 | from transformers import AutoTokenizer, RwkvModel # pylint: disable=C0413 9 | 10 | 11 | class Rwkv(BaseEmbedding): 12 | """Generate sentence embedding for given text using RWKV models. 13 | 14 | :param model: model name, defaults to 'sgugger/rwkv-430M-pile'. Check 15 | https://huggingface.co/docs/transformers/model_doc/rwkv for more avaliable models. 16 | :type model: str 17 | 18 | Example: 19 | .. code-block:: python 20 | 21 | from gptcache.embedding import Rwkv 22 | 23 | test_sentence = 'Hello, world.' 24 | encoder = Rwkv(model='sgugger/rwkv-430M-pile') 25 | embed = encoder.to_embeddings(test_sentence) 26 | """ 27 | 28 | def __init__(self, model: str = "sgugger/rwkv-430M-pile"): 29 | self.model = RwkvModel.from_pretrained(model) 30 | self.model.eval() 31 | 32 | self.tokenizer = AutoTokenizer.from_pretrained(model) 33 | try: 34 | self.__dimension = self.model.config.hidden_size 35 | except Exception: # pylint: disable=W0703 36 | from transformers import AutoConfig # pylint: disable=C0415 37 | 38 | config = AutoConfig.from_pretrained(model) 39 | self.__dimension = config.hidden_size 40 | 41 | def to_embeddings(self, data, **_): 42 | """Generate embedding given text input 43 | 44 | :param data: text in string. 45 | :type data: str 46 | 47 | :return: a text embedding in shape of (dim,). 48 | """ 49 | inputs = self.tokenizer(data, return_tensors="pt") 50 | outputs = self.model(inputs["input_ids"]) 51 | emb = outputs.last_hidden_state[0, 0, :].detach().numpy() 52 | return np.array(emb).astype("float32") 53 | 54 | @property 55 | def dimension(self): 56 | """Embedding dimension. 
57 | 58 | :return: embedding dimension 59 | """ 60 | return self.__dimension 61 | -------------------------------------------------------------------------------- /gptcache/embedding/sbert.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gptcache.utils import import_sbert 3 | from gptcache.embedding.base import BaseEmbedding 4 | 5 | import_sbert() 6 | 7 | from sentence_transformers import SentenceTransformer # pylint: disable=C0413 8 | 9 | 10 | class SBERT(BaseEmbedding): 11 | """Generate sentence embedding for given text using pretrained models of Sentence Transformers. 12 | 13 | :param model: model name, defaults to 'all-MiniLM-L6-v2'. 14 | :type model: str 15 | 16 | Example: 17 | .. code-block:: python 18 | 19 | from gptcache.embedding import SBERT 20 | 21 | test_sentence = 'Hello, world.' 22 | encoder = SBERT('all-MiniLM-L6-v2') 23 | embed = encoder.to_embeddings(test_sentence) 24 | """ 25 | 26 | def __init__(self, model: str = "all-MiniLM-L6-v2"): 27 | self.model = SentenceTransformer(model) 28 | self.model.eval() 29 | self.__dimension = None 30 | 31 | def to_embeddings(self, data, **_): 32 | """Generate embedding given text input 33 | 34 | :param data: text in string. 35 | :type data: str 36 | 37 | :return: a text embedding in shape of (dim,). 38 | """ 39 | if not isinstance(data, list): 40 | data = [data] 41 | emb = self.model.encode(data).squeeze(0) 42 | 43 | if not self.__dimension: 44 | self.__dimension = len(emb) 45 | return np.array(emb).astype("float32") 46 | 47 | @property 48 | def dimension(self): 49 | """Embedding dimension. 50 | 51 | :return: embedding dimension 52 | """ 53 | if not self.__dimension: 54 | self.__dimension = len(self.to_embeddings("foo")) 55 | return self.__dimension 56 | -------------------------------------------------------------------------------- /gptcache/embedding/string.py: -------------------------------------------------------------------------------- 1 | def to_embeddings(data, **_): 2 | """Nothing to do, return the origin data""" 3 | return data 4 | -------------------------------------------------------------------------------- /gptcache/embedding/vit.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils import import_huggingface, import_torch, import_torchvision 2 | from gptcache.embedding.base import BaseEmbedding 3 | 4 | import_torch() 5 | import_huggingface() 6 | import_torchvision() 7 | 8 | import torch # pylint: disable=C0413 9 | from transformers import AutoImageProcessor # pylint: disable=C0413 10 | from transformers import ViTModel # pylint: disable=C0413 11 | 12 | 13 | class ViT(BaseEmbedding): 14 | """Generate sentence embedding for given text using pretrained models from Huggingface transformers. 15 | 16 | :param model: model name, defaults to 'google/vit-base-patch16-384'. 17 | :type model: str 18 | 19 | Example: 20 | .. 
code-block:: python 21 | 22 | import io 23 | from PIL import Image 24 | from gptcache.embedding import ImageEmbedding 25 | 26 | def prepare_image(image_data: str = None): 27 | if not image_data: 28 | image_data = io.BytesIO() 29 | Image.new('RGB', (244, 244), color=(255, 0, 0)).save(image_data, format='JPEG') 30 | image_data.seek(0) 31 | image = Image.open(image_data) 32 | return image 33 | 34 | image = prepare_image() 35 | encoder = ImageEmbeddings(model="google/vit-base-patch16-384") 36 | embed = encoder.to_embeddings(image) 37 | """ 38 | 39 | def __init__(self, model: str = "google/vit-base-patch16-384"): 40 | 41 | self.model_name = model 42 | model = ViTModel.from_pretrained(model) 43 | self.model = model.eval() 44 | config = self.model.config 45 | self.__dimension = config.hidden_size 46 | 47 | def to_embeddings(self, data, **__): 48 | """Generate embedding given text input 49 | 50 | :param data: text in string. 51 | :type data: str 52 | 53 | :return: a text embedding in shape of (dim,). 54 | """ 55 | inputs = self.preprocess(data) 56 | 57 | with torch.no_grad(): 58 | outputs = self.model(**inputs) 59 | 60 | last_hidden_states = outputs.last_hidden_state 61 | features = last_hidden_states[:, 0, :] 62 | features = features.squeeze() 63 | return features.detach().numpy() 64 | 65 | def preprocess(self, data): 66 | image_processor = AutoImageProcessor.from_pretrained(self.model_name) 67 | inputs = image_processor(data, return_tensors="pt") 68 | return inputs 69 | 70 | @property 71 | def dimension(self): 72 | """Embedding dimension. 73 | 74 | :return: embedding dimension 75 | """ 76 | return self.__dimension 77 | 78 | -------------------------------------------------------------------------------- /gptcache/manager/__init__.py: -------------------------------------------------------------------------------- 1 | from gptcache.manager.scalar_data import CacheBase 2 | from gptcache.manager.vector_data import VectorBase 3 | from gptcache.manager.object_data import ObjectBase 4 | from gptcache.manager.factory import get_data_manager, manager_factory 5 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["EvictionBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | eviction_manager = LazyImport( 6 | "eviction_manager", globals(), "gptcache.manager.eviction.manager" 7 | ) 8 | 9 | 10 | def EvictionBase(name: str, **kwargs): 11 | """Generate specific CacheStorage with the configuration. 12 | 13 | :param name: the name of the eviction, like: memory 14 | :type name: str 15 | 16 | :param policy: eviction strategy 17 | :type policy: str 18 | :param maxsize: the maxsize of cache data 19 | :type maxsize: int 20 | :param clean_size: will clean the size of data when the size of cache data reaches the max size 21 | :type clean_size: int 22 | :param on_evict: the function for cleaning the data in the store 23 | :type on_evict: Callable[[List[Any]], None] 24 | 25 | Example: 26 | .. 
code-block:: python 27 | 28 | from gptcache.manager import EvictionBase 29 | 30 | cache_base = EvictionBase('memory', policy='lru', maxsize=10, clean_size=2, on_evict=lambda x: print(x)) 31 | """ 32 | return eviction_manager.EvictionBase.get(name, **kwargs) 33 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Any, List 3 | 4 | 5 | class EvictionBase(metaclass=ABCMeta): 6 | """ 7 | Eviction base. 8 | """ 9 | 10 | @abstractmethod 11 | def put(self, objs: List[Any]): 12 | pass 13 | 14 | @abstractmethod 15 | def get(self, obj: Any): 16 | pass 17 | 18 | @property 19 | @abstractmethod 20 | def policy(self) -> str: 21 | pass 22 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/distributed_cache.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wrong-import-position 2 | from abc import ABC, abstractmethod 3 | from typing import List 4 | 5 | from gptcache.manager.eviction.base import EvictionBase 6 | 7 | 8 | class DistributedEviction(EvictionBase, ABC): 9 | """ 10 | Base class for Distributed Eviction Strategy. 11 | """ 12 | 13 | @abstractmethod 14 | def put(self, objs: List[str]): 15 | pass 16 | 17 | @abstractmethod 18 | def get(self, obj: str): 19 | pass 20 | 21 | @property 22 | @abstractmethod 23 | def policy(self) -> str: 24 | pass 25 | 26 | 27 | class NoOpEviction(EvictionBase): 28 | """eviction: No Op Eviction Strategy. This is used when Eviction is managed internally 29 | by the Databases such as Redis or memcached and no eviction is required to perform. 30 | 31 | """ 32 | 33 | @property 34 | def policy(self) -> str: 35 | return "" 36 | 37 | def __init__(self, **kwargs): 38 | pass 39 | 40 | def put(self, objs: List[str]): 41 | pass 42 | 43 | def get(self, obj: str): 44 | pass 45 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/manager.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=import-outside-toplevel 2 | from typing import Callable, List, Any 3 | 4 | from gptcache.utils.error import NotFoundError 5 | 6 | 7 | class EvictionBase: 8 | """ 9 | EvictionBase to evict the cache data. 10 | """ 11 | 12 | def __init__(self): 13 | raise EnvironmentError( 14 | "EvictionBase is designed to be instantiated, " 15 | "please using the `EvictionBase.get(name, policy, maxsize, clean_size)`." 
16 | ) 17 | 18 | @staticmethod 19 | def get( 20 | name: str, 21 | policy: str = "LRU", 22 | maxsize: int = 1000, 23 | clean_size: int = 0, 24 | on_evict: Callable[[List[Any]], None] = None, 25 | **kwargs 26 | ): 27 | if not clean_size: 28 | clean_size = int(maxsize * 0.2) 29 | if name in "memory": 30 | from gptcache.manager.eviction.memory_cache import MemoryCacheEviction 31 | 32 | eviction_base = MemoryCacheEviction( 33 | policy, maxsize, clean_size, on_evict, **kwargs 34 | ) 35 | return eviction_base 36 | if name == "redis": 37 | from gptcache.manager.eviction.redis_eviction import RedisCacheEviction 38 | if policy == "LRU": 39 | policy = None 40 | eviction_base = RedisCacheEviction(policy=policy, **kwargs) 41 | return eviction_base 42 | if name == "no_op_eviction": 43 | from gptcache.manager.eviction.distributed_cache import NoOpEviction 44 | eviction_base = NoOpEviction() 45 | return eviction_base 46 | 47 | else: 48 | raise NotFoundError("eviction base", name) 49 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/memory_cache.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, List 2 | 3 | import cachetools 4 | 5 | from gptcache.manager.eviction.base import EvictionBase 6 | 7 | 8 | def popitem_wrapper(func, wrapper_func, clean_size): 9 | def wrapper(*args, **kwargs): 10 | keys = [] 11 | try: 12 | keys = [func(*args, **kwargs)[0] for _ in range(clean_size)] 13 | except KeyError: 14 | pass 15 | wrapper_func(keys) 16 | 17 | return wrapper 18 | 19 | 20 | class MemoryCacheEviction(EvictionBase): 21 | """eviction: Memory Cache 22 | 23 | :param policy: eviction strategy 24 | :type policy: str 25 | :param maxsize: the maxsize of cache data 26 | :type maxsize: int 27 | :param clean_size: will clean the size of data when the size of cache data reaches the max size 28 | :type clean_size: int 29 | :param on_evict: the function for cleaning the data in the store 30 | :type on_evict: Callable[[List[Any]], None] 31 | 32 | 33 | """ 34 | 35 | def __init__( 36 | self, 37 | policy: str = "LRU", 38 | maxsize: int = 1000, 39 | clean_size: int = 0, 40 | on_evict: Callable[[List[Any]], None] = None, 41 | **kwargs, 42 | ): 43 | self._policy = policy.upper() 44 | if self._policy == "LRU": 45 | self._cache = cachetools.LRUCache(maxsize=maxsize, **kwargs) 46 | elif self._policy == "LFU": 47 | self._cache = cachetools.LFUCache(maxsize=maxsize, **kwargs) 48 | elif self._policy == "FIFO": 49 | self._cache = cachetools.FIFOCache(maxsize=maxsize, **kwargs) 50 | elif self._policy == "RR": 51 | self._cache = cachetools.RRCache(maxsize=maxsize, **kwargs) 52 | else: 53 | raise ValueError(f"Unknown policy {policy}") 54 | 55 | self._cache.popitem = popitem_wrapper(self._cache.popitem, on_evict, clean_size) 56 | 57 | def put(self, objs: List[Any]): 58 | for obj in objs: 59 | self._cache[obj] = True 60 | 61 | def get(self, obj: Any): 62 | return self._cache.get(obj) 63 | 64 | @property 65 | def policy(self) -> str: 66 | return self._policy 67 | -------------------------------------------------------------------------------- /gptcache/manager/eviction_manager.py: -------------------------------------------------------------------------------- 1 | class EvictionManager: 2 | """ 3 | EvictionManager to manager the eviction policy. 4 | 5 | :param scalar_storage: CacheStorage to manager the scalar data. 
6 | :type scalar_storage: :class:`CacheStorage` 7 | :param vector_base: VectorBase to manager the vector data. 8 | :type vector_base: :class:`VectorBase` 9 | """ 10 | 11 | MAX_MARK_COUNT = 5000 12 | MAX_MARK_RATE = 0.1 13 | BATCH_SIZE = 100000 14 | REBUILD_CONDITION = 5 15 | 16 | def __init__(self, scalar_storage, vector_base): 17 | self._scalar_storage = scalar_storage 18 | self._vector_base = vector_base 19 | self.delete_count = 0 20 | 21 | def check_evict(self): 22 | mark_count = self._scalar_storage.count(state=-1) 23 | all_count = self._scalar_storage.count(is_all=True) 24 | if ( 25 | mark_count > self.MAX_MARK_COUNT 26 | or mark_count / all_count > self.MAX_MARK_RATE 27 | ): 28 | return True 29 | return False 30 | 31 | def delete(self): 32 | mark_ids = self._scalar_storage.get_ids(deleted=True) 33 | self._scalar_storage.clear_deleted_data() 34 | self._vector_base.delete(mark_ids) 35 | self.delete_count += 1 36 | if self.delete_count >= self.REBUILD_CONDITION: 37 | self.rebuild() 38 | 39 | def rebuild(self): 40 | self._scalar_storage.clear_deleted_data() 41 | ids = self._scalar_storage.get_ids(deleted=False) 42 | self._vector_base.rebuild(ids) 43 | self.delete_count = 0 44 | 45 | def soft_evict(self, marked_keys): 46 | self._scalar_storage.mark_deleted(marked_keys) 47 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["ObjectBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | object_manager = LazyImport( 6 | "object_manager", globals(), "gptcache.manager.object_data.manager" 7 | ) 8 | 9 | 10 | def ObjectBase(name: str, **kwargs): 11 | """Generate specific ObjectStorage with the configuration. For example, setting for 12 | `ObjectBase` (with `name`) to manage LocalObjectStorage, S3 object storage. 13 | """ 14 | return object_manager.ObjectBase.get(name, **kwargs) 15 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, List 3 | 4 | 5 | class ObjectBase(ABC): 6 | """ 7 | Object storage base. 
8 | """ 9 | 10 | @abstractmethod 11 | def put(self, obj: Any) -> str: 12 | pass 13 | 14 | @abstractmethod 15 | def get(self, obj: str) -> Any: 16 | pass 17 | 18 | @abstractmethod 19 | def get_access_link(self, obj: str) -> str: 20 | pass 21 | 22 | @abstractmethod 23 | def delete(self, to_delete: List[str]): 24 | pass 25 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/local_storage.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | import os 3 | import uuid 4 | from pathlib import Path 5 | from gptcache.manager.object_data.base import ObjectBase 6 | from gptcache.utils.log import gptcache_log 7 | 8 | 9 | class LocalObjectStorage(ObjectBase): 10 | """Local object storage 11 | """ 12 | 13 | def __init__(self, local_root: str): 14 | self._local_root = Path(local_root) 15 | self._local_root.mkdir(exist_ok=True) 16 | 17 | def put(self, obj: Any) -> str: 18 | f_path = self._local_root / str(uuid.uuid4()) 19 | with open(f_path, "wb") as f: 20 | f.write(obj) 21 | return str(f_path.absolute()) 22 | 23 | def get(self, obj: str) -> Any: 24 | try: 25 | with open(obj, "rb") as f: 26 | return f.read() 27 | except Exception: # pylint: disable=broad-except 28 | return None 29 | 30 | def get_access_link(self, obj: str, _: int = 3600): 31 | return obj 32 | 33 | def delete(self, to_delete: List[str]): 34 | assert isinstance(to_delete, list) 35 | for obj in to_delete: 36 | try: 37 | os.remove(obj) 38 | except Exception: # pylint: disable=broad-except 39 | gptcache_log.warning("Can not find obj: %s", obj) 40 | pass 41 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/manager.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.error import NotFoundError 2 | 3 | 4 | class ObjectBase: 5 | """ 6 | ObjectBase to manage the object storage. 7 | 8 | Generate a specific ObjectStorage with the configuration. For example, set `name` for 9 | `ObjectBase` to manage LocalObjectStorage or S3 object storage. 10 | 11 | :param name: the name of the object storage; supported values are 'local' and 's3'. 12 | :type name: str 13 | :param path: the cache root of the LocalObjectStorage. 14 | :type path: str 15 | 16 | :param bucket: the bucket of s3. 17 | :type bucket: str 18 | :param path_prefix: s3 object prefix. 19 | :type path_prefix: str 20 | :param access_key: the access_key of s3. 21 | :type access_key: str 22 | :param secret_key: the secret_key of s3. 23 | :type secret_key: str 24 | 25 | :return: ObjectStorage. 26 | 27 | Example: 28 | .. code-block:: python 29 | 30 | from gptcache.manager import ObjectBase 31 | 32 | obj_storage = ObjectBase('local', path='./') 33 | """ 34 | 35 | def __init__(self): 36 | raise EnvironmentError( 37 | "ObjectBase is not designed to be instantiated directly, please use `ObjectBase.get(name)`."
38 | ) 39 | 40 | @staticmethod 41 | def get(name, **kwargs): 42 | if name == "local": 43 | from gptcache.manager.object_data.local_storage import LocalObjectStorage # pylint: disable=import-outside-toplevel 44 | object_base = LocalObjectStorage(kwargs.get("path", "./local_obj")) 45 | elif name == "s3": 46 | from gptcache.manager.object_data.s3_storage import S3Storage # pylint: disable=import-outside-toplevel 47 | object_base = S3Storage(kwargs.get("path_prefix"), kwargs.get("bucket"), 48 | kwargs.get("access_key"), kwargs.get("secret_key"), 49 | kwargs.get("endpoint")) 50 | else: 51 | raise NotFoundError("object store", name) 52 | return object_base 53 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/s3_storage.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | import uuid 3 | import os 4 | 5 | from gptcache.manager.object_data.base import ObjectBase 6 | 7 | from gptcache.utils import import_boto3 8 | from gptcache.utils.log import gptcache_log 9 | 10 | import_boto3() 11 | import boto3 # pylint: disable=wrong-import-position 12 | 13 | 14 | class S3Storage(ObjectBase): 15 | """S3 storage 16 | """ 17 | 18 | def __init__(self, bucket: str, path_prefix: str, access_key: str, secret_key: str, endpoint: str = None): 19 | self._session = boto3.Session( 20 | aws_access_key_id=access_key, 21 | aws_secret_access_key=secret_key 22 | ) 23 | self._s3 = self._session.resource("s3") 24 | self._bucket = bucket 25 | self._path_prefix = path_prefix 26 | self._endpoint = endpoint 27 | 28 | def put(self, obj: Any) -> str: 29 | f_path = os.path.join(self._path_prefix, str(uuid.uuid4())) 30 | self._s3.Bucket(self._bucket).put_object(Key=str(f_path), Body=obj) 31 | return f_path 32 | 33 | def get(self, obj: str) -> Any: 34 | try: 35 | return self._s3.Bucket(self._bucket).Object(obj).get()["Body"].read() 36 | except Exception: # pylint: disable=broad-except 37 | gptcache_log.error("obj:%s not exist", obj) 38 | return None 39 | 40 | def get_access_link(self, obj: str, expires: int = 3600) -> str: 41 | s3 = self._session.client("s3") 42 | link = s3.generate_presigned_url( 43 | ClientMethod="get_object", 44 | ExpiresIn=expires, 45 | Params={ 46 | "Bucket": self._bucket, 47 | "Key": obj 48 | } 49 | ) 50 | if self._endpoint: 51 | link = link.replace("s3.amazonaws.com/" + self._bucket, self._endpoint) 52 | return link 53 | 54 | def delete(self, to_delete: List[str]): 55 | self._s3.Bucket(self._bucket).delete_objects(Delete={"Objects": [{"Key": k} for k in to_delete]}) 56 | -------------------------------------------------------------------------------- /gptcache/manager/scalar_data/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["CacheBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | scalar_manager = LazyImport( 6 | "scalar_manager", globals(), "gptcache.manager.scalar_data.manager" 7 | ) 8 | 9 | 10 | def CacheBase(name: str, **kwargs): 11 | """Generate specific CacheStorage with the configuration. For example, setting for 12 | `SQLDataBase` (with `name`, `sql_url` and `table_name` params) to manage SQLite, PostgreSQL, MySQL, MariaDB, SQL Server and Oracle. 
13 | """ 14 | return scalar_manager.CacheBase.get(name, **kwargs) 15 | -------------------------------------------------------------------------------- /gptcache/manager/vector_data/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["VectorBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | vector_manager = LazyImport( 6 | "vector_manager", globals(), "gptcache.manager.vector_data.manager" 7 | ) 8 | 9 | 10 | def VectorBase(name: str, **kwargs): 11 | """Generate specific VectorBase with the configuration. 12 | """ 13 | return vector_manager.VectorBase.get(name, **kwargs) 14 | -------------------------------------------------------------------------------- /gptcache/manager/vector_data/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | 7 | 8 | @dataclass 9 | class VectorData: 10 | id: int 11 | data: np.ndarray 12 | 13 | 14 | class VectorBase(ABC): 15 | """VectorBase: base vector store interface""" 16 | 17 | @abstractmethod 18 | def mul_add(self, datas: List[VectorData]): 19 | pass 20 | 21 | @abstractmethod 22 | def search(self, data: np.ndarray, top_k: int): 23 | pass 24 | 25 | @abstractmethod 26 | def rebuild(self, ids=None) -> bool: 27 | pass 28 | 29 | @abstractmethod 30 | def delete(self, ids) -> bool: 31 | pass 32 | 33 | def flush(self): 34 | pass 35 | 36 | def close(self): 37 | pass 38 | 39 | def get_embeddings(self, data_id: Union[int, str]) -> Optional[np.ndarray]: 40 | raise NotImplementedError 41 | 42 | def update_embeddings(self, data_id: Union[int, str], emb: np.ndarray): 43 | pass 44 | -------------------------------------------------------------------------------- /gptcache/manager/vector_data/faiss.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import numpy as np 5 | 6 | from gptcache.manager.vector_data.base import VectorBase, VectorData 7 | from gptcache.utils import import_faiss 8 | 9 | import_faiss() 10 | 11 | import faiss # pylint: disable=C0413 12 | 13 | 14 | class Faiss(VectorBase): 15 | """vector store: Faiss 16 | 17 | :param index_path: the path to Faiss index, defaults to 'faiss.index'. 18 | :type index_path: str 19 | :param dimension: the dimension of the vector, defaults to 0. 20 | :type dimension: int 21 | :param top_k: the number of the vectors results to return, defaults to 1. 
22 | :type top_k: int 23 | """ 24 | 25 | def __init__(self, index_file_path, dimension, top_k): 26 | self._index_file_path = index_file_path 27 | self._dimension = dimension 28 | self._index = faiss.index_factory(self._dimension, "IDMap,Flat", faiss.METRIC_L2) 29 | self._top_k = top_k 30 | if os.path.isfile(index_file_path): 31 | self._index = faiss.read_index(index_file_path) 32 | 33 | def mul_add(self, datas: List[VectorData]): 34 | data_array, id_array = map(list, zip(*((data.data, data.id) for data in datas))) 35 | np_data = np.array(data_array).astype("float32") 36 | ids = np.array(id_array) 37 | self._index.add_with_ids(np_data, ids) 38 | 39 | def search(self, data: np.ndarray, top_k: int = -1): 40 | if self._index.ntotal == 0: 41 | return None 42 | if top_k == -1: 43 | top_k = self._top_k 44 | np_data = np.array(data).astype("float32").reshape(1, -1) 45 | dist, ids = self._index.search(np_data, top_k) 46 | ids = [int(i) for i in ids[0]] 47 | return list(zip(dist[0], ids)) 48 | 49 | def rebuild(self, ids=None): 50 | return True 51 | 52 | def delete(self, ids): 53 | ids_to_remove = np.array(ids) 54 | self._index.remove_ids(faiss.IDSelectorBatch(ids_to_remove.size, faiss.swig_ptr(ids_to_remove))) 55 | 56 | def flush(self): 57 | faiss.write_index(self._index, self._index_file_path) 58 | 59 | def close(self): 60 | self.flush() 61 | 62 | def count(self): 63 | return self._index.ntotal 64 | -------------------------------------------------------------------------------- /gptcache/processor/__init__.py: -------------------------------------------------------------------------------- 1 | from gptcache.processor.context.context import ContextProcess 2 | -------------------------------------------------------------------------------- /gptcache/processor/check_hit.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=unused-argument 2 | def check_hit_session(cur_session_id: str, cache_session_ids: list, cache_questions: list, cache_answer: str): 3 | """ 4 | Check if the session result meets the hit requirement. 5 | 6 | :param cur_session_id: the name of the current session. 7 | :type cur_session_id: str 8 | :param cache_session_ids: a list of session names that cached the same question if you are using a map as the data management method. 9 | Otherwise, a list of session names for similar questions that share the same answer. 10 | :type cache_session_ids: list 11 | :param cache_questions: a list with one question, identical to the one you asked, if you use a map as the data management method. 12 | Otherwise, a list of questions similar to the one you asked that share the same answer, 13 | corresponding one-to-one with cache_session_ids. 14 | :type cache_questions: list 15 | :param cache_answer: the content of the cached answer.
16 | :type cache_answer: str 17 | 18 | :return: True or False 19 | """ 20 | return cur_session_id not in cache_session_ids 21 | -------------------------------------------------------------------------------- /gptcache/processor/context/__init__.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.lazy_import import LazyImport 2 | 3 | summarization = LazyImport( 4 | "summarization_context", 5 | globals(), 6 | "gptcache.processor.context.summarization_context", 7 | ) 8 | selective = LazyImport( 9 | "selective_context", globals(), "gptcache.processor.context.selective_context" 10 | ) 11 | concat = LazyImport( 12 | "concat_context", globals(), "gptcache.processor.context.concat_context" 13 | ) 14 | 15 | 16 | __all__ = [ 17 | "SummarizationContextProcess", 18 | "SelectiveContextProcess", 19 | "ConcatContextProcess", 20 | ] 21 | 22 | 23 | def SummarizationContextProcess(model_name=None, tokenizer=None, target_length=512): 24 | return summarization.SummarizationContextProcess( 25 | model_name, tokenizer, target_length 26 | ) 27 | 28 | 29 | def SelectiveContextProcess( 30 | model_type: str = "gpt2", 31 | lang: str = "en", 32 | reduce_ratio: float = 0.35, 33 | reduce_level: str = "phrase", 34 | ): 35 | return selective.SelectiveContextProcess( 36 | model_type=model_type, 37 | lang=lang, 38 | reduce_ratio=reduce_ratio, 39 | reduce_level=reduce_level, 40 | ) 41 | 42 | 43 | def ConcatContextProcess(): 44 | return concat.ConcatContextProcess() 45 | -------------------------------------------------------------------------------- /gptcache/processor/context/concat_context.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from gptcache.processor import ContextProcess 4 | 5 | 6 | class ConcatContextProcess(ContextProcess): 7 | """A concat context processor that simply concatenates the context. 8 | Generally used with the Rwkv embedding, because RWKV can accept almost arbitrarily long input. 9 | 10 | Example: 11 | .. code-block:: python 12 | 13 | from gptcache.manager import manager_factory 14 | from gptcache.processor.context.concat_context import ConcatContextProcess 15 | 16 | context_process = ConcatContextProcess() 17 | rwkv_embedding = Rwkv() 18 | data_manager = manager_factory( 19 | "sqlite,faiss", 20 | vector_params={"dimension": rwkv_embedding.dimension}, 21 | ) 22 | cache.init( 23 | pre_embedding_func=context_process.pre_process, 24 | embedding_func=rwkv_embedding.to_embeddings, 25 | data_manager=data_manager, 26 | ) 27 | """ 28 | 29 | content: str = "" 30 | 31 | def __init__( 32 | self 33 | ): 34 | self.content = "" 35 | self.concat_content = "" 36 | 37 | def format_all_content(self, data: Dict[str, Any], **params: Dict[str, Any]): 38 | for query in data["messages"]: 39 | self.content += f"{query['role']}: {query['content']} \n" 40 | self.concat_content += query["content"] 41 | 42 | def process_all_content(self) -> (Any, Any): 43 | return self.content, self.concat_content 44 | -------------------------------------------------------------------------------- /gptcache/processor/context/context.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Dict, Any 3 | 4 | 5 | class ContextProcess(metaclass=ABCMeta): 6 | """ContextProcess: the context process interface, which is used to pre-process a long conversation.
7 | By the way, the GPTCache will acquire more information and get a more accurate embedding vector. 8 | 9 | Example: 10 | .. code-block:: python 11 | 12 | from gptcache.processor.context import SummarizationContextProcess 13 | 14 | context_process = SummarizationContextProcess() 15 | cache.init(pre_embedding_func=context_process.pre_process) 16 | """ 17 | 18 | @abstractmethod 19 | def format_all_content(self, data: Dict[str, Any], **params: Dict[str, Any]): 20 | """format all content of the llm request data as a string 21 | 22 | :param data: the user llm request data 23 | :type data: Dict[str, Any] 24 | """ 25 | pass 26 | 27 | @abstractmethod 28 | def process_all_content(self) -> (Any, Any): 29 | """process all content of the llm request data, for extracting key information in context. 30 | In order to achieve this goal, you can pass the summary model and so on 31 | """ 32 | pass 33 | 34 | def pre_process(self, data: Dict[str, Any], **params: Dict[str, Any]) -> (Any, Any): 35 | """ pre-process function, it's used as the GPTCache initialization param -- pre_embedding_func. 36 | 37 | :param data: the user llm request data 38 | :type data: Dict[str, Any] 39 | """ 40 | self.format_all_content(data, **params) 41 | return self.process_all_content() 42 | -------------------------------------------------------------------------------- /gptcache/processor/context/selective_context.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from gptcache.processor import ContextProcess 4 | from gptcache.utils import import_selective_context 5 | 6 | import_selective_context() 7 | 8 | from selective_context import SelectiveContext # pylint: disable=C0413 9 | 10 | 11 | class SelectiveContextProcess(ContextProcess): 12 | """A context processor for selecting context 13 | 14 | Need to download the corresponding model before use, the default English model is: en_core_web_sm 15 | 16 | `pip install spacy && python -m spacy download en_core_web_sm` 17 | 18 | :param model_type: the selective context model name, default value is 'gpt2' 19 | :type model_type: str 20 | :param lang: the content lang type, default value is 'en'. 21 | :type lang: str 22 | :param reduce_ratio: selective context ratio. The range for the value is between 0 and 1, with a default value of 0.35. 23 | :type reduce_ratio: float 24 | :param reduce_level: selective context level. The valid values include 'sent', 'phrase', and 'token', with the default value being 'phrase'. 25 | :type reduce_level: str 26 | 27 | more details: https://github.com/liyucheng09/Selective_Context 28 | 29 | Example: 30 | .. 
code-block:: python 31 | 32 | from gptcache.processor.context.selective_context import SelectiveContextProcess 33 | 34 | context_process = SelectiveContextProcess() 35 | cache.init(pre_embedding_func=context_process.pre_process) 36 | """ 37 | 38 | content: str = "" 39 | 40 | def __init__( 41 | self, 42 | model_type: str = "gpt2", 43 | lang: str = "en", 44 | reduce_ratio: float = 0.35, 45 | reduce_level: str = "phrase", 46 | ): 47 | self.sc = SelectiveContext(model_type=model_type, lang=lang) 48 | self.reduce_ratio = reduce_ratio 49 | self.reduce_level = reduce_level 50 | 51 | def format_all_content(self, data: Dict[str, Any], **params: Dict[str, Any]): 52 | for query in data["messages"]: 53 | self.content += f"{query['role']}: {query['content']} \n" 54 | 55 | def process_all_content(self) -> (Any, Any): 56 | selective_content, _ = self.sc( 57 | self.content, reduce_ratio=self.reduce_ratio, reduce_level=self.reduce_level 58 | ) 59 | return self.content, selective_content 60 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/cohere_rerank.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Any 2 | 3 | from gptcache.similarity_evaluation import SimilarityEvaluation 4 | from gptcache.utils import import_cohere 5 | 6 | import_cohere() 7 | 8 | import cohere # pylint: disable=C0413 9 | 10 | 11 | class CohereRerank(SimilarityEvaluation): 12 | """Use the Cohere Rerank API to evaluate relevance of question and answer. 13 | 14 | Reference: https://docs.cohere.com/reference/rerank-1 15 | 16 | :param model: model name, defaults to 'rerank-english-v2.0', and multilingual option: rerank-multilingual-v2.0. 17 | :type model: str 18 | :param api_key: cohere api key, defaults to None. 19 | :type api_key: str 20 | 21 | Example: 22 | .. code-block:: python 23 | 24 | from gptcache.similarity_evaluation import CohereRerankEvaluation 25 | 26 | evaluation = CohereRerankEvaluation() 27 | score = evaluation.evaluation( 28 | { 29 | 'question': 'What is the color of sky?' 30 | }, 31 | { 32 | 'answer': 'the color of sky is blue' 33 | } 34 | ) 35 | """ 36 | 37 | def __init__(self, model: str = "rerank-english-v2.0", api_key: str = None): 38 | self.co = cohere.Client(api_key) 39 | self.model = model 40 | 41 | def evaluation(self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **kwargs) -> float: 42 | response = self.co.rerank( 43 | model=self.model, 44 | query=src_dict["question"], 45 | documents=cache_dict["answer"], 46 | top_n=1, 47 | ) 48 | if len(response.results) == 0: 49 | return 0 50 | return response.results[0].relevance_score 51 | 52 | def range(self) -> Tuple[float, float]: 53 | return 0.0, 1.0 54 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/distance.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Any 2 | 3 | from gptcache.similarity_evaluation import SimilarityEvaluation 4 | 5 | 6 | class SearchDistanceEvaluation(SimilarityEvaluation): 7 | """Using search distance to evaluate sentences pair similarity. 8 | 9 | This is the evaluator to compare two embeddings according to their distance computed in embedding retrieval stage. 10 | In the retrieval stage, `search_result` is the distance used for approximate nearest neighbor search and have been 11 | put into `cache_dict`. 
`max_distance` is used to bound this distance so that it lies within [0, `max_distance`]. `positive` is 12 | used to indicate that this distance is directly proportional to the similarity of the two entities. If `positive` is set to `False`, 13 | the distance is subtracted from `max_distance` to get the final score. 14 | 15 | :param max_distance: the bound of maximum distance. 16 | :type max_distance: float 17 | :param positive: set to True if a larger distance indicates that two entities are more similar; otherwise set to False. 18 | :type positive: bool 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.similarity_evaluation import SearchDistanceEvaluation 24 | 25 | evaluation = SearchDistanceEvaluation() 26 | score = evaluation.evaluation( 27 | {}, 28 | { 29 | "search_result": (1, None) 30 | } 31 | ) 32 | """ 33 | 34 | def __init__(self, max_distance=4.0, positive=False): 35 | self.max_distance = max_distance 36 | self.positive = positive 37 | 38 | def evaluation( 39 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_ 40 | ) -> float: 41 | """Evaluate the similarity score of pair. 42 | :param src_dict: the query dictionary to evaluate with cache. 43 | :type src_dict: Dict 44 | :param cache_dict: the cache dictionary. 45 | :type cache_dict: Dict 46 | 47 | :return: evaluation score. 48 | """ 49 | distance, _ = cache_dict["search_result"] 50 | if distance < 0: 51 | distance = 0 52 | elif distance > self.max_distance: 53 | distance = self.max_distance 54 | if self.positive: 55 | return distance 56 | return self.max_distance - distance 57 | 58 | def range(self) -> Tuple[float, float]: 59 | """Range of similarity score. 60 | 61 | :return: minimum and maximum of similarity score. 62 | """ 63 | return 0.0, self.max_distance 64 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/exact_match.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Any 2 | 3 | from gptcache.similarity_evaluation.similarity_evaluation import SimilarityEvaluation 4 | 5 | 6 | class ExactMatchEvaluation(SimilarityEvaluation): 7 | """Using an exact-match metric to evaluate sentence pair similarity. 8 | 9 | This evaluator directly compares two `question` texts. If every single character in the two questions matches, then this evaluator 10 | will return 1, otherwise 0. 11 | 12 | Example: 13 | .. code-block:: python 14 | 15 | from gptcache.similarity_evaluation import ExactMatchEvaluation 16 | 17 | evaluation = ExactMatchEvaluation() 18 | score = evaluation.evaluation( 19 | { 20 | "question": "What is the color of sky?" 21 | }, 22 | { 23 | "question": "What is the color of sky?" 24 | } 25 | ) 26 | """ 27 | 28 | def __init__(self): 29 | pass 30 | 31 | def evaluation( 32 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_ 33 | ) -> float: 34 | """Evaluate the similarity score of pair. 35 | 36 | :param src_dict: the query dictionary to evaluate with cache_dict. 37 | :type src_dict: Dict 38 | :param cache_dict: the cache dictionary. 39 | :type cache_dict: Dict 40 | 41 | :return: evaluation score. 42 | """ 43 | return 1 if cache_dict["question"] == src_dict["question"] else 0 44 | 45 | def range(self) -> Tuple[float, float]: 46 | """Range of similarity score. 47 | 48 | :return: minimum and maximum of similarity score.
49 | """ 50 | return 0, 1 51 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/sbert_crossencoder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Tuple, Any 2 | from gptcache.utils import import_sbert 3 | from gptcache.similarity_evaluation import SimilarityEvaluation 4 | import_sbert() 5 | from sentence_transformers import CrossEncoder # pylint: disable=C0413 6 | 7 | class SbertCrossencoderEvaluation(SimilarityEvaluation): 8 | """Using SBERT crossencoders to evaluate sentences pair similarity. 9 | 10 | This evaluator use the crossencoder model to evaluate the similarity of two sentences. 11 | 12 | :param model: model name of SbertCrossencoderEvaluation. Default is 'cross-encoder/quora-distilroberta-base'. 13 | Check more please refer to https://www.sbert.net/docs/pretrained_cross-encoders.html#quora-duplicate-questions. 14 | :type model: str 15 | 16 | Example: 17 | .. code-block:: python 18 | 19 | from gptcache.similarity_evaluation import SbertCrossencoderEvaluation 20 | 21 | evaluation = SbertCrossencoderEvaluation() 22 | score = evaluation.evaluation( 23 | { 24 | 'question': 'What is the color of sky?' 25 | }, 26 | { 27 | 'question': 'hello' 28 | } 29 | ) 30 | """ 31 | def __init__(self, model: str="cross-encoder/quora-distilroberta-base"): 32 | self.model = CrossEncoder(model) 33 | 34 | def evaluation( 35 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_ 36 | ) -> float: 37 | """Evaluate the similarity score of pair. 38 | 39 | :param src_dict: the query dictionary to evaluate with cache. 40 | :type src_dict: Dict 41 | :param cache_dict: the cache dictionary. 42 | :type cache_dict: Dict 43 | 44 | :return: evaluation score. 45 | """ 46 | try: 47 | src_question = src_dict["question"] 48 | cache_question = cache_dict["question"] 49 | if src_question.lower() == cache_question.lower(): 50 | return 1 51 | return self.model.predict([(src_question, cache_question)])[0] 52 | except Exception: # pylint: disable=W0703 53 | return 0 54 | 55 | def range(self) -> Tuple[float, float]: 56 | """Range of similarity score. 57 | 58 | :return: minimum and maximum of similarity score. 59 | """ 60 | return 0.0, 1.0 61 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/similarity_evaluation.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Tuple, Dict, Any 3 | 4 | 5 | class SimilarityEvaluation(metaclass=ABCMeta): 6 | """Similarity Evaluation interface, 7 | determine the similarity between the input request and the requests from the Vector Store. 8 | Based on this similarity, it determines whether a request matches the cache. 9 | 10 | Example: 11 | .. code-block:: python 12 | 13 | from gptcache import cache 14 | from gptcache.similarity_evaluation import SearchDistanceEvaluation 15 | 16 | cache.init( 17 | similarity_evaluation=SearchDistanceEvaluation() 18 | ) 19 | """ 20 | 21 | @abstractmethod 22 | def evaluation( 23 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **kwargs 24 | ) -> float: 25 | """Evaluate the similarity score of the user and cache requests pair. 26 | 27 | :param src_dict: the user request params. 28 | :type src_dict: Dict 29 | :param cache_dict: the cache request params. 
30 | :type cache_dict: Dict 31 | """ 32 | pass 33 | 34 | @abstractmethod 35 | def range(self) -> Tuple[float, float]: 36 | """Range of similarity score. 37 | 38 | :return: the range of similarity score, which is the min and max values 39 | :rtype: Tuple[float, float] 40 | """ 41 | pass 42 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/time.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Tuple, Dict, Any 3 | 4 | from gptcache.adapter.api import _get_eval 5 | from gptcache.similarity_evaluation import SimilarityEvaluation 6 | 7 | 8 | class TimeEvaluation(SimilarityEvaluation): 9 | """Add time dimension restrictions on the basis of other Evaluation, 10 | for example, only use the cache within 1 day from the current time, 11 | and filter out the previous cache. 12 | 13 | :param evaluation: Similarity evaluation, like distance/onnx. 14 | :param evaluation_config: Similarity evaluation config. 15 | :param time_range: Time range, time unit: s 16 | 17 | Example: 18 | .. code-block:: python 19 | 20 | import datetime 21 | 22 | from gptcache.manager.scalar_data.base import CacheData 23 | from gptcache.similarity_evaluation import TimeEvaluation 24 | 25 | evaluation = TimeEvaluation(evaluation="distance", time_range=86400) 26 | 27 | similarity = eval.evaluation( 28 | {}, 29 | { 30 | "search_result": (3.5, None), 31 | "cache_data": CacheData("a", "b", create_on=datetime.datetime.now()), 32 | }, 33 | ) 34 | # 0.5 35 | 36 | """ 37 | 38 | def __init__(self, evaluation: str, evaluation_config=None, time_range: float = 86400.0): 39 | if evaluation_config is None: 40 | evaluation_config = {} 41 | self._eval = _get_eval(evaluation, evaluation_config) 42 | self._time_range = time_range 43 | 44 | def evaluation(self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **kwargs) -> float: 45 | cache_data = cache_dict.get("cache_data", None) 46 | if not cache_data or not cache_data.create_on: 47 | return self.range()[0] 48 | delta_time = datetime.now().timestamp() - cache_data.create_on.timestamp() 49 | if delta_time > self._time_range: 50 | return self.range()[0] 51 | return self._eval.evaluation(src_dict, cache_dict, **kwargs) 52 | 53 | def range(self) -> Tuple[float, float]: 54 | return self._eval.range() 55 | 56 | -------------------------------------------------------------------------------- /gptcache/utils/cache_func.py: -------------------------------------------------------------------------------- 1 | def cache_all(*_, **__): 2 | return True 3 | -------------------------------------------------------------------------------- /gptcache/utils/dependency_control.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | from gptcache.utils.error import PipInstallError 4 | from gptcache.utils.log import gptcache_log 5 | 6 | 7 | def prompt_install(package: str, warn: bool = False): # pragma: no cover 8 | """ 9 | Function used to prompt user to install a package. 10 | """ 11 | cmd = f"pip install -q {package}" 12 | try: 13 | if warn and input(f"Install {package}? 
Y/n: ") != "Y": 14 | raise ModuleNotFoundError(f"No module named {package}") 15 | print(f"start to install package: {package}") 16 | subprocess.check_call(cmd, shell=True) 17 | print(f"successfully installed package: {package}") 18 | gptcache_log.info("%s installed successfully!", package) 19 | except subprocess.CalledProcessError as e: 20 | raise PipInstallError(package) from e 21 | -------------------------------------------------------------------------------- /gptcache/utils/error.py: -------------------------------------------------------------------------------- 1 | class CacheError(Exception): 2 | """GPTCache base error""" 3 | 4 | 5 | class NotInitError(CacheError): 6 | """Raise when the cache is used before it has been initialized""" 7 | def __init__(self): 8 | super().__init__("The cache should be inited before using") 9 | 10 | 11 | class NotFoundError(CacheError): 12 | """Raise when getting an unsupported store.""" 13 | def __init__(self, store_type, current_type_name): 14 | super().__init__(f"Unsupported {store_type}: {current_type_name}") 15 | 16 | 17 | class ParamError(CacheError): 18 | """Raise when receiving an invalid param.""" 19 | 20 | 21 | class PipInstallError(CacheError): 22 | """Raise when a package fails to install.""" 23 | def __init__(self, package): 24 | super().__init__(f"Ran into error installing {package}.") 25 | 26 | 27 | def wrap_error(e: Exception) -> Exception: 28 | """Add a type to exception `e` while ensuring that the original type is not changed 29 | 30 | Example: 31 | .. code-block:: python 32 | 33 | import openai 34 | 35 | from gptcache.utils.error import wrap_error 36 | 37 | 38 | def raise_error(): 39 | try: 40 | raise openai.error.OpenAIError(message="test") 41 | except openai.error.OpenAIError as e: 42 | raise wrap_error(e) 43 | 44 | 45 | try: 46 | raise_error() 47 | except openai.error.OpenAIError as e: 48 | print("exception:") 49 | print(e) 50 | 51 | print("over") 52 | """ 53 | e.__class__ = type(e.__class__.__name__, (CacheError, e.__class__), {}) 54 | return e 55 | -------------------------------------------------------------------------------- /gptcache/utils/lazy_import.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from types import ModuleType 3 | 4 | 5 | class LazyImport(ModuleType): 6 | """ 7 | Lazily import a module.
8 | """ 9 | 10 | def __init__(self, local_name, parent_module_globals, name): 11 | self._local_name = local_name 12 | self._parent_module_globals = parent_module_globals 13 | super().__init__(name) 14 | 15 | def _load(self): 16 | module = importlib.import_module(self.__name__) 17 | self._parent_module_globals[self._local_name] = module 18 | self.__dict__.update(module.__dict__) 19 | return module 20 | 21 | def __getattr__(self, item): 22 | module = self._load() 23 | return getattr(module, item) 24 | 25 | def __dir__(self): 26 | module = self._load() 27 | return dir(module) 28 | -------------------------------------------------------------------------------- /gptcache/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gptcache 4 | 5 | FORMAT = '%(asctime)s - %(thread)d - %(filename)s-%(module)s:%(lineno)s - %(levelname)s: %(message)s' 6 | logging.basicConfig(format=FORMAT) 7 | 8 | gptcache_log = logging.getLogger(f'gptcache:{gptcache.__version__}') 9 | -------------------------------------------------------------------------------- /gptcache/utils/response.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | 4 | 5 | def get_message_from_openai_answer(openai_resp): 6 | return openai_resp["choices"][0]["message"]["content"] 7 | 8 | 9 | def get_stream_message_from_openai_answer(openai_data): 10 | return openai_data["choices"][0]["delta"].get("content", "") 11 | 12 | 13 | def get_text_from_openai_answer(openai_resp): 14 | return openai_resp["choices"][0]["text"] 15 | 16 | 17 | def get_image_from_openai_b64(openai_resp): 18 | return openai_resp["data"][0]["b64_json"] 19 | 20 | 21 | def get_image_from_openai_url(openai_resp): 22 | url = openai_resp["data"][0]["url"] 23 | img_content = requests.get(url).content 24 | img_data = base64.b64encode(img_content) 25 | return img_data 26 | 27 | 28 | def get_image_from_path(openai_resp): 29 | img_path = openai_resp["data"][0]["url"] 30 | with open(img_path, "rb") as f: 31 | img_data = base64.b64encode(f.read()) 32 | return img_data 33 | 34 | 35 | def get_audio_text_from_openai_answer(openai_resp): 36 | return openai_resp["text"] 37 | -------------------------------------------------------------------------------- /gptcache/utils/softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def softmax(x: list): 5 | x = np.array(x) 6 | assert len(x.shape) == 1, f"Expect to get a shape of (len,) but got {x.shape}, x value: {x}." 
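# note: subtracting the max (next lines) before calling exp() keeps the exponentials in a safe numeric range -- the standard numerically stable softmax; the shift cancels out in the normalization, so the result is unchanged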
7 | max_val = x.max() 8 | e_x = np.exp(x - max_val) 9 | return e_x / e_x.sum() 10 | -------------------------------------------------------------------------------- /gptcache/utils/time.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache 4 | 5 | 6 | def time_cal(func, func_name=None, report_func=None): 7 | def inner(*args, **kwargs): 8 | time_start = time.time() 9 | res = func(*args, **kwargs) 10 | delta_time = time.time() - time_start 11 | if cache.config.log_time_func: 12 | cache.config.log_time_func( 13 | func.__name__ if func_name is None else func_name, delta_time 14 | ) 15 | if report_func is not None: 16 | report_func(delta_time) 17 | return res 18 | 19 | return inner 20 | -------------------------------------------------------------------------------- /gptcache/utils/token.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils import import_tiktoken 2 | 3 | _encoding = None 4 | 5 | 6 | def _get_encoding(): 7 | global _encoding 8 | if _encoding is None: 9 | import_tiktoken() 10 | import tiktoken # pylint: disable=C0415 11 | _encoding = tiktoken.get_encoding("cl100k_base") 12 | return _encoding 13 | 14 | 15 | def token_counter(text): 16 | """Token Counter""" 17 | num_tokens = len(_get_encoding().encode(text)) 18 | return num_tokens 19 | -------------------------------------------------------------------------------- /gptcache_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/gptcache_server/__init__.py -------------------------------------------------------------------------------- /gptcache_server/dockerfiles/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-bullseye 2 | 3 | ENV PYTHONDONTWRITEBYTECODE=1 4 | 5 | RUN pip3 install --upgrade pip 6 | 7 | RUN pip3 install --no-cache-dir gptcache 8 | 9 | WORKDIR /workspace 10 | 11 | CMD ["gptcache_server", "-s", "0.0.0.0"] 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | cachetools 3 | requests -------------------------------------------------------------------------------- /scripts/manage_conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | DEFAULT_ENV_NAME="gpt-cache" 5 | 6 | # Usage: ./manage_conda_env.sh create [env_name] 7 | # Usage: ./manage_conda_env.sh remove [env_name] 8 | 9 | if [[ "$1" == "create" ]]; then 10 | if [[ -n "$2" ]]; then 11 | env_name="$2" 12 | else 13 | env_name="$DEFAULT_ENV_NAME" 14 | fi 15 | if conda env list | grep -q "^$env_name "; then 16 | echo "conda environment '$env_name' already exists." 17 | else 18 | conda create --name "$env_name" python=3.8 19 | echo "conda environment '$env_name' created." 20 | fi 21 | conda activate "$env_name" 22 | echo "conda environment '$env_name' activated." 23 | elif [[ "$1" == "remove" ]]; then 24 | conda deactivate 25 | if [[ -n "$2" ]]; then 26 | env_name="$2" 27 | else 28 | env_name="$DEFAULT_ENV_NAME" 29 | fi 30 | conda remove --name "$env_name" --all 31 | echo "conda environment '$env_name' removed." 
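# note: "conda activate" / "conda deactivate" only persist if this script is sourced, e.g. "source scripts/manage_conda_env.sh create"; a child shell cannot change its parent shell's environment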
32 | else 33 | echo "Usage: ./manage_conda_env.sh [create|remove] [env_name]" 34 | exit 1 35 | fi -------------------------------------------------------------------------------- /scripts/remove_example_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | parent_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)" 4 | find "$parent_dir/examples" \( -path "$parent_dir/examples/benchmark" -path "$parent_dir/examples/sqlite_milvus_mock" \) -prune -o \( -type f \( -name 'data_map*.txt' -or -name 'faiss.index' -or -name '*.db' \) -delete \) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import os 3 | import re 4 | from typing import List 5 | 6 | import setuptools 7 | from setuptools import find_packages 8 | 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | 11 | 12 | with open("README.md", "r") as fh: 13 | long_description = fh.read() 14 | 15 | 16 | def parse_requirements(file_name: str) -> List[str]: 17 | with open(file_name) as f: 18 | return [ 19 | require.strip() for require in f 20 | if require.strip() and not require.startswith('#') 21 | ] 22 | 23 | 24 | def read(*parts): 25 | with codecs.open(os.path.join(here, *parts), "r") as fp: 26 | return fp.read() 27 | 28 | 29 | def find_version(*file_paths): 30 | version_file = read(*file_paths) 31 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) 32 | if version_match: 33 | return version_match.group(1) 34 | raise RuntimeError("Unable to find version string.") 35 | 36 | 37 | setuptools.setup( 38 | name="gptcache", 39 | packages=find_packages(), 40 | version=find_version("gptcache", "__init__.py"), 41 | author="SimFG", 42 | author_email="bang.fu@zilliz.com", 43 | description="GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat " 44 | "applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, " 45 | "similar to how Redis works for traditional applications.", 46 | long_description=long_description, 47 | long_description_content_type="text/markdown", 48 | install_requires=parse_requirements('requirements.txt'), 49 | url="https://github.com/zilliztech/GPTCache", 50 | license='https://opensource.org/license/mit/', 51 | python_requires='>=3.8.1', 52 | entry_points={ 53 | 'console_scripts': [ 54 | 'gptcache_server=gptcache_server.server:main', 55 | ], 56 | }, 57 | ) 58 | -------------------------------------------------------------------------------- /tests/integration_tests/base/client_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from utils.util_log import test_log as log 4 | from common import common_type as ct 5 | from common import common_func as cf 6 | 7 | 8 | class Base: 9 | def setup_method(self, method): 10 | log.info(("*" * 35) + " setup " + ("*" * 35)) 11 | log.info("[setup_method] Start setup test case %s." % method.__name__) 12 | log.info("[setup_method] Clean up tmp files.") 13 | cf.remove_file() 14 | 15 | def teardown_method(self, method): 16 | log.info(("*" * 35) + " teardown " + ("*" * 35)) 17 | log.info("[teardown_method] Start teardown test case %s..." 
% method.__name__) 18 | log.info("[teardown_method] Clean up tmp files.") 19 | cf.remove_file() 20 | -------------------------------------------------------------------------------- /tests/integration_tests/common/common_func.py: -------------------------------------------------------------------------------- 1 | """" Methods of processing data """ 2 | import os 3 | from common import common_type as ct 4 | from utils.util_log import test_log as log 5 | 6 | 7 | def remove_file(file_names=[ct.sqlite_file, ct.faiss_file]): 8 | """ 9 | delete files 10 | :param file_names: file name list 11 | :return: None 12 | """ 13 | for file in file_names: 14 | if os.path.isfile(file): 15 | os.remove(file) 16 | log.info("%s is removed" % file) 17 | 18 | 19 | def log_time_func(func_name, delta_time): 20 | """ 21 | print function time 22 | :param func_name: function name 23 | :param delta_time: consumed time 24 | :return: None 25 | """ 26 | log.info("func `{}` consume time: {:.2f}s".format(func_name, delta_time)) 27 | 28 | 29 | def disable_cache(*args, **kwargs): 30 | """ 31 | disable cache 32 | """ 33 | return False 34 | -------------------------------------------------------------------------------- /tests/integration_tests/common/common_type.py: -------------------------------------------------------------------------------- 1 | """ Initialized parameters """ 2 | sqlite_file = "sqlite.db" 3 | faiss_file = "faiss.index" 4 | -------------------------------------------------------------------------------- /tests/integration_tests/config/log_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | 4 | 5 | class LogConfig: 6 | def __init__(self): 7 | self.log_debug = "" 8 | self.log_err = "" 9 | self.log_info = "" 10 | self.log_worker = "" 11 | self.get_default_config() 12 | 13 | @staticmethod 14 | def get_env_variable(var="CI_LOG_PATH"): 15 | """get log path for testing""" 16 | try: 17 | log_path = os.environ[var] 18 | return str(log_path) 19 | except Exception as e: 20 | # now = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') 21 | log_path = f"/tmp/ci_logs" 22 | print( 23 | "[get_env_variable] failed to get environment variables : %s, use default path : %s" 24 | % (str(e), log_path) 25 | ) 26 | return log_path 27 | 28 | @staticmethod 29 | def create_path(log_path): 30 | if not os.path.isdir(str(log_path)): 31 | print("[create_path] folder(%s) is not exist." 
% log_path) 32 | print("[create_path] create path now...") 33 | os.makedirs(log_path) 34 | 35 | def get_default_config(self): 36 | """Make sure the path exists""" 37 | log_dir = self.get_env_variable() 38 | self.log_debug = "%s/ci_test_log.debug" % log_dir 39 | self.log_info = "%s/ci_test_log.log" % log_dir 40 | self.log_err = "%s/ci_test_log.err" % log_dir 41 | work_log = os.environ.get("PYTEST_XDIST_WORKER") 42 | if work_log is not None: 43 | self.log_worker = f"{log_dir}/{work_log}.log" 44 | 45 | self.create_path(log_dir) 46 | 47 | 48 | log_config = LogConfig() 49 | -------------------------------------------------------------------------------- /tests/integration_tests/examples/map/test_example_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.utils.response import get_message_from_openai_answer 4 | from gptcache.manager.factory import get_data_manager 5 | from gptcache.adapter import openai 6 | from gptcache import cache, Cache 7 | 8 | 9 | def test_map(): 10 | dir_name, _ = os.path.split(os.path.abspath(__file__)) 11 | bak_cache = Cache() 12 | bak_data_file = dir_name + "/data_map_bak.txt" 13 | bak_cache.init(data_manager=get_data_manager(data_path=bak_data_file, max_size=10)) 14 | data_file = dir_name + "/data_map.txt" 15 | cache.init( 16 | data_manager=get_data_manager(data_path=data_file, max_size=10), 17 | next_cache=bak_cache, 18 | ) 19 | 20 | cache.set_openai_key() 21 | mock_messages = [ 22 | {"role": "system", "content": "You are a helpful assistant."}, 23 | {"role": "user", "content": "foo15"}, 24 | ] 25 | 26 | if not os.path.isfile(bak_data_file): 27 | cache.import_data( 28 | [f"foo{i}" for i in range(10)], [f"receiver the foo {i}" for i in range(10)] 29 | ) 30 | if not os.path.isfile(data_file): 31 | bak_cache.import_data( 32 | [f"foo{i}" for i in range(10, 20)], 33 | [f"receiver the foo {i}" for i in range(10, 20)], 34 | ) 35 | 36 | expect_answer = "receiver the foo 15" 37 | answer = openai.ChatCompletion.create( 38 | model="gpt-3.5-turbo", 39 | messages=mock_messages, 40 | ) 41 | assert get_message_from_openai_answer(answer) == expect_answer 42 | 43 | cache.flush() 44 | 45 | bak_cache2 = Cache() 46 | bak_cache2.init(data_manager=get_data_manager(data_path=bak_data_file, max_size=10)) 47 | cache.init( 48 | data_manager=get_data_manager(data_path=data_file, max_size=10), 49 | next_cache=bak_cache2, 50 | ) 51 | answer = openai.ChatCompletion.create( 52 | model="gpt-3.5-turbo", 53 | messages=mock_messages, 54 | ) 55 | assert get_message_from_openai_answer(answer) == expect_answer 56 | -------------------------------------------------------------------------------- /tests/integration_tests/examples/sqlite_faiss_mock/test_example_sqlite_faiss.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.utils.response import get_message_from_openai_answer 4 | from gptcache.adapter import openai 5 | from gptcache import cache, Config 6 | from gptcache.manager import get_data_manager, VectorBase 7 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 8 | import numpy as np 9 | 10 | 11 | d = 8 12 | 13 | 14 | def mock_embeddings(data, **kwargs): # pylint: disable=W0613 15 | return np.random.random((d,)).astype("float32") 16 | 17 | 18 | def test_sqlite_faiss(): 19 | sqlite_file = "sqlite.db" 20 | faiss_file = "faiss.index" 21 | 22 | if os.path.isfile(sqlite_file): 23 | os.remove(sqlite_file) 24 | if os.path.isfile(faiss_file): 
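# the stale faiss index file is removed as well (next line); otherwise Faiss.__init__ would read the old index from disk and the cache would not start empty for this test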
25 | os.remove(faiss_file) 26 | 27 | vector_base = VectorBase("faiss", dimension=d, top_k=3) 28 | data_manager = get_data_manager("sqlite", vector_base, max_size=8, clean_size=2) 29 | cache.init( 30 | embedding_func=mock_embeddings, 31 | data_manager=data_manager, 32 | similarity_evaluation=SearchDistanceEvaluation(), 33 | config=Config( 34 | similarity_threshold=0, 35 | ), 36 | ) 37 | 38 | mock_messages = [ 39 | {"role": "system", "content": "You are a helpful assistant."}, 40 | {"role": "user", "content": "foo"}, 41 | ] 42 | cache.import_data( 43 | [f"foo{i}" for i in range(10)], [f"receiver the foo {i}" for i in range(10)] 44 | ) 45 | 46 | answer = openai.ChatCompletion.create( 47 | model="gpt-3.5-turbo", 48 | messages=mock_messages, 49 | ) 50 | assert get_message_from_openai_answer(answer) 51 | 52 | cache.flush() 53 | vector_base = VectorBase("faiss", dimension=d, top_k=3) 54 | data_manager = get_data_manager("sqlite", vector_base, max_size=8, clean_size=2) 55 | cache.init( 56 | embedding_func=mock_embeddings, 57 | data_manager=data_manager, 58 | similarity_evaluation=SearchDistanceEvaluation(), 59 | config=Config( 60 | similarity_threshold=0, 61 | ), 62 | ) 63 | answer = openai.ChatCompletion.create( 64 | model="gpt-3.5-turbo", 65 | messages=mock_messages, 66 | ) 67 | assert get_message_from_openai_answer(answer) 68 | -------------------------------------------------------------------------------- /tests/integration_tests/processor/pre/test_pre_without_prompt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache import Cache, Config 4 | from gptcache.adapter import openai 5 | from gptcache.manager import get_data_manager 6 | from gptcache.processor.pre import last_content_without_prompt 7 | from gptcache.utils.response import get_message_from_openai_answer 8 | 9 | 10 | def test_pre_without_prompt(): 11 | cache_obj = Cache() 12 | data_file = "data_map_prompt.txt" 13 | cache_obj.init( 14 | pre_embedding_func=last_content_without_prompt, 15 | data_manager=get_data_manager(data_path=data_file), 16 | config=Config(prompts=["foo"]), 17 | ) 18 | 19 | if not os.path.isfile(data_file): 20 | cache_obj.import_data( 21 | [f"{i}" for i in range(10)], 22 | [f"receiver the foo {i}" for i in range(10)], 23 | ) 24 | 25 | answer = openai.ChatCompletion.create( 26 | model="gpt-3.5-turbo", 27 | messages=[ 28 | {"role": "system", "content": "You are a helpful assistant."}, 29 | {"role": "user", "content": "foo5"}, 30 | ], 31 | cache_obj=cache_obj, 32 | ) 33 | assert get_message_from_openai_answer(answer) == "receiver the foo 5" 34 | -------------------------------------------------------------------------------- /tests/integration_tests/utils/util_log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | from config.log_config import log_config 5 | 6 | 7 | class TestLog: 8 | def __init__(self, logger, log_debug, log_file, log_err, log_worker): 9 | self.logger = logger 10 | self.log_debug = log_debug 11 | self.log_file = log_file 12 | self.log_err = log_err 13 | self.log_worker = log_worker 14 | 15 | self.log = logging.getLogger(self.logger) 16 | self.log.setLevel(logging.DEBUG) 17 | 18 | try: 19 | formatter = logging.Formatter( 20 | "[%(asctime)s - %(levelname)s - %(name)s]: " 21 | "%(message)s (%(filename)s:%(lineno)s)" 22 | ) 23 | dh = logging.FileHandler(self.log_debug) 24 | dh.setLevel(logging.DEBUG) 25 | dh.setFormatter(formatter) 26 | self.log.addHandler(dh) 27 
| 28 | fh = logging.FileHandler(self.log_file) 29 | fh.setLevel(logging.INFO) 30 | fh.setFormatter(formatter) 31 | self.log.addHandler(fh) 32 | 33 | eh = logging.FileHandler(self.log_err) 34 | eh.setLevel(logging.ERROR) 35 | eh.setFormatter(formatter) 36 | self.log.addHandler(eh) 37 | 38 | if self.log_worker != "": 39 | wh = logging.FileHandler(self.log_worker) 40 | wh.setLevel(logging.DEBUG) 41 | wh.setFormatter(formatter) 42 | self.log.addHandler(wh) 43 | 44 | ch = logging.StreamHandler(sys.stdout) 45 | ch.setLevel(logging.DEBUG) 46 | ch.setFormatter(formatter) 47 | 48 | except Exception as e: 49 | print( 50 | "Can not use %s or %s or %s to log. error : %s" 51 | % (log_debug, log_file, log_err, str(e)) 52 | ) 53 | 54 | 55 | """All modules share this unified log""" 56 | log_debug = log_config.log_debug 57 | log_info = log_config.log_info 58 | log_err = log_config.log_err 59 | log_worker = log_config.log_worker 60 | test_log = TestLog("ci_test", log_debug, log_info, log_err, log_worker).log 61 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | 3 | addopts = --html=/tmp/ci_logs/report.html --self-contained-html -v -s 4 | # python3 -W ignore -m pytest 5 | 6 | log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s) 7 | log_date_format = %Y-%m-%d %H:%M:%S 8 | 9 | 10 | filterwarnings = 11 | ignore::DeprecationWarning 12 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://test.pypi.org/simple/ 2 | loguru==0.5.3 3 | pytest-cov==4.1.0 4 | pytest==7.2.0 5 | coverage==7.2.3 6 | pytest-assume==2.4.3 7 | pytest-timeout==1.3.3 8 | pytest-repeat==0.8.0 9 | pytest-level==0.1.1 10 | pytest-xdist==2.5.0 11 | pytest-loguru==0.2.0 12 | pytest-rerunfailures==9.1.1 13 | git+https://github.com/Projectplace/pytest-tags 14 | pytest-html==3.1.1 15 | pytest-sugar==0.9.5 16 | pytest-parallel 17 | psycopg2-binary 18 | transformers==4.29.2 19 | anyio==3.6.2 20 | torch 21 | mock 22 | pexpect 23 | spacy 24 | safetensors 25 | typing_extensions<4.6.0 26 | stability-sdk 27 | grpcio==1.53.0 28 | protobuf==3.20.0 29 | milvus==2.2.8 30 | pymilvus==2.2.8 31 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_cohere.py: -------------------------------------------------------------------------------- 1 | import os 2 | import types 3 | from unittest.mock import patch 4 | from gptcache.utils import import_cohere 5 | from gptcache.embedding import Cohere 6 | from gptcache.adapter.api import _get_model 7 | 8 | import_cohere() 9 | 10 | 11 | def test_embedding(): 12 | os.environ["CO_API_KEY"] = "API" 13 | 14 | with patch("cohere.Client.embed") as mock_create: 15 | dimension = 4096 16 | mock_create.return_value = types.SimpleNamespace(embeddings=[[0] * dimension]) 17 | c1 = Cohere() 18 | assert c1.dimension == dimension 19 | assert len(c1.to_embeddings("foo")) == dimension 20 | 21 | with patch("cohere.Client.embed") as mock_create: 22 | dimension = 512 23 | mock_create.return_value = types.SimpleNamespace(embeddings=[[0] * dimension]) 24 | c1 = Cohere("foo") 25 | assert c1.dimension == dimension 26 | assert len(c1.to_embeddings("foo")) == dimension 27 | 28 | with patch("cohere.Client.embed") as mock_create: 29 | dimension = 
4096 30 | mock_create.return_value = types.SimpleNamespace(embeddings=[[0] * dimension]) 31 | c1 = _get_model("cohere") 32 | assert c1.dimension == dimension 33 | assert len(c1.to_embeddings("foo")) == dimension 34 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_data2vec.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.embedding import Data2VecAudio 7 | 8 | 9 | def test_data2vec_audio(): 10 | url = "https://github.com/towhee-io/examples/releases/download/data/ah_yes.wav" 11 | req = requests.get(url) 12 | audio = BytesIO(req.content) 13 | t = Data2VecAudio(model="facebook/data2vec-audio-base-960h") 14 | data = t.to_embeddings(audio) 15 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 16 | 17 | req = requests.get(url) 18 | audio = BytesIO(req.content) 19 | t = _get_model("data2vecaudio") 20 | data = t.to_embeddings(audio) 21 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 22 | 23 | 24 | if __name__ == "__main__": 25 | test_data2vec_audio() 26 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_embedding_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | from gptcache.embedding import OpenAI 5 | from gptcache.adapter.api import _get_model 6 | 7 | 8 | def test_embedding(): 9 | os.environ["OPENAI_API_KEY"] = "API" 10 | 11 | def get_return_value(d): 12 | return { 13 | "object": "list", 14 | "data": [ 15 | { 16 | "object": "embedding", 17 | "embedding": [0] * d, 18 | "index": 0 19 | } 20 | ], 21 | "model": "text-embedding-ada-002", 22 | "usage": { 23 | "prompt_tokens": 8, 24 | "total_tokens": 8 25 | } 26 | } 27 | 28 | with patch("openai.Embedding.create") as mock_create: 29 | dimension = 1536 30 | mock_create.return_value = get_return_value(dimension) 31 | oa = OpenAI() 32 | assert oa.dimension == dimension 33 | assert len(oa.to_embeddings("foo")) == dimension 34 | 35 | with patch("openai.Embedding.create") as mock_create: 36 | dimension = 1536 37 | mock_create.return_value = get_return_value(dimension) 38 | oa = OpenAI(api_key="openai") 39 | assert oa.dimension == dimension 40 | assert len(oa.to_embeddings("foo")) == dimension 41 | 42 | with patch("openai.Embedding.create") as mock_create: 43 | dimension = 512 44 | mock_create.return_value = get_return_value(dimension) 45 | oa = OpenAI(model="test_embedding") 46 | assert oa.dimension == dimension 47 | assert len(oa.to_embeddings("foo")) == dimension 48 | 49 | with patch("openai.Embedding.create") as mock_create: 50 | dimension = 1536 51 | mock_create.return_value = get_return_value(dimension) 52 | oa = _get_model("openai") 53 | assert oa.dimension == dimension 54 | assert len(oa.to_embeddings("foo")) == dimension 55 | 56 | 57 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_embedding_string.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding.string import to_embeddings 2 | 3 | 4 | def test_embedding(): 5 | message = to_embeddings("foo") 6 | assert message == "foo" 7 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_fasttext.py: 
-------------------------------------------------------------------------------- 1 | # from unittest.mock import patch 2 | 3 | # from gptcache.embedding import FastText 4 | 5 | # from gptcache.utils import import_fasttext 6 | # from gptcache.adapter.api import _get_model 7 | 8 | # import_fasttext() 9 | 10 | # import fasttext 11 | 12 | 13 | # def test_embedding(): 14 | # with patch("fasttext.util.download_model") as download_model_mock: 15 | # download_model_mock.return_value = "fastttext.bin" 16 | # with patch("fasttext.load_model") as load_model_mock: 17 | # load_model_mock.return_value = fasttext.FastText._FastText() 18 | # with patch("fasttext.util.reduce_model") as reduce_model_mock: 19 | # reduce_model_mock.return_value = None 20 | # with patch("fasttext.FastText._FastText.get_dimension") as dimension_mock: 21 | # dimension_mock.return_value = 128 22 | # with patch("fasttext.FastText._FastText.get_sentence_vector") as vector_mock: 23 | # vector_mock.return_value = [0] * 128 24 | 25 | # ft = FastText(dim=128) 26 | # assert len(ft.to_embeddings("foo")) == 128 27 | # assert ft.dimension == 128 28 | 29 | # ft1 = _get_model("fasttext", model_config={"dim": 128}) 30 | # assert len(ft1.to_embeddings("foo")) == 128 31 | # assert ft1.dimension == 128 32 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_huggingface.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import Huggingface 2 | from gptcache.adapter.api import _get_model 3 | 4 | 5 | def test_huggingface(): 6 | t = Huggingface("distilbert-base-uncased") 7 | data = t.to_embeddings("foo") 8 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 9 | 10 | t = _get_model(model_src="huggingface", model_config={"model": "distilbert-base-uncased"}) 11 | data = t.to_embeddings("foo") 12 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 13 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_langchain.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import LangChain 2 | from gptcache.utils import import_langchain, prompt_install 3 | 4 | import_langchain() 5 | from langchain.embeddings import FakeEmbeddings 6 | 7 | 8 | def test_langchain_embedding(): 9 | size = 10 10 | l = LangChain(embeddings=FakeEmbeddings(size=size)) 11 | data = l.to_embeddings("foo") 12 | assert len(data) == size 13 | 14 | l = LangChain(embeddings=FakeEmbeddings(size=size), dimension=size) 15 | data = l.to_embeddings("foo") 16 | assert len(data) == size 17 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_onnx.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import Onnx 2 | from gptcache.adapter.api import _get_model 3 | 4 | 5 | def test_onnx(): 6 | t = Onnx() 7 | data = t.to_embeddings("foo") 8 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 9 | 10 | t = _get_model("onnx") 11 | data = t.to_embeddings("foo") 12 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 13 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_paddlenlp.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import PaddleNLP 2 | from 
gptcache.adapter.api import _get_model 3 | 4 | 5 | def test_paddlenlp(): 6 | t = PaddleNLP("ernie-3.0-nano-zh") 7 | dimension = t.dimension 8 | data = t.to_embeddings("中国") 9 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 10 | 11 | t = _get_model(model_src="paddlenlp", model_config={"model": "ernie-3.0-nano-zh"}) 12 | dimension = t.dimension 13 | data = t.to_embeddings("中国") 14 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 15 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_rwkv.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter.api import _get_model 2 | from gptcache.embedding import Rwkv 3 | 4 | 5 | def test_rwkv(): 6 | t = Rwkv("sgugger/rwkv-430M-pile") 7 | data = t.to_embeddings("foo") 8 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 9 | 10 | t = _get_model(model_src="rwkv", model_config={"model": "sgugger/rwkv-430M-pile"}) 11 | data = t.to_embeddings("foo") 12 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 13 | 14 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_sbert.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter.api import _get_model 2 | from gptcache.embedding import SBERT 3 | 4 | 5 | def test_sbert(): 6 | t = SBERT("all-MiniLM-L6-v2") 7 | dimension = t.dimension 8 | data = t.to_embeddings("foo") 9 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 10 | 11 | t = _get_model(model_src="sbert", model_config={"model": "all-MiniLM-L6-v2"}) 12 | dimension = t.dimension 13 | data = t.to_embeddings("foo") 14 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 15 | 16 | question = [ 17 | "what is apple?", 18 | "what is intel?", 19 | "what is openai?", 20 | ] 21 | answer = ["apple", "intel", "openai"] 22 | for q, _ in zip(question, answer): 23 | data = t.to_embeddings(q) 24 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 25 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_timm.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.embedding import Timm 7 | 8 | 9 | def test_timm(): 10 | url = 'https://raw.githubusercontent.com/zilliztech/GPTCache/main/docs/GPTCache.png' 11 | image_bytes = requests.get(url).content 12 | image_file = BytesIO(image_bytes) # Convert image to file-like object 13 | 14 | encoder = Timm(model='resnet50') 15 | embed = encoder.to_embeddings(image_file) 16 | assert len(embed) == encoder.dimension 17 | 18 | encoder = _get_model(model_src="timm", model_config={"model": "resnet50"}) 19 | embed = encoder.to_embeddings(image_file) 20 | assert len(embed) == encoder.dimension 21 | 22 | 23 | if __name__ == "__main__": 24 | test_timm() -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_uform.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.utils import import_uform, import_pillow 7 | from gptcache.utils.error import ParamError 8 | 9 | import_uform() 10 | import_pillow() 11 | 12 | 13 | def 
test_uform(): 14 | encoder = _get_model("uform") 15 | embed = encoder.to_embeddings("Hello, world.") 16 | assert len(embed) == encoder.dimension 17 | 18 | url = "https://raw.githubusercontent.com/zilliztech/GPTCache/main/docs/GPTCache.png" 19 | image_bytes = requests.get(url).content 20 | image_file = BytesIO(image_bytes) 21 | 22 | encoder = _get_model("uform", model_config={"embedding_type": "image"}) 23 | embed = encoder.to_embeddings(image_file) 24 | assert len(embed) == encoder.dimension 25 | 26 | is_exception = False 27 | try: 28 | _get_model("uform", model_config={"embedding_type": "foo"}) 29 | except ParamError: 30 | is_exception = True 31 | assert is_exception 32 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_vit.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.utils import import_pillow, import_vit 7 | 8 | 9 | def test_timm(): 10 | import_vit() 11 | import_pillow() 12 | 13 | from PIL import Image 14 | from gptcache.embedding import ViT 15 | 16 | url = 'https://raw.githubusercontent.com/zilliztech/GPTCache/main/docs/GPTCache.png' 17 | image_bytes = requests.get(url).content 18 | image_data = BytesIO(image_bytes) # Convert image to file-like object 19 | image = Image.open(image_data) 20 | encoder = ViT(model="google/vit-base-patch16-384") 21 | embed = encoder.to_embeddings(image) 22 | assert len(embed) == encoder.dimension 23 | 24 | encoder = _get_model(model_src="vit") 25 | embed = encoder.to_embeddings(image) 26 | assert len(embed) == encoder.dimension 27 | 28 | if __name__ == "__main__": 29 | test_timm() -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_base.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from gptcache.utils.error import NotFoundError 4 | from gptcache.manager import CacheBase, VectorBase 5 | from gptcache.manager.scalar_data.manager import CacheBase as InnerCacheBase 6 | from gptcache.manager.vector_data.manager import VectorBase as InnerVectorBase 7 | 8 | 9 | class TestBaseStore(unittest.TestCase): 10 | def test_cache_base(self): 11 | with self.assertRaises(EnvironmentError): 12 | InnerCacheBase() 13 | 14 | with self.assertRaises(NotFoundError): 15 | CacheBase("test_cache_base") 16 | 17 | def test_vector_base(self): 18 | with self.assertRaises(EnvironmentError): 19 | InnerVectorBase() 20 | 21 | with self.assertRaises(NotFoundError): 22 | VectorBase("test_cache_base") 23 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_chromadb.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from gptcache.manager import VectorBase 6 | from gptcache.manager.vector_data.base import VectorData 7 | 8 | 9 | class TestChromadb(unittest.TestCase): 10 | def test_normal(self): 11 | db = VectorBase("chromadb", client_settings={}, top_k=3) 12 | db.mul_add([VectorData(id=i, data=np.random.sample(10)) for i in range(100)]) 13 | search_res = db.search(np.random.sample(10)) 14 | self.assertEqual(len(search_res), 3) 15 | db.delete(["1", "3", "5", "7"]) 16 | self.assertEqual(db._collection.count(), 96) 17 | -------------------------------------------------------------------------------- 
/tests/unit_tests/manager/test_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.manager.data_manager import MapDataManager 4 | 5 | data_map_path = "data_map.txt" 6 | 7 | 8 | def test_map(): 9 | if os.path.isfile(data_map_path): 10 | os.remove(data_map_path) 11 | 12 | data_manager = MapDataManager(data_map_path, 3) 13 | a = "a" 14 | for i in range(4): 15 | data_manager.save(chr(ord(a) + i), str(i), chr(ord(a) + i)) 16 | assert len(data_manager.search("a")) == 0 17 | question, answer, emb, _ = data_manager.search("b")[0] 18 | assert question == "b", question 19 | assert answer == "1", answer 20 | assert emb == "b", emb 21 | data_manager.close() 22 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_milvusdb.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from tempfile import TemporaryDirectory 4 | 5 | from gptcache.manager.vector_data import VectorBase 6 | from gptcache.manager.vector_data.base import VectorData 7 | 8 | 9 | class TestMilvusDB(unittest.TestCase): 10 | def test_normal(self): 11 | with TemporaryDirectory(dir="./") as root: 12 | size = 1000 13 | dim = 512 14 | top_k = 10 15 | 16 | db = VectorBase( 17 | "milvus", 18 | top_k=top_k, 19 | dimension=dim, 20 | port="10086", 21 | local_mode=True, 22 | local_data=str(root), 23 | index_params={ 24 | "metric_type": "L2", 25 | "index_type": "IVF_FLAT", 26 | "params": {"nlist": 128}, 27 | }, 28 | ) 29 | data = np.random.randn(size, dim).astype(np.float32) 30 | db.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 31 | self.assertEqual(len(db.search(data[0])), top_k) 32 | db.mul_add([VectorData(id=size, data=data[0])]) 33 | ret = db.search(data[0]) 34 | self.assertIn(ret[0][1], [0, size]) 35 | self.assertIn(ret[1][1], [0, size]) 36 | db.delete([0, 1, 2, 3, 4, 5, size]) 37 | ret = db.search(data[0]) 38 | self.assertNotIn(ret[0][1], [0, size]) 39 | db.rebuild() 40 | db.close() 41 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_object_storage.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import mock 3 | import os 4 | import requests 5 | from pathlib import Path 6 | import numpy as np 7 | from tempfile import TemporaryDirectory 8 | 9 | from gptcache.manager.object_data.local_storage import LocalObjectStorage 10 | from gptcache.manager.object_data.s3_storage import S3Storage 11 | from gptcache.manager import ObjectBase 12 | 13 | 14 | class TestLocal(unittest.TestCase): 15 | def test_normal(self): 16 | with TemporaryDirectory(dir="./") as root: 17 | o = LocalObjectStorage(root) 18 | data = b'My test' 19 | fp = o.put(data) 20 | self.assertTrue(Path(fp).is_file()) 21 | self.assertEqual(o.get(fp), data) 22 | self.assertEqual(o.get_access_link(fp), fp) 23 | o.delete([fp]) 24 | self.assertFalse(Path(fp).is_file()) 25 | 26 | 27 | class TestS3(unittest.TestCase): 28 | def test_normal(self): 29 | access_key = os.environ.get('AWS_ACCESS_KEY_ID') 30 | secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') 31 | bucket = os.environ.get('BUCKET') 32 | endpoint = os.environ.get('ENDPOINT') 33 | if access_key is None or secret_key is None or bucket is None: 34 | return 35 | o = S3Storage(bucket, 'gptcache', access_key, secret_key, endpoint) 36 | data = b'My test' 37 | fp = o.put(data) 38 | 
self.assertEqual(o.get(fp), data) 39 | link = o.get_access_link(fp) 40 | self.assertEqual(requests.get(link, verify=False).content, data) 41 | o.delete([fp]) 42 | self.assertIsNone(o.get(fp)) 43 | 44 | class TestBase(unittest.TestCase): 45 | def test_local(self): 46 | with TemporaryDirectory(dir="./") as root: 47 | o = ObjectBase("local", path = root) 48 | data = b'My test' 49 | fp = o.put(data) 50 | self.assertTrue(Path(fp).is_file()) 51 | self.assertEqual(o.get(fp), data) 52 | self.assertEqual(o.get_access_link(fp), fp) 53 | o.delete([fp]) 54 | self.assertFalse(Path(fp).is_file()) 55 | 56 | def test_s3(self): 57 | with mock.patch("boto3.Session") as mock_session: 58 | o = ObjectBase("s3", bucket="", path_prefix="", 59 | access_key="", secret_key="") 60 | data = b"My test" 61 | fp = o.put(data) 62 | o.get(fp) 63 | o.get_access_link(fp) 64 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_pgvector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | from gptcache.manager.vector_data import VectorBase 7 | from gptcache.manager.vector_data.base import VectorData 8 | 9 | 10 | class TestPgvector(unittest.TestCase): 11 | def test_normal(self): 12 | size = 1000 13 | dim = 10 14 | top_k = 10 15 | 16 | url = os.getenv("POSTGRES_URL", "postgresql://postgres:postgres@localhost:5432/postgres") 17 | 18 | db = VectorBase( 19 | "pgvector", 20 | top_k=top_k, 21 | dimension=dim, 22 | url=url, 23 | index_params={ 24 | "index_type": "L2", 25 | "params": {"lists": 100, "probes": 10}, 26 | }, 27 | ) 28 | db.delete([i for i in range(size)]) 29 | data = np.random.randn(size, dim).astype(np.float32) 30 | db.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 31 | self.assertEqual(len(db.search(data[0])), top_k) 32 | db.mul_add([VectorData(id=size, data=data[0])]) 33 | ret = db.search(data[0]) 34 | print(ret) 35 | self.assertIn(ret[0][1], [0, size]) 36 | self.assertIn(ret[1][1], [0, size]) 37 | db.delete([0, 1, 2, 3, 4, 5, size]) 38 | ret = db.search(data[0]) 39 | print(ret) 40 | self.assertNotIn(ret[0][1], [0, size]) 41 | db.rebuild() 42 | db.close() 43 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_qdrant.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | from gptcache.manager.vector_data import VectorBase 7 | from gptcache.manager.vector_data.base import VectorData 8 | 9 | 10 | class TestQdrant(unittest.TestCase): 11 | def test_normal(self): 12 | size = 10 13 | dim = 2 14 | top_k = 10 15 | qdrant = VectorBase( 16 | "qdrant", 17 | top_k=top_k, 18 | dimension=dim, 19 | location=":memory:" 20 | ) 21 | data = np.random.randn(size, dim).astype(np.float32) 22 | qdrant.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 23 | search_result = qdrant.search(data[0], top_k) 24 | self.assertEqual(len(search_result), top_k) 25 | qdrant.mul_add([VectorData(id=size, data=data[0])]) 26 | ret = qdrant.search(data[0]) 27 | self.assertIn(ret[0][1], [0, size]) 28 | self.assertIn(ret[1][1], [0, size]) 29 | qdrant.delete([0, 1, 2, 3, 4, 5, size]) 30 | ret = qdrant.search(data[0]) 31 | self.assertNotIn(ret[0][1], [0, size]) 32 | qdrant.rebuild() 33 | qdrant.close() 34 | -------------------------------------------------------------------------------- 
/tests/unit_tests/manager/test_redis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding import Onnx 4 | from gptcache.manager import VectorBase 5 | from gptcache.manager.vector_data.base import VectorData 6 | 7 | 8 | def test_redis_vector_store(): 9 | encoder = Onnx() 10 | dim = encoder.dimension 11 | vector_base = VectorBase("redis", dimension=dim) 12 | vector_base.mul_add([VectorData(id=i, data=np.random.rand(dim)) for i in range(10)]) 13 | 14 | search_res = vector_base.search(np.random.rand(dim)) 15 | print(search_res) 16 | assert len(search_res) == 1 17 | 18 | search_res = vector_base.search(np.random.rand(dim), top_k=10) 19 | print(search_res) 20 | assert len(search_res) == 10 21 | 22 | vector_base.delete([i for i in range(5)]) 23 | 24 | search_res = vector_base.search(np.random.rand(dim), top_k=10) 25 | print(search_res) 26 | assert len(search_res) == 5 27 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_usearch.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from gptcache.manager.vector_data import VectorBase 6 | from gptcache.manager.vector_data.base import VectorData 7 | 8 | 9 | class TestUSearchDB(unittest.TestCase): 10 | def test_normal(self): 11 | size = 1000 12 | dim = 512 13 | top_k = 10 14 | 15 | db = VectorBase( 16 | "usearch", 17 | index_file_path='./index.usearch', 18 | dimension=dim, 19 | top_k=top_k, 20 | metric='cos', 21 | dtype='f32', 22 | ) 23 | db.mul_add([VectorData(id=i, data=np.random.rand(dim)) 24 | for i in range(size)]) 25 | self.assertEqual(len(db.search(np.random.rand(dim))), top_k) 26 | self.assertEqual(db.count(), size) 27 | db.close() 28 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_weaviate.py: -------------------------------------------------------------------------------- 1 | # import unittest 2 | # import numpy as np 3 | 4 | # from gptcache.manager.vector_data import VectorBase 5 | # from gptcache.manager.vector_data.base import VectorData 6 | 7 | 8 | # class TestWeaviateDB(unittest.TestCase): 9 | # def test_normal(self): 10 | # size = 1000 11 | # dim = 512 12 | # top_k = 10 13 | # class_name = "Vectorcache" 14 | 15 | # db = VectorBase( 16 | # "weaviate", 17 | # class_name=class_name, 18 | # top_k=top_k 19 | # ) 20 | 21 | # created_class_name = db._create_class() 22 | # self.assertEqual(class_name, created_class_name) 23 | # data = np.random.randn(size, dim).astype(np.float32) 24 | # db.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 25 | # self.assertEqual(len(db.search(data[0])), top_k) 26 | # db.mul_add([VectorData(id=size, data=data[0])]) 27 | # ret = db.search(data[0]) 28 | # self.assertIn(ret[0][1], [0, size]) 29 | # db.delete([0, 1, 2, 3, 4, 5, size]) 30 | # ret = db.search(data[0]) 31 | # self.assertNotIn(ret[0][1], [0, size]) 32 | # db.rebuild() 33 | # db.update_embeddings(6, data[7]) 34 | # emb = db.get_embeddings(6) 35 | # self.assertEqual(emb.tolist(), data[7].tolist()) 36 | # emb = db.get_embeddings(0) 37 | # self.assertIsNone(emb) 38 | # db.close() 39 | 40 | # custom_class_name = "Customcache" 41 | # class_schema = { 42 | # "class": custom_class_name, 43 | # "description": "LLM response cache", 44 | # "properties": [ 45 | # { 46 | # "name": "data_id", 47 | # "dataType": ["int"], 48 | # "description": "The 
data-id generated by GPTCache for vectors.", 49 | # } 50 | # ], 51 | # "vectorIndexConfig": {"distance": "cosine"}, 52 | # } 53 | 54 | # db = VectorBase( 55 | # "weaviate", 56 | # class_schema=class_schema, 57 | # top_k=top_k 58 | # ) 59 | # created_class_name = db._create_class() 60 | # self.assertEqual(custom_class_name, created_class_name) 61 | # db.close() 62 | -------------------------------------------------------------------------------- /tests/unit_tests/processor/test_post.py: -------------------------------------------------------------------------------- 1 | from gptcache.processor.post import random_one, first, nop, temperature_softmax 2 | 3 | 4 | def test_random_one(): 5 | message = random_one(["foo", "foo2"]) 6 | assert message 7 | 8 | 9 | def test_first(): 10 | message = first(["foo", "foo2"]) 11 | assert message == "foo" 12 | 13 | 14 | def test_nop(): 15 | message = nop(["foo", "foo2"]) 16 | assert "foo" in message 17 | assert "foo2" in message 18 | 19 | 20 | def test_temperature_softmax(): 21 | message = temperature_softmax(messages=["foo", "foo2"], scores=[0.0, 1.0], temperature=0.5) 22 | assert message in ["foo", "foo2"] 23 | 24 | message = temperature_softmax(messages=["foo", "foo2"], scores=[0.9, 0.1], temperature=0.0) 25 | assert message == "foo" 26 | 27 | message = temperature_softmax(messages=["foo", "foo2"], scores=[0.1, 0.9], temperature=0.0) 28 | assert message == "foo2" 29 | 30 | 31 | if __name__ == "__main__": 32 | test_first() 33 | test_nop() 34 | test_random_one() 35 | test_temperature_softmax() -------------------------------------------------------------------------------- /tests/unit_tests/processor/test_pre.py: -------------------------------------------------------------------------------- 1 | from gptcache.processor.pre import ( 2 | last_content, 3 | all_content, 4 | nop, 5 | last_content_without_prompt, 6 | get_prompt, get_openai_moderation_input, 7 | concat_all_queries 8 | ) 9 | 10 | from gptcache.config import Config 11 | 12 | def test_last_content(): 13 | content = last_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]}) 14 | 15 | assert content == "foo2" 16 | 17 | 18 | def test_last_content_without_prompt(): 19 | content = last_content_without_prompt( 20 | {"messages": [{"content": "foo1"}, {"content": "foo2"}]} 21 | ) 22 | assert content == "foo2" 23 | 24 | content = last_content_without_prompt( 25 | {"messages": [{"content": "foo1"}, {"content": "foo2"}]}, prompts=None 26 | ) 27 | assert content == "foo2" 28 | 29 | content = last_content_without_prompt( 30 | {"messages": [{"content": "foo1"}, {"content": "foo2"}]}, prompts=["foo"] 31 | ) 32 | assert content == "2" 33 | 34 | 35 | def test_all_content(): 36 | content = all_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]}) 37 | 38 | assert content == "foo1\nfoo2" 39 | 40 | 41 | def test_nop(): 42 | content = nop({"str": "hello"}) 43 | assert content == {"str": "hello"} 44 | 45 | 46 | def test_get_prompt(): 47 | content = get_prompt({"prompt": "foo"}) 48 | assert content == "foo" 49 | 50 | 51 | def test_get_openai_moderation_input(): 52 | content = get_openai_moderation_input({"input": ["hello", "world"]}) 53 | assert content == "['hello', 'world']" 54 | 55 | 56 | def test_get_messages_last_content(): 57 | content = last_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]}) 58 | assert content == "foo2" 59 | 60 | def test_concat_all_queries(): 61 | config = Config() 62 | config.context_len = 2 63 | content = concat_all_queries({"messages":[{"role": "system", 
"content": "foo1"}, 64 | {"role": "user", "content": "foo2"}, 65 | {"role": "assistant","content": "foo3"}, 66 | {"role": "user", "content": "foo4"}, 67 | {"role": "assistant","content": "foo5"}, 68 | {"role": "user", "content": "foo6"}]}, **{'cache_config':config}) 69 | assert content == 'USER: foo4\nUSER: foo6' 70 | 71 | 72 | if __name__ == '__main__': 73 | test_concat_all_queries() 74 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_cohere_rerank.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | from gptcache.adapter.api import _get_eval 5 | from gptcache.utils import import_cohere 6 | 7 | import_cohere() 8 | 9 | from cohere.responses import Reranking 10 | 11 | 12 | def test_cohere_rerank(): 13 | os.environ["CO_API_KEY"] = "API" 14 | 15 | evaluation = _get_eval("cohere") 16 | 17 | min_value, max_value = evaluation.range() 18 | assert min_value < 0.001 19 | assert max_value > 0.999 20 | 21 | with patch("cohere.Client.rerank") as mock_create: 22 | mock_create.return_value = Reranking( 23 | response={ 24 | "meta": {"api_version": {"version": "2022-12-06"}}, 25 | "results": [], 26 | } 27 | ) 28 | evaluation = _get_eval("cohere") 29 | score = evaluation.evaluation( 30 | {"question": "What is the color of sky?"}, 31 | {"answer": "the color of sky is blue"}, 32 | ) 33 | assert score < 0.01 34 | 35 | with patch("cohere.Client.rerank") as mock_create: 36 | mock_create.return_value = Reranking( 37 | response={ 38 | "meta": {"api_version": {"version": "2022-12-06"}}, 39 | "results": [ 40 | { 41 | "relevance_score": 0.9871293, 42 | "index": 0, 43 | } 44 | ], 45 | } 46 | ) 47 | evaluation = _get_eval("cohere") 48 | score = evaluation.evaluation( 49 | {"question": "What is the color of sky?"}, 50 | {"answer": "the color of sky is blue"}, 51 | ) 52 | assert score > 0.9 53 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_kreciprocal.py: -------------------------------------------------------------------------------- 1 | from gptcache.similarity_evaluation import KReciprocalEvaluation 2 | from gptcache.manager.vector_data.faiss import Faiss 3 | from gptcache.manager.vector_data.base import VectorData 4 | from gptcache.adapter.api import _get_eval 5 | import numpy as np 6 | import math 7 | 8 | def normalize(vec): 9 | norm = np.linalg.norm(vec) 10 | return vec / norm 11 | 12 | faiss = Faiss('./none', 3, 10) 13 | 14 | 15 | def _test_evaluation(evaluation): 16 | narr1 = normalize(np.array([1.0, 2.0, 3.0])) 17 | faiss.mul_add([VectorData(id=0, data=narr1)]) 18 | narr2 = normalize(np.array([2.0, 3.0, 4.0])) 19 | faiss.mul_add([VectorData(id=1, data=narr2)]) 20 | narr3 = normalize(np.array([3.0, 4.0, 5.0])) 21 | faiss.mul_add([VectorData(id=2, data=narr3)]) 22 | evaluation = KReciprocalEvaluation(vectordb=faiss, top_k=2) 23 | query1 = normalize(np.array([1.1, 2.1, 3.1])) 24 | query2 = normalize(np.array([101.1, 102.1, 103.1])) 25 | 26 | score1 = evaluation.evaluation({'question': 'question1', 'embedding': query1}, {'question': 'question2', 'embedding': narr1}) 27 | score2 = evaluation.evaluation({'question': 'question1', 'embedding': query2}, {'question': 'question2', 'embedding': narr1}) 28 | 29 | assert score1 > 3.99 30 | assert math.isclose(score2, 0) 31 | 32 | def test_kreciprocal(): 33 | evaluation = KReciprocalEvaluation(vectordb=faiss, top_k=2) 
34 | _test_evaluation(evaluation) 35 | 36 | def test_get_eval(): 37 | evaluation = _get_eval(strategy="kreciprocal", kws={"vectordb": faiss, "top_k": 2}) 38 | _test_evaluation(evaluation) 39 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_onnx.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import OnnxModelEvaluation 5 | 6 | 7 | def _test_evaluation(evaluation): 8 | range_min, range_max = evaluation.range() 9 | assert math.isclose(range_min, 0.0) 10 | assert math.isclose(range_max, 1.0) 11 | 12 | score = evaluation.evaluation({"question": "hello"}, {"question": "hello"}) 13 | assert math.isclose(score, 1.0) 14 | 15 | query = "Can you pass a urine test for meth in 4 days?" 16 | candidate_1 = "Can meth be detected in a urine test if last used was Thursday night and the test was tuesday morning?" 17 | candidate_2 = "how old are you?" 18 | 19 | score = evaluation.evaluation({"question": query}, {"question": candidate_1}) 20 | assert isinstance(score, float), type(score) 21 | assert score > 0.8 22 | 23 | score = evaluation.evaluation({"question": query}, {"question": candidate_2}) 24 | assert score < 0.1 25 | 26 | 27 | def test_onnx(): 28 | evaluation = OnnxModelEvaluation() 29 | _test_evaluation(evaluation) 30 | 31 | 32 | def test_get_eval(): 33 | evaluation = _get_eval("onnx") 34 | _test_evaluation(evaluation) 35 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_sbert.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import SbertCrossencoderEvaluation 5 | 6 | 7 | def _test_evaluation(evaluation): 8 | range_min, range_max = evaluation.range() 9 | assert math.isclose(range_min, 0.0) 10 | assert math.isclose(range_max, 1.0) 11 | 12 | score = evaluation.evaluation({"question": "hello"}, {"question": "hello"}) 13 | assert math.isclose(score, 1.0) 14 | 15 | query = "Can you pass a urine test for meth in 4 days?" 16 | candidate_1 = "Can meth be detected in a urine test if last used was Thursday night and the test was tuesday morning?" 17 | candidate_2 = "how old are you?" 
18 | 19 | score = evaluation.evaluation({"question": query}, {"question": candidate_1}) 20 | assert score > 0.8 21 | 22 | score = evaluation.evaluation({"question": query}, {"question": candidate_2}) 23 | assert score < 0.1 24 | 25 | 26 | def test_sbert(): 27 | evaluation = SbertCrossencoderEvaluation() 28 | _test_evaluation(evaluation) 29 | 30 | 31 | def test_get_eval(): 32 | evaluation = _get_eval("sbert_crossencoder") 33 | _test_evaluation(evaluation) 34 | 35 | if __name__ == '__main__': 36 | test_sbert() 37 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_sequence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import SequenceMatchEvaluation 5 | from gptcache.similarity_evaluation.sequence_match import reweight 6 | 7 | 8 | def normalize(vec): 9 | norm = np.linalg.norm(vec) 10 | return vec / norm 11 | 12 | 13 | def _test_evaluation(evaluation): 14 | evaluation = SequenceMatchEvaluation([0.1, 0.2, 0.7], "onnx") 15 | score1 = evaluation.evaluation( 16 | {"question": "USER:foo1\nUSER:foo2\nUSER:foo3\n"}, 17 | {"question": "USER:foo1\nUSER:foo2\nUSER:foo3\n"}, 18 | ) 19 | score2 = evaluation.evaluation( 20 | {"question": "USER:foo1\nUSER:foo2\nUSER:foo3\n"}, 21 | {"question": "USER:foo1\nUSER:foo2\n"}, 22 | ) 23 | evaluation = SequenceMatchEvaluation([0.2, 0.8], "onnx") 24 | score2 = evaluation.evaluation( 25 | {"question": "USER:foo1\nUser:foo2\nUser:foo3\n"}, 26 | {"question": "USER:foo1\nUser:foo2\n"}, 27 | ) 28 | assert True 29 | 30 | 31 | def test_sequence_match(): 32 | evaluation = SequenceMatchEvaluation([0.1, 0.2, 0.7], "onnx") 33 | evaluation.range() 34 | _test_evaluation(evaluation) 35 | 36 | 37 | def test_get_eval(): 38 | evaluation = _get_eval( 39 | strategy="sequence_match", 40 | kws={ 41 | "embedding_extractor": "onnx", 42 | "weights": [0.1, 0.2, 0.7], 43 | "embedding_config": {"model": "GPTCache/paraphrase-albert-onnx"}, 44 | }, 45 | ) 46 | _test_evaluation(evaluation) 47 | 48 | 49 | def test_reweigth(): 50 | ws = reweight([0.7, 0.2, 0.1], 4) 51 | assert len(ws) == 3 52 | ws = reweight([0.7, 0.2, 0.1], 3) 53 | assert len(ws) == 3 54 | ws = reweight([0.7, 0.2, 0.1], 2) 55 | assert len(ws) == 2 56 | ws = reweight([0.7, 0.2, 0.1], 1) 57 | assert len(ws) == 1 58 | 59 | 60 | if __name__ == "__main__": 61 | test_sequence_match() 62 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_string.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import ExactMatchEvaluation 5 | 6 | def _test_evaluation(evaluation): 7 | range_min, range_max = evaluation.range() 8 | assert math.isclose(range_min, 0.0) 9 | assert math.isclose(range_max, 1.0) 10 | 11 | score = evaluation.evaluation({"question": "hello"}, {"question": "hello"}) 12 | assert math.isclose(score, 1.0) 13 | 14 | score = evaluation.evaluation({"question": "tello"}, {"question": "hello"}) 15 | assert math.isclose(score, 0.0) 16 | 17 | 18 | def test_exact_match_evaluation(): 19 | evaluation = ExactMatchEvaluation() 20 | _test_evaluation(evaluation) 21 | 22 | 23 | def test_get_eval(): 24 | evaluation = _get_eval("exact") 25 | _test_evaluation(evaluation) 26 | 
-------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evalution_time.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from gptcache.manager.scalar_data.base import CacheData 4 | from gptcache.similarity_evaluation import TimeEvaluation 5 | 6 | 7 | def test_evaluation_time(): 8 | eval = TimeEvaluation("distance", {}, time_range=2) 9 | assert eval.range() == (0.0, 4.0) 10 | 11 | similarity = eval.evaluation({}, {"search_result": (3.5, None)}) 12 | assert similarity == 0.0 13 | 14 | similarity = eval.evaluation( 15 | {}, {"search_result": (3.5, None), "cache_data": CacheData("a", "b")} 16 | ) 17 | assert similarity == 0.0 18 | 19 | similarity = eval.evaluation( 20 | {}, 21 | { 22 | "search_result": (3.5, None), 23 | "cache_data": CacheData("a", "b", create_on=datetime.datetime(2022, 1, 1)), 24 | }, 25 | ) 26 | assert similarity == 0.0 27 | 28 | similarity = eval.evaluation( 29 | {}, 30 | { 31 | "search_result": (3.5, None), 32 | "cache_data": CacheData("a", "b", create_on=datetime.datetime.now()), 33 | }, 34 | ) 35 | assert similarity == 0.5 36 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_np.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | 5 | from gptcache.adapter.api import _get_eval 6 | from gptcache.similarity_evaluation import NumpyNormEvaluation 7 | 8 | 9 | embedding_func = lambda x: np.array([1, 1]) 10 | 11 | 12 | def _test_evaluation(evaluation): 13 | 14 | range_min, range_max = evaluation.range() 15 | # print(range_max) 16 | assert math.isclose(range_min, 0.0) 17 | assert math.isclose(range_max, 2.0) 18 | 19 | score = evaluation.evaluation( 20 | {"embedding": np.array([-0.5, -0.5])}, {"embedding": np.array([1, 1])} 21 | ) 22 | assert math.isclose(score, 0.0, abs_tol=0.001), score 23 | 24 | score = evaluation.evaluation( 25 | {"embedding": np.array([1, 2, 3, 4])}, 26 | {"embedding": np.array([0.1, 0.2, 0.3, 0.4])}, 27 | ) 28 | 29 | assert math.isclose(score, 2.0, abs_tol=0.001), score 30 | 31 | score = evaluation.evaluation( 32 | {"question": "test"}, 33 | {"question": "test"} 34 | ) 35 | assert math.isclose(score, 2.0), score 36 | 37 | score = evaluation.evaluation( 38 | {"question": "test1"}, 39 | {"question": "test2"} 40 | ) 41 | assert math.isclose(score, 2.0), score 42 | 43 | 44 | def test_norm(): 45 | evaluation = NumpyNormEvaluation(enable_normal=True, question_embedding_function=embedding_func) 46 | _test_evaluation(evaluation) 47 | 48 | 49 | def test_get_eval(): 50 | evaluation = _get_eval(strategy="numpy", kws={"enable_normal": True, "question_embedding_function": embedding_func}) 51 | 52 | 53 | if __name__ == "__main__": 54 | test_norm() 55 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_simple.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import SearchDistanceEvaluation 5 | 6 | 7 | def _test_evaluation_default(evaluation): 8 | range_min, range_max = evaluation.range() 9 | assert math.isclose(range_min, 0.0) 10 | assert math.isclose(range_max, 4.0) 11 | 12 | score = evaluation.evaluation({}, {"search_result": (1, None)}) 13 | assert math.isclose(score, 
3.0) 14 | 15 | score = evaluation.evaluation({}, {"search_result": (-1, None)}) 16 | assert math.isclose(score, 4.0) 17 | 18 | 19 | def _test_evaluation_config(evaluation): 20 | range_min, range_max = evaluation.range() 21 | assert math.isclose(range_min, 0.0) 22 | assert math.isclose(range_max, 10.0) 23 | 24 | score = evaluation.evaluation({}, {"search_result": (5, None)}) 25 | assert math.isclose(score, 5.0) 26 | score = evaluation.evaluation({}, {"search_result": (20, None)}) 27 | assert math.isclose(score, 10.0) 28 | 29 | 30 | def test_search_distance_evaluation(): 31 | evaluation = SearchDistanceEvaluation() 32 | _test_evaluation_default(evaluation) 33 | 34 | evaluation = SearchDistanceEvaluation(max_distance=10, positive=True) 35 | _test_evaluation_config(evaluation) 36 | 37 | 38 | def test_get_eval(): 39 | evaluation = _get_eval("distance") 40 | _test_evaluation_default(evaluation) 41 | 42 | evaluation = _get_eval(strategy="distance", kws = {"max_distance": 10, "positive": True}) 43 | _test_evaluation_config(evaluation) 44 | -------------------------------------------------------------------------------- /tests/unit_tests/test_client.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch, Mock 2 | 3 | from gptcache.utils import import_httpx 4 | 5 | import_httpx() 6 | from gptcache.client import Client 7 | 8 | 9 | def test_client(): 10 | client = Client() 11 | with patch("httpx.AsyncClient.post") as mock_response: 12 | mock_response.return_value = Mock(status_code=200) 13 | status_code = client.put("Hi", "Hi back") 14 | assert status_code == 200 15 | 16 | with patch("httpx.AsyncClient.post") as mock_response: 17 | m = Mock() 18 | attrs = {"json.return_value": {"answer": "Hi back"}} 19 | m.configure_mock(**attrs) 20 | mock_response.return_value = m 21 | ans = client.get("Hi") 22 | assert ans == "Hi back" 23 | -------------------------------------------------------------------------------- /tests/unit_tests/test_core.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache, Config 4 | from gptcache.report import Report 5 | from gptcache.utils.cache_func import cache_all 6 | from gptcache.utils.time import time_cal 7 | 8 | 9 | def test_time_cal(): 10 | def log_time_func(fname, delta_time): 11 | assert fname == "unit_test" 12 | assert delta_time > 0.1 13 | 14 | cache.config = Config(log_time_func=log_time_func) 15 | 16 | @time_cal 17 | def time_cal_annotation(): 18 | time.sleep(0.2) 19 | 20 | func_name = "test_time_cal" 21 | 22 | def log_time_func(fname, delta_time): 23 | assert fname == func_name 24 | assert delta_time > 0.1 25 | 26 | cache.config = Config(log_time_func=log_time_func) 27 | 28 | def report_func(delta_time): 29 | assert delta_time > 0.1 30 | 31 | def time_cal_without_annotation(): 32 | time.sleep(0.2) 33 | 34 | time_cal( 35 | time_cal_without_annotation, func_name=func_name, report_func=report_func 36 | )() 37 | 38 | cache.config = None 39 | 40 | 41 | def test_cache_all(): 42 | assert cache_all() 43 | 44 | 45 | def test_report(): 46 | report = Report() 47 | report.embedding(1) 48 | report.embedding(3) 49 | report.search(2) 50 | report.search(4) 51 | report.hint_cache() 52 | report.hint_cache() 53 | 54 | assert report.average_embedding_time() == 2 55 | assert report.op_embedding.count == 2 56 | assert report.average_search_time() == 3 57 | assert report.op_search.count == 2 58 | assert report.hint_cache_count == 2 59 | 
-------------------------------------------------------------------------------- /tests/unit_tests/utils/test_error.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.error import ( 2 | CacheError, 3 | NotInitError, 4 | NotFoundError, 5 | ParamError, 6 | ) 7 | 8 | 9 | def test_error_type(): 10 | not_init_error = NotInitError() 11 | assert issubclass(type(not_init_error), CacheError) 12 | 13 | not_found_store_error = NotFoundError("unittest", "test_error_type") 14 | assert issubclass(type(not_found_store_error), CacheError) 15 | 16 | param_error = ParamError("unittest") 17 | assert issubclass(type(param_error), CacheError) 18 | 19 | 20 | def test_wrap(): 21 | import openai 22 | 23 | from gptcache.utils.error import wrap_error 24 | 25 | def raise_error(): 26 | try: 27 | raise openai.error.OpenAIError(message="test") 28 | except openai.error.OpenAIError as e: 29 | raise wrap_error(e) 30 | 31 | is_exception = False 32 | try: 33 | raise_error() 34 | except openai.error.OpenAIError as e: 35 | is_exception = True 36 | 37 | assert is_exception 38 | -------------------------------------------------------------------------------- /tests/unit_tests/utils/test_log.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.log import gptcache_log 2 | 3 | 4 | def test_error_type(): 5 | gptcache_log.setLevel("INFO") 6 | gptcache_log.error("Cache log error.") 7 | gptcache_log.warning("Cache log warning.") 8 | gptcache_log.info("Cache log info.") 9 | assert gptcache_log.level == 20 10 | -------------------------------------------------------------------------------- /tests/unit_tests/utils/test_response.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.response import ( 2 | get_message_from_openai_answer, 3 | get_stream_message_from_openai_answer, 4 | ) 5 | 6 | 7 | def test_get_message_from_openai_answer(): 8 | message = get_message_from_openai_answer( 9 | { 10 | "choices": [ 11 | { 12 | "finish_reason": "stop", 13 | "index": 0, 14 | "message": {"content": "hello", "role": "assistant"}, 15 | } 16 | ], 17 | "created": 1677825456, 18 | "id": "chatcmpl-6ptKqrhgRoVchm58Bby0UvJzq2ZuQ", 19 | "model": "gpt-3.5-turbo-0301", 20 | "object": "chat.completion", 21 | "usage": { 22 | "completion_tokens": 301, 23 | "prompt_tokens": 36, 24 | "total_tokens": 337, 25 | }, 26 | } 27 | ) 28 | assert message == "hello" 29 | 30 | 31 | def test_get_stream_message_from_openai_answer(): 32 | message = get_stream_message_from_openai_answer( 33 | { 34 | "choices": [ 35 | {"delta": {"role": "assistant"}, "finish_reason": None, "index": 0} 36 | ], 37 | "created": 1677825464, 38 | "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD", 39 | "model": "gpt-3.5-turbo-0301", 40 | "object": "chat.completion.chunk", 41 | } 42 | ) 43 | assert message == "" 44 | 45 | message = get_stream_message_from_openai_answer( 46 | { 47 | "choices": [{"delta": {"content": "2"}, "finish_reason": None, "index": 0}], 48 | "created": 1677825464, 49 | "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD", 50 | "model": "gpt-3.5-turbo-0301", 51 | "object": "chat.completion.chunk", 52 | } 53 | ) 54 | assert message == "2" 55 | --------------------------------------------------------------------------------