├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yaml │ ├── documentation_improvement.yaml │ ├── enhancement.yaml │ └── feature_request.yaml ├── mergify.yml └── workflows │ ├── Nightly_CI_main.yaml │ ├── build_dev_python_package.yaml │ ├── publish_dev_package.yaml │ ├── publish_release_image.yaml │ ├── publish_release_package.yaml │ ├── pylint.yaml │ └── unit_test_main.yaml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Makefile ├── OWNERS ├── README.md ├── cache_config_template.yml ├── codecov.yml ├── docs ├── .readthedocs.yaml ├── GPT-Cache-Multinode.png ├── GPTCache-Distributed-Search.png ├── GPTCache-Local-Search.png ├── GPTCache.png ├── GPTCacheStructure.png ├── Makefile ├── _exts │ ├── docgen2.py │ └── index_con.py ├── _templates │ ├── author.html │ ├── copyright.html │ ├── function.rst │ └── index.rst ├── bootcamp │ ├── assets │ │ ├── image_generation_gradio.png │ │ ├── speech_to_text_gradio.png │ │ └── vqa.png │ ├── langchain │ │ ├── baby_agi.ipynb │ │ ├── index.rst │ │ ├── qa_generation.ipynb │ │ ├── question_answering.ipynb │ │ └── sqlite.ipynb │ ├── llama_index │ │ ├── index.rst │ │ └── webpage_qa.ipynb │ ├── openai │ │ ├── chat.ipynb │ │ ├── image_generation.ipynb │ │ ├── index.rst │ │ ├── language_translate.ipynb │ │ ├── speech_to_text.ipynb │ │ ├── sql_translate.ipynb │ │ └── tweet_classifier.ipynb │ ├── replicate │ │ ├── index.rst │ │ └── visual_question_answering.ipynb │ ├── streamlit │ │ ├── gptcache-streamlit-audio │ │ │ ├── .streamlit │ │ │ │ └── config.toml │ │ │ ├── README.md │ │ │ ├── audio.py │ │ │ ├── example.png │ │ │ ├── local │ │ │ │ └── .cache │ │ │ └── requirements.txt │ │ └── gptcache-streamlit-image │ │ │ ├── README.md │ │ │ ├── example.png │ │ │ ├── imagen.py │ │ │ ├── local │ │ │ └── .cache │ │ │ └── requirements.txt │ ├── temperature │ │ ├── chat.ipynb │ │ ├── create_image.ipynb │ │ └── index.rst │ └── vertex │ │ ├── index.rst │ │ └── vertexai_caching.ipynb ├── conf.py ├── configure_it.md ├── contributing.md ├── feature.md ├── gptcache_live.pdf ├── horizontal-scaling-usage.md ├── index.rst ├── make.bat ├── references │ └── index.rst ├── release_note.md ├── requirements.txt ├── toc.bak └── usage.md ├── examples ├── README.md ├── adapter │ ├── api.py │ ├── langchain_llms.py │ └── openai_chatgpt.py ├── benchmark │ ├── benchmark_sqlite_faiss_onnx.py │ ├── mock_data.json │ ├── similiar_qqp.json.gz │ └── similiar_qqp_full.json.gz ├── context_process │ ├── selective_context.py │ └── summarization_context.py ├── data_manager │ ├── map_manager.py │ ├── scalar_store.py │ └── vector_store.py ├── embedding │ ├── default.py │ ├── onnx.py │ ├── paddlenlp.py │ └── random.py ├── eviction │ └── distributed_eviction.py ├── integrate │ ├── diffusers │ │ └── stable_diffusion.py │ ├── dolly │ │ └── basic_usage.py │ ├── langchain │ │ ├── langchain_llms_mock.py │ │ ├── langchain_prompt_openai.py │ │ ├── langchain_qa_chain.py │ │ └── langchain_similaritycache_openai.py │ ├── llama_cpp │ │ └── basic_usage.py │ ├── openai │ │ ├── basic_usage.py │ │ ├── create_image.py │ │ ├── qa.py │ │ ├── readme.py │ │ └── summarize.py │ ├── replicate │ │ └── vqa.py │ └── stability │ │ └── text_to_image.py ├── processor │ └── temperature_example.py ├── session │ └── session.py ├── similarity_evaluation │ ├── exact_match.py │ ├── onnx.py │ ├── search_distance.py │ └── sequence_match.py └── vqa_demo.py ├── gptcache ├── __init__.py ├── adapter │ ├── __init__.py │ ├── adapter.py │ ├── api.py │ ├── base.py │ ├── diffusers.py │ ├── dolly.py │ ├── langchain_models.py │ ├── llama_cpp.py │ ├── minigpt4.py │ 
├── openai.py │ ├── replicate.py │ └── stability_sdk.py ├── client.py ├── config.py ├── core.py ├── embedding │ ├── __init__.py │ ├── base.py │ ├── cohere.py │ ├── data2vec.py │ ├── fasttext.py │ ├── huggingface.py │ ├── langchain.py │ ├── onnx.py │ ├── openai.py │ ├── paddlenlp.py │ ├── rwkv.py │ ├── sbert.py │ ├── string.py │ ├── timm.py │ ├── uform.py │ └── vit.py ├── manager │ ├── __init__.py │ ├── data_manager.py │ ├── eviction │ │ ├── __init__.py │ │ ├── base.py │ │ ├── distributed_cache.py │ │ ├── manager.py │ │ ├── memory_cache.py │ │ └── redis_eviction.py │ ├── eviction_manager.py │ ├── factory.py │ ├── object_data │ │ ├── __init__.py │ │ ├── base.py │ │ ├── local_storage.py │ │ ├── manager.py │ │ └── s3_storage.py │ ├── scalar_data │ │ ├── __init__.py │ │ ├── base.py │ │ ├── dynamo_storage.py │ │ ├── manager.py │ │ ├── mongo.py │ │ ├── redis_storage.py │ │ └── sql_storage.py │ └── vector_data │ │ ├── __init__.py │ │ ├── base.py │ │ ├── chroma.py │ │ ├── docarray_index.py │ │ ├── faiss.py │ │ ├── hnswlib_store.py │ │ ├── manager.py │ │ ├── milvus.py │ │ ├── pgvector.py │ │ ├── qdrant.py │ │ ├── redis_vectorstore.py │ │ ├── usearch.py │ │ └── weaviate.py ├── processor │ ├── __init__.py │ ├── check_hit.py │ ├── context │ │ ├── __init__.py │ │ ├── concat_context.py │ │ ├── context.py │ │ ├── selective_context.py │ │ └── summarization_context.py │ ├── post.py │ └── pre.py ├── report.py ├── session.py ├── similarity_evaluation │ ├── __init__.py │ ├── cohere_rerank.py │ ├── distance.py │ ├── exact_match.py │ ├── kreciprocal.py │ ├── np.py │ ├── onnx.py │ ├── sbert_crossencoder.py │ ├── sequence_match.py │ ├── similarity_evaluation.py │ └── time.py └── utils │ ├── __init__.py │ ├── cache_func.py │ ├── dependency_control.py │ ├── error.py │ ├── lazy_import.py │ ├── log.py │ ├── response.py │ ├── softmax.py │ ├── time.py │ └── token.py ├── gptcache_server ├── __init__.py ├── dockerfiles │ └── Dockerfile └── server.py ├── pylint.conf ├── requirements.txt ├── scripts ├── manage_conda_env.sh └── remove_example_cache.sh ├── setup.py └── tests ├── integration_tests ├── base │ └── client_base.py ├── common │ ├── common_func.py │ └── common_type.py ├── config │ └── log_config.py ├── examples │ ├── map │ │ └── test_example_map.py │ ├── sqlite_faiss_mock │ │ └── test_example_sqlite_faiss.py │ └── sqlite_faiss_onnx │ │ └── test_example_sqlite_faiss_onnx.py ├── processor │ └── pre │ │ └── test_pre_without_prompt.py ├── test_redis_onnx.py ├── test_sqlite_faiss_onnx.py ├── test_sqlite_milvus_sbert.py └── utils │ └── util_log.py ├── pytest.ini ├── requirements.txt └── unit_tests ├── adapter ├── test_adapter.py ├── test_api.py ├── test_diffusers.py ├── test_dolly.py ├── test_langchain_models.py ├── test_llama_cpp.py ├── test_openai.py ├── test_replicate.py └── test_stability.py ├── embedding ├── test_cohere.py ├── test_data2vec.py ├── test_embedding_openai.py ├── test_embedding_string.py ├── test_fasttext.py ├── test_huggingface.py ├── test_langchain.py ├── test_onnx.py ├── test_paddlenlp.py ├── test_rwkv.py ├── test_sbert.py ├── test_timm.py ├── test_uform.py └── test_vit.py ├── eviction ├── test_distributed_cache.py └── test_memory_cache.py ├── manager ├── test_base.py ├── test_chromadb.py ├── test_dynamo_storage.py ├── test_eviction.py ├── test_factory.py ├── test_local_index.py ├── test_map.py ├── test_milvusdb.py ├── test_mongo.py ├── test_object_storage.py ├── test_pgvector.py ├── test_qdrant.py ├── test_redis.py ├── test_redis_cache_storage.py ├── test_sql_scalar.py ├── test_usearch.py └── 
test_weaviate.py ├── processor ├── test_concat_context.py ├── test_context.py ├── test_post.py ├── test_pre.py ├── test_selective_context.py └── test_summarize_context.py ├── similarity_evaluation ├── test_cohere_rerank.py ├── test_evaluation_kreciprocal.py ├── test_evaluation_onnx.py ├── test_evaluation_sbert.py ├── test_evaluation_sequence.py ├── test_evaluation_string.py ├── test_evalution_time.py ├── test_np.py └── test_simple.py ├── test_client.py ├── test_core.py ├── test_session.py └── utils ├── test_error.py ├── test_log.py └── test_response.py /.github/ISSUE_TEMPLATE/bug_report.yaml: -------------------------------------------------------------------------------- 1 | name: 🐞 Bug Report 2 | description: Create a bug report to help us improve GPTCache 3 | title: "[Bug]: " 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Thanks for taking the time to fill out this bug report! Please fill the form in English! 9 | - type: textarea 10 | attributes: 11 | label: Current Behavior 12 | description: A concise description of what you're experiencing. 13 | placeholder: | 14 | When I do , happens and I see the error message attached below: 15 | ```...``` 16 | validations: 17 | required: true 18 | - type: textarea 19 | attributes: 20 | label: Expected Behavior 21 | description: A concise description of what you expected to happen. 22 | placeholder: When I do , should happen instead. 23 | validations: 24 | required: false 25 | - type: textarea 26 | attributes: 27 | label: Steps To Reproduce 28 | description: Steps to reproduce the behavior. 29 | placeholder: | 30 | 1. In this environment... 31 | 2. With this config... 32 | 3. Run '...' 33 | 4. See error... 34 | render: markdown 35 | validations: 36 | required: false 37 | - type: textarea 38 | attributes: 39 | label: Environment 40 | description: | 41 | Enter the Environment Details: 42 | render: markdown 43 | validations: 44 | required: false 45 | - type: textarea 46 | attributes: 47 | label: Anything else? 48 | description: | 49 | Links? References? Anything that will give us more context about the issue you are encountering! 50 | validations: 51 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation_improvement.yaml: -------------------------------------------------------------------------------- 1 | name: 📖 Documentation Improvement 2 | description: Suggest improvements to our documentation 3 | title: "[DOCS]: " 4 | labels: [Documentation] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this documentation improvement request! 10 | - type: textarea 11 | attributes: 12 | label: Documentation Link 13 | description: Add a link to the page which needs improvement (if relevant) 14 | validations: 15 | required: false 16 | - type: textarea 17 | attributes: 18 | label: Describe the problem 19 | description: Is the documentation missing? Or is it confusing? Why is it confusing? 20 | validations: 21 | required: false 22 | - type: textarea 23 | attributes: 24 | label: Describe the improvement 25 | description: A clear and concise description of the improvement. 26 | validations: 27 | required: false 28 | - type: textarea 29 | attributes: 30 | label: Anything else? 31 | description: | 32 | Links? References? Anything that will give us more context about the issue you are encountering! 
33 | validations: 34 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/enhancement.yaml: -------------------------------------------------------------------------------- 1 | name: Enhancement Request 2 | description: As a developer, I want to make an enhancement for GPTCache 3 | title: "[Enhancement]:" 4 | labels: [kind/enhancement] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to request/suggest an enhancement for GPTCache! Please fill the form in English! 10 | - type: textarea 11 | attributes: 12 | label: What would you like to be added? 13 | description: A concise description of what you're expecting/suggesting. 14 | placeholder: | 15 | I would like to suggest/request a feature that's like... 16 | validations: 17 | required: false 18 | - type: textarea 19 | attributes: 20 | label: Why is this needed? 21 | description: A concise description of the reason/motivation 22 | validations: 23 | required: false 24 | - type: textarea 25 | attributes: 26 | label: Anything else? 27 | description: | 28 | Links? References? Anything that will give us more context about this! 29 | validations: 30 | required: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature Request 2 | description: As a user, I want to request a feature for GPTCache 3 | title: "[Feature]:" 4 | labels: [kind/feature] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to request a feature for GPTCache! Please fill the form in English! 10 | - type: textarea 11 | attributes: 12 | label: Is your feature request related to a problem? Please describe. 13 | description: A concise description of the problem you are facing or the motivation behind this feature request. 14 | placeholder: | 15 | I faced a problem due to which ... 16 | validations: 17 | required: false 18 | - type: textarea 19 | attributes: 20 | label: Describe the solution you'd like. 21 | description: A concise description of the solution for the issue. 22 | validations: 23 | required: false 24 | - type: textarea 25 | attributes: 26 | label: Describe an alternate solution. 27 | description: Is there any other approach to solve the problem? 28 | validations: 29 | required: false 30 | - type: textarea 31 | attributes: 32 | label: Anything else? (Additional Context) 33 | description: | 34 | Links? References? Anything that will give us more context about this! 
35 | validations: 36 | required: false -------------------------------------------------------------------------------- /.github/workflows/Nightly_CI_main.yaml: -------------------------------------------------------------------------------- 1 | name: Nightly CI 2 | 3 | on: 4 | schedule: 5 | # * is a special character in YAML so you have to quote this string 6 | # ┌───────────── minute (0 - 59) 7 | # │ ┌───────────── hour (0 - 23) 8 | # │ │ ┌───────────── day of the month (1 - 31) 9 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 10 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 11 | # │ │ │ │ │ 12 | # │ │ │ │ │ 13 | # │ │ │ │ │ 14 | - cron: "0 22 * * *" 15 | pull_request: 16 | branches: 17 | - main 18 | - dev 19 | paths: 20 | - '.github/workflows/Nightly_CI_main.yaml' 21 | 22 | jobs: 23 | nightly-CI-gpt-cache-cli-main: 24 | runs-on: ubuntu-20.04 25 | strategy: 26 | fail-fast: false 27 | services: 28 | postgres: 29 | image: ankane/pgvector 30 | ports: 31 | - 5432:5432 32 | env: 33 | POSTGRES_PASSWORD: postgres 34 | options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 35 | steps: 36 | - uses: actions/checkout@main 37 | 38 | - name: Set up Python 3.8 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: 3.8 42 | cache: pip 43 | 44 | - name: Install dependency 45 | shell: bash 46 | run: | 47 | pip install -r requirements.txt 48 | 49 | - name: Build and install 50 | shell: bash 51 | run: | 52 | python setup.py install 53 | 54 | - name: Install test dependency 55 | shell: bash 56 | working-directory: tests 57 | run: | 58 | pip install -r requirements.txt 59 | 60 | - name: Download the `en_core_web_sm` model 61 | shell: bash 62 | working-directory: tests 63 | run: | 64 | python3 -m spacy download en_core_web_sm 65 | 66 | - name: Nightly CI Tests 67 | timeout-minutes: 30 68 | shell: bash 69 | working-directory: tests 70 | run: | 71 | export IS_CI=true 72 | export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python 73 | python3 -m pytest ./ --tags L2 74 | -------------------------------------------------------------------------------- /.github/workflows/build_dev_python_package.yaml: -------------------------------------------------------------------------------- 1 | name: Build dev package 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - dev 7 | paths: 8 | - 'examples/**' 9 | - 'gptcache/**' 10 | - 'tests/**' 11 | - '!**.md' 12 | - '.github/workflows/**' 13 | pull_request: 14 | branches: 15 | - main 16 | - dev 17 | paths: 18 | - 'examples/**' 19 | - 'gptcache/**' 20 | - 'tests/**' 21 | - '!**.md' 22 | - '.github/workflows/**' 23 | workflow_dispatch: 24 | 25 | jobs: 26 | build-pypi: 27 | runs-on: ubuntu-20.04 28 | steps: 29 | - uses: actions/checkout@master 30 | - name: Setup Python 31 | uses: actions/setup-python@v4 32 | with: 33 | python-version: 3.8 34 | # - name: Get history and tags for SCM versioning 35 | # run: | 36 | # git fetch --prune --unshallow 37 | # git fetch --depth=1 origin +refs/tags/*:refs/tags/* 38 | - name: Install pypa/build 39 | run: >- 40 | python -m 41 | pip install 42 | build 43 | --user 44 | - name: Build a binary wheel and a source tarball 45 | run: >- 46 | python -m 47 | build 48 | --sdist 49 | --wheel 50 | --outdir dist/ 51 | . 
52 | - name: Archive production artifacts 53 | uses: actions/upload-artifact@v3 54 | with: 55 | name: dist 56 | path: | 57 | dist 58 | !dist/**/*.md 59 | -------------------------------------------------------------------------------- /.github/workflows/publish_dev_package.yaml: -------------------------------------------------------------------------------- 1 | name: Publish package to TestPyPI 2 | # on: 3 | # push: 4 | # branches: 5 | # - 'main' 6 | # paths: 7 | # - 'gptcache/**' 8 | # - '!**.md' 9 | # - '.github/workflows/publish_dev_package.yaml' 10 | # pull_request: 11 | # branches: 12 | # - main 13 | # paths: 14 | # - '.github/workflows/publish_dev_package.yaml' 15 | on: 16 | workflow_dispatch: 17 | 18 | schedule: 19 | # * is a special character in YAML so you have to quote this string 20 | # ┌───────────── minute (0 - 59) 21 | # │ ┌───────────── hour (0 - 23) 22 | # │ │ ┌───────────── day of the month (1 - 31) 23 | # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) 24 | # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) 25 | # │ │ │ │ │ 26 | # │ │ │ │ │ 27 | # │ │ │ │ │ 28 | - cron: '30 9 * * 1,4' 29 | 30 | 31 | jobs: 32 | build-and-publish-dev: 33 | runs-on: ubuntu-20.04 34 | steps: 35 | - uses: actions/checkout@master 36 | - name: Setup Python 37 | uses: actions/setup-python@v4 38 | with: 39 | python-version: 3.8 40 | # - name: Get history and tags for SCM versioning 41 | # run: | 42 | # git fetch --prune --unshallow 43 | # git fetch --depth=1 origin +refs/tags/*:refs/tags/* 44 | - name: Install pypi/build 45 | run: >- 46 | python -m 47 | pip install 48 | build 49 | --user 50 | - name: Build a binary wheel and a source tarball 51 | run: >- 52 | python -m 53 | build 54 | --sdist 55 | --wheel 56 | --outdir dist/ 57 | . 58 | - name: Publish distribution 📦 to Test PyPI 59 | uses: pypa/gh-action-pypi-publish@master 60 | with: 61 | password: ${{ secrets.PYPI_TEST_TOKEN }} 62 | repository_url: https://test.pypi.org/legacy/ 63 | -------------------------------------------------------------------------------- /.github/workflows/publish_release_image.yaml: -------------------------------------------------------------------------------- 1 | name: Publish image to dockerhub 2 | on: 3 | release: 4 | types: [published] 5 | workflow_dispatch: 6 | 7 | jobs: 8 | build-and-publish-image: 9 | runs-on: ubuntu-20.04 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@master 13 | 14 | - name: Get version 15 | id: get_version 16 | run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//} 17 | 18 | - name: Docker Build 19 | shell: bash 20 | run: | 21 | docker build "./gptcache_server/dockerfiles" -t "zilliz/gptcache:${{ steps.get_version.outputs.VERSION }}" 22 | 23 | - name: Docker login 24 | if: success() 25 | uses: azure/docker-login@v1 26 | with: 27 | username: ${{ secrets.ZILLIZ_DOCKERHUB_USERNAME }} 28 | password: ${{ secrets.ZILLIZ_DOCKERHUB_PASSWORD }} 29 | 30 | - name: Docker Push 31 | if: success() 32 | continue-on-error: false 33 | shell: bash 34 | run: | 35 | docker push zilliz/gptcache:${{ steps.get_version.outputs.VERSION }} 36 | docker tag zilliz/gptcache:${{ steps.get_version.outputs.VERSION }} zilliz/gptcache:latest 37 | docker push zilliz/gptcache:latest -------------------------------------------------------------------------------- /.github/workflows/publish_release_package.yaml: -------------------------------------------------------------------------------- 1 | name: Publish package to PyPI 2 | # on: 3 | # push: 4 | # branches: 5 | # - 'main' 6 | # paths: 7 | 
# - 'gptcache/**' 8 | # - '!**.md' 9 | # - '.github/workflows/publish_dev_package.yaml' 10 | # pull_request: 11 | # branches: 12 | # - main 13 | # paths: 14 | # - '.github/workflows/publish_dev_package.yaml' 15 | on: 16 | workflow_dispatch: 17 | 18 | release: 19 | types: [published] 20 | # tags: 21 | # description: 'Test scenario tags' 22 | # required: false 23 | # type: boolean 24 | 25 | jobs: 26 | build-and-publish-release: 27 | runs-on: ubuntu-20.04 28 | steps: 29 | - uses: actions/checkout@master 30 | - name: Setup Python 31 | uses: actions/setup-python@v4 32 | with: 33 | python-version: 3.8 34 | # - name: Get history and tags for SCM versioning 35 | # run: | 36 | # git fetch --prune --unshallow 37 | # git fetch --depth=1 origin +refs/tags/*:refs/tags/* 38 | - name: Install pypi/build 39 | run: >- 40 | python -m 41 | pip install 42 | build 43 | --user 44 | - name: Build a binary wheel and a source tarball 45 | run: >- 46 | python -m 47 | build 48 | --sdist 49 | --wheel 50 | --outdir dist/ 51 | . 52 | - name: Publish distribution 📦 to Test PyPI 53 | uses: pypa/gh-action-pypi-publish@master 54 | with: 55 | password: ${{ secrets.PYPI_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/pylint.yaml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - dev 7 | # file paths to consider in the event 8 | paths: 9 | - 'examples/**' 10 | - 'gptcache/**' 11 | - 'tests/**' 12 | - 'docs/**' 13 | - '!**.md' 14 | - '.github/workflows/**' 15 | pull_request: 16 | branches: 17 | - main 18 | - dev 19 | # file paths to consider in the event 20 | paths: 21 | - 'examples/**' 22 | - 'gptcache/**' 23 | - 'tests/**' 24 | - 'docs/**' 25 | - '!**.md' 26 | - '.github/workflows/**' 27 | workflow_dispatch: 28 | 29 | jobs: 30 | pylint: 31 | runs-on: ubuntu-20.04 32 | steps: 33 | - uses: actions/checkout@v3.0.0 34 | - name: Setup Python 35 | uses: actions/setup-python@v4 36 | with: 37 | python-version: 3.8 38 | - name: Python pylint 39 | run: | 40 | pip install pylint==2.10.2 41 | make pylint_check 42 | - name: Make the readthedoc html 43 | shell: bash 44 | working-directory: docs 45 | run: | 46 | pip install -r requirements.txt 47 | make html 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Zilliz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | @pip install -r requirements.txt 3 | @python setup.py install 4 | 5 | pip_upgrade: 6 | @python -m pip install --upgrade pip 7 | 8 | package: 9 | @python setup.py sdist bdist_wheel 10 | 11 | upload: 12 | @python -m twine upload dist/* 13 | 14 | upload_test: 15 | @python -m twine upload --repository-url https://test.pypi.org/legacy/ dist/* 16 | 17 | remove_example_cache: 18 | @bash ./scripts/remove_example_cache.sh 19 | 20 | create_conda_env: 21 | @bash ./scripts/manage_conda_env.sh create 22 | 23 | remove_conda_env: 24 | @bash ./scripts/manage_conda_env.sh remove 25 | 26 | pylint_check: 27 | pylint --rcfile=pylint.conf --output-format=colorized gptcache 28 | 29 | pytest: 30 | pytest tests/ -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- 1 | filters: 2 | ".*": 3 | reviewers: 4 | - SimFG 5 | - xiaofan-luan 6 | - cxie 7 | approvers: 8 | - SimFG 9 | - xiaofan-luan 10 | - cxie 11 | -------------------------------------------------------------------------------- /cache_config_template.yml: -------------------------------------------------------------------------------- 1 | # For `model_src`, `evaluation`, `post_function`, `pre_function`, 2 | # `storage_config` options, Check README for more. 3 | 4 | embedding: 5 | onnx 6 | embedding_config: 7 | # Set model kws here including `model`, `api_key` if needed 8 | storage_config: 9 | data_dir: 10 | gptcache_data 11 | manager: 12 | sqlite,faiss 13 | vector_params: 14 | # Set vector storage related params here 15 | evaluation: 16 | distance 17 | evaluation_config: 18 | # Set evaluation metric kws here 19 | pre_function: 20 | get_prompt 21 | post_function: 22 | first 23 | config: 24 | similarity_threshold: 0.8 25 | # Set other config here 26 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | # Configuration File for CodeCov 2 | codecov: 3 | require_ci_to_pass: no 4 | notify: 5 | require_ci_to_pass: no 6 | wait_for_ci: false 7 | 8 | coverage: 9 | precision: 2 10 | round: down 11 | range: "70...100" 12 | 13 | status: 14 | project: 15 | default: 16 | target: 90% 17 | threshold: 0% #Allow the coverage to drop by threshold%, and posting a success status. 
18 | patch: 19 | default: 20 | target: 90% #target of patch diff 21 | threshold: 0% 22 | if_ci_failed: error #success, failure, error, ignore 23 | 24 | comment: 25 | layout: "reach, diff, flags, files" 26 | behavior: default 27 | require_changes: false 28 | branches: # branch names that can post comment 29 | - main 30 | - dev 31 | 32 | ignore: 33 | - "LICENSES" 34 | - ".git" 35 | - "*.yml" 36 | - "*.md" 37 | - "**/minigpt4.py" 38 | -------------------------------------------------------------------------------- /docs/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | python: 4 | version: 3.8 5 | system_packages: true -------------------------------------------------------------------------------- /docs/GPT-Cache-Multinode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPT-Cache-Multinode.png -------------------------------------------------------------------------------- /docs/GPTCache-Distributed-Search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCache-Distributed-Search.png -------------------------------------------------------------------------------- /docs/GPTCache-Local-Search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCache-Local-Search.png -------------------------------------------------------------------------------- /docs/GPTCache.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCache.png -------------------------------------------------------------------------------- /docs/GPTCacheStructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/GPTCacheStructure.png -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SPHINXAUTOBUILD ?= sphinx-autobuild 9 | SOURCEDIR = . 10 | BUILDDIR = _build 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /docs/_exts/index_con.py: -------------------------------------------------------------------------------- 1 | from m2r2 import convert 2 | import os 3 | 4 | class IndexCon: 5 | 6 | def __init__(self, source, output): 7 | self.source = source 8 | self.output = output 9 | self.preprocess() 10 | 11 | def preprocess(self): 12 | with open(self.source, 'r') as f: 13 | 14 | # remove the CI link from the file 15 | lines = f.readlines() 16 | lines = [line for line in lines if '[CI]' not in line] 17 | 18 | # change local links to the ones related to the _build/html directory and extension to .html 19 | lines = [line.replace('](docs/', '](') for line in lines] 20 | lines = [line.replace('.md)', '.html)') for line in lines] 21 | 22 | # get the raw text within the
<details> tag 23 | start_details_tag = [line for line in lines if '<details>' in line] 24 | summary_tag = [line for line in lines if '</summary>' in line] 25 | end_details_tag = [line for line in lines if '</details>
' in line] 26 | start_details = lines.index(start_details_tag[0]) 27 | summary_line = lines.index(summary_tag[0]) 28 | end_details = lines.index(end_details_tag[0]) 29 | 30 | before = convert(''.join(lines[:start_details-1])) 31 | end = convert(''.join(lines[end_details+1:])) 32 | 33 | collapse_rst = lines[summary_line+1:end_details] 34 | collapse_rst = [ "**" + x.split("# ")[1][:-1] + "**\n" if '# ' in x else x for x in collapse_rst] 35 | 36 | # print(collapse_rst) 37 | 38 | collapse_rst = convert(''.join(collapse_rst)) 39 | collapse_rst = collapse_rst.split("\n") 40 | collapse_rst = [ ' ' + x for x in collapse_rst] 41 | 42 | collapse_rst = [f'\n.. collapse:: Click to SHOW examples\n'] + collapse_rst 43 | os.remove(self.output) 44 | 45 | with open(self.output, 'a') as f: 46 | f.write(before) 47 | f.write('\n'.join(collapse_rst)) 48 | f.write(end) 49 | f.write('\n\n') 50 | 51 | with open('toc.bak', 'r') as t: 52 | f.write(t.read()) 53 | 54 | if __name__ == '__main__': 55 | index = IndexCon('../../README.md') 56 | 57 | -------------------------------------------------------------------------------- /docs/_templates/author.html: -------------------------------------------------------------------------------- 1 | By Zilliz Inc. -------------------------------------------------------------------------------- /docs/_templates/copyright.html: -------------------------------------------------------------------------------- 1 |

© Copyright 2023, Zilliz Inc.

-------------------------------------------------------------------------------- /docs/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ module_name | cap }} 2 | {{ module_name | title_bar }} 3 | 4 | .. contents:: Index 5 | 6 | {% for func in funcs -%} 7 | {{func[0]}} 8 | {{ func[0] | section_bar }} 9 | .. automodule:: {{func[1]}} 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | {% endfor %} -------------------------------------------------------------------------------- /docs/_templates/index.rst: -------------------------------------------------------------------------------- 1 | 🥸 API References 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | {% for module in modules %} 9 | {{ module }} 10 | {%- endfor -%} -------------------------------------------------------------------------------- /docs/bootcamp/assets/image_generation_gradio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/assets/image_generation_gradio.png -------------------------------------------------------------------------------- /docs/bootcamp/assets/speech_to_text_gradio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/assets/speech_to_text_gradio.png -------------------------------------------------------------------------------- /docs/bootcamp/assets/vqa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/assets/vqa.png -------------------------------------------------------------------------------- /docs/bootcamp/langchain/index.rst: -------------------------------------------------------------------------------- 1 | LangChain 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | qa_generation 10 | question_answering 11 | sqlite 12 | baby_agi -------------------------------------------------------------------------------- /docs/bootcamp/llama_index/index.rst: -------------------------------------------------------------------------------- 1 | Llama Index 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | webpage_qa -------------------------------------------------------------------------------- /docs/bootcamp/openai/index.rst: -------------------------------------------------------------------------------- 1 | OpenAI 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | chat 10 | image_generation 11 | sql_translate 12 | tweet_classifier 13 | image_generation 14 | speech_to_text 15 | -------------------------------------------------------------------------------- /docs/bootcamp/replicate/index.rst: -------------------------------------------------------------------------------- 1 | Replicate 2 | ================= 3 | 4 | .. 
toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | visual_question_answering -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | base="dark" 3 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/README.md: -------------------------------------------------------------------------------- 1 | # GPTCache Demo for OpenAI Audio Transcription 2 | This project demonstrates how [GPTcache](https://github.com/zilliztech/GPTCache) can be used to save costs when using OpenAI’s audio transcription API. It provides a simple Streamlit app that allows users to input an audio file and see the corresponding transcribed text. The app uses a cache to store previously generated transcriptions and reuses them for the same audio file, thus avoiding making duplicate API calls. 3 | 4 | ## Requirements 5 | * Python 3.6 or later 6 | * Dependencies listed in requirements.txt 7 | * OpenAI API key 8 | ## Usage 9 | 1. Clone the repository to your local machine 10 | Install the required packages: pip install -r requirements.txt 11 | 2. Run the app: streamlit run audio.py 12 | 3. Open the app in your browser at http://localhost:8501 13 | 4. Enter your OpenAI API key and upload an audio file to transcribe, then click “generate” to wait for the transcribed text to appear. 14 | If a cache hit occurred, you should see a message like “cache” at the bottom of the transcribed text. 15 | 16 |

17 | example 18 |

19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-audio/example.png -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/local/.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-audio/local/.cache -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-audio/requirements.txt: -------------------------------------------------------------------------------- 1 | gptcache 2 | pillow 3 | streamlit 4 | torch 5 | faiss-cpu 6 | torchaudio 7 | transformers 8 | sqlalchemy 9 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/README.md: -------------------------------------------------------------------------------- 1 | # GPTCache Demo for OpenAI Image Generation 2 | This project demonstrates how [GPTcache](https://github.com/zilliztech/GPTCache) can be used to save costs when using OpenAI’s DALL-E API. It provides a simple Streamlit app that allows users to input a prompt and see the corresponding DALL-E output image. The app uses a cache to store previously generated images and reuses them for the same prompt, thus avoiding making duplicate API calls. There is an online [demo](https://gptcache-openai-image.streamlit.app/) hosted for preview. 3 | 4 | ## Requirements 5 | * Python 3.6 or later 6 | * Dependencies listed in requirements.txt 7 | * OpenAI API key 8 | ## Usage 9 | 1. Clone the repository to your local machine 10 | Install the required packages: pip install -r requirements.txt 11 | 2. Run the app: streamlit run imagen.py 12 | 3. Open the app in your browser at http://localhost:8501 13 | 4. Enter your OpenAI key and prompt then click “generate” to 14 | wait for the DALL-E output image to appear. 15 | If a cache hit occurred, you should see a message like “cache” at the bottom of the image. 16 | 17 |

18 | example 19 |

20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-image/example.png -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/local/.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/bootcamp/streamlit/gptcache-streamlit-image/local/.cache -------------------------------------------------------------------------------- /docs/bootcamp/streamlit/gptcache-streamlit-image/requirements.txt: -------------------------------------------------------------------------------- 1 | gptcache 2 | pillow 3 | streamlit 4 | onnxruntime 5 | faiss-cpu 6 | transformers 7 | sqlalchemy 8 | -------------------------------------------------------------------------------- /docs/bootcamp/temperature/index.rst: -------------------------------------------------------------------------------- 1 | Temperature 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | chat 10 | create_image -------------------------------------------------------------------------------- /docs/bootcamp/vertex/index.rst: -------------------------------------------------------------------------------- 1 | Vertex 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | vertexai_caching -------------------------------------------------------------------------------- /docs/feature.md: -------------------------------------------------------------------------------- 1 | # Feature 2 | 3 | - Support the openai chat completion normal and stream request 4 | - Get top_k similar search results, it can be set when creating the data manager 5 | - Support the cache chain, see: `Cache#next_cache` 6 | 7 | ```python 8 | bak_cache = Cache() 9 | bak_cache.init() 10 | cache.init(next_cache=bak_cache) 11 | ``` 12 | 13 | - Whether to completely skip the current cache, that is, do not search the cache or save the Chat GPT results, see: `Cache#cache_enable_func` 14 | - In the cache initialization phase, no cache search is performed, but save the result returned by the chat gpt to cache, see: `cache_skip=True` in `create` request 15 | 16 | ```python 17 | openai.ChatCompletion.create( 18 | model="gpt-3.5-turbo", 19 | messages=mock_messages, 20 | cache_skip=True, 21 | ) 22 | ``` 23 | 24 | - Like Lego bricks, custom assemble all modules, including: 25 | - Adapter: The user interface to adapt different LLM model requests to the GPTCache protocol 26 | - Pre-processor: Extracts the key information from the request and preprocess 27 | - Context Buffer: Maintains session context 28 | - Encoder: Embed the text into a dense vector for similarity search 29 | - Cache manager: which includes searching, saving, or evicting data 30 | - Ranker: Evaluate similarity by judging the quality of cached answers 31 | - Post-processor: Determine which cached answers to the user, and generate the response -------------------------------------------------------------------------------- /docs/gptcache_live.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/docs/gptcache_live.pdf -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/references/index.rst: -------------------------------------------------------------------------------- 1 | 🥸 API References 2 | ================= 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | :caption: Contents: 7 | 8 | 9 | gptcache 10 | processor 11 | embedding 12 | utils 13 | adapter 14 | manager 15 | similarity_evaluation -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | urllib3<2.0 3 | pyqt5<5.13 4 | pyqtwebengine<5.13 5 | nbsphinx 6 | autodoc_pydantic 7 | myst_nb 8 | sphinx-autobuild 9 | sphinx_book_theme 10 | sphinx-panels 11 | sphinx_copybutton 12 | m2r2 13 | sphinx_toolbox 14 | -------------------------------------------------------------------------------- /docs/toc.bak: -------------------------------------------------------------------------------- 1 | .. toctree:: 2 | :maxdepth: 1 3 | :caption: Getting Started 4 | :name: getting-started 5 | :hidden: 6 | 7 | usage.md 8 | feature.md 9 | configure_it.md 10 | release_note.md 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | :caption: Bootcamp 15 | :name: bootcamp 16 | :hidden: 17 | 18 | bootcamp/langchain/index 19 | bootcamp/llama_index/index 20 | bootcamp/openai/index 21 | bootcamp/replicate/index 22 | bootcamp/temperature/index 23 | bootcamp/vertex/index 24 | 25 | .. toctree:: 26 | :maxdepth: 1 27 | :caption: References 28 | :name: references 29 | :hidden: 30 | 31 | references/index 32 | 33 | .. 
toctree:: 34 | :maxdepth: 1 35 | :caption: Contributing 36 | :name: contributing 37 | :hidden: 38 | 39 | contributing.md -------------------------------------------------------------------------------- /examples/adapter/api.py: -------------------------------------------------------------------------------- 1 | from gptcache import cache, Config, Cache 2 | from gptcache.adapter.api import put, get, init_similar_cache 3 | from gptcache.processor.post import nop 4 | from gptcache.processor.pre import get_prompt 5 | 6 | 7 | def run_basic(): 8 | cache.init(pre_embedding_func=get_prompt) 9 | put("hello", "foo") 10 | print(get("hello")) 11 | # output: foo 12 | 13 | 14 | def run_similar_match(): 15 | inner_cache = Cache() 16 | init_similar_cache( 17 | cache_obj=inner_cache, post_func=nop, config=Config(similarity_threshold=0) 18 | ) 19 | 20 | put("hello1", "foo1", cache_obj=inner_cache) 21 | put("hello2", "foo2", cache_obj=inner_cache) 22 | put("hello3", "foo3", cache_obj=inner_cache) 23 | 24 | messages = get("hello", cache_obj=inner_cache, top_k=3) 25 | print(messages) 26 | # output: ['foo1', 'foo2', 'foo3'] 27 | 28 | 29 | if __name__ == "__main__": 30 | run_basic() 31 | run_similar_match() 32 | -------------------------------------------------------------------------------- /examples/adapter/langchain_llms.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from langchain import Cohere 4 | from langchain.llms import OpenAI 5 | from langchain.chat_models import ChatOpenAI 6 | from langchain.schema import HumanMessage 7 | 8 | from gptcache.adapter.langchain_models import LangChainLLMs 9 | from gptcache import cache 10 | from gptcache.processor.pre import get_prompt 11 | 12 | from gptcache.adapter.langchain_models import LangChainChat 13 | 14 | OpenAI.api_key = os.getenv("OPENAI_API_KEY") 15 | Cohere.cohere_api_key = os.getenv("COHERE_API_KEY") 16 | 17 | 18 | def run_llm(): 19 | cache.init( 20 | pre_embedding_func=get_prompt, 21 | ) 22 | 23 | question = "what is chatgpt" 24 | 25 | langchain_openai = OpenAI(model_name="text-ada-001") 26 | llm = LangChainLLMs(llm=langchain_openai) 27 | answer = llm(prompt=question) 28 | print(answer) 29 | 30 | # TODO install cohere auto 31 | langchain_cohere = Cohere() 32 | llm = LangChainLLMs(llm=langchain_cohere) 33 | answer = llm(prompt=question) 34 | print(answer) 35 | 36 | 37 | def get_msg(data, **_): 38 | return data.get("messages")[-1].content 39 | 40 | 41 | def run_chat_model(): 42 | cache.init( 43 | pre_embedding_func=get_msg, 44 | ) 45 | 46 | chat = LangChainChat(chat=ChatOpenAI(temperature=0)) 47 | answer = chat( 48 | messages=[ 49 | HumanMessage( 50 | content="Translate this sentence from English to Chinese. I love programming." 
51 | ) 52 | ] 53 | ) 54 | print(answer) 55 | 56 | 57 | if __name__ == "__main__": 58 | run_llm() 59 | run_chat_model() 60 | -------------------------------------------------------------------------------- /examples/adapter/openai_chatgpt.py: -------------------------------------------------------------------------------- 1 | from gptcache import cache 2 | from gptcache.adapter import openai 3 | 4 | cache.init() 5 | cache.set_openai_key() 6 | 7 | question = 'what is github' 8 | answer = openai.ChatCompletion.create( 9 | model='gpt-3.5-turbo', 10 | messages=[ 11 | { 12 | 'role': 'user', 13 | 'content': question 14 | } 15 | ], 16 | ) 17 | print(answer) 18 | -------------------------------------------------------------------------------- /examples/benchmark/similiar_qqp.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/examples/benchmark/similiar_qqp.json.gz -------------------------------------------------------------------------------- /examples/benchmark/similiar_qqp_full.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/examples/benchmark/similiar_qqp_full.json.gz -------------------------------------------------------------------------------- /examples/data_manager/map_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.manager import get_data_manager 4 | from gptcache.adapter import openai 5 | from gptcache import cache 6 | 7 | 8 | def run(): 9 | dir_name, _ = os.path.split(os.path.abspath(__file__)) 10 | data_file = dir_name + '/data_map.txt' 11 | data_manager = get_data_manager(data_path=data_file, max_size=10) 12 | cache.init(data_manager=data_manager) 13 | cache.set_openai_key() 14 | 15 | answer = openai.ChatCompletion.create( 16 | model='gpt-3.5-turbo', 17 | messages=[ 18 | {'role': 'user', 'content': 'what is chatgpt'} 19 | ], 20 | ) 21 | print(answer) 22 | 23 | 24 | if __name__ == '__main__': 25 | run() 26 | -------------------------------------------------------------------------------- /examples/data_manager/scalar_store.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from gptcache import cache 6 | from gptcache.adapter import openai 7 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 8 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 9 | 10 | d = 8 11 | 12 | # Change the embdding function to your own 13 | def mock_embeddings(data, **kwargs): 14 | return np.random.random((d, )).astype('float32') 15 | 16 | 17 | def run(): 18 | scalar_stores = [ 19 | CacheBase('sqlite', sql_url='sqlite:///./sqlite.db'), 20 | CacheBase('postgresql', sql_url='postgresql+psycopg2://postgres:123456@127.0.0.1:5432/postgres'), 21 | CacheBase('mysql', sql_url='mysql+pymysql://root:123456@127.0.0.1:3306/mysql'), 22 | CacheBase('mariadb', sql_url='mariadb+pymysql://root:123456@127.0.0.1:3307/mysql'), 23 | CacheBase('sqlserver', sql_url='ssql+pyodbc://sa:Strongpsw_123@127.0.0.1:1434/msdb?driver=ODBC+Driver+17+for+SQL+Server'), 24 | CacheBase('oracle', sql_url='oracle+cx_oracle://oracle:123456@127.0.0.1:1521/?service_name=helowin&encoding=UTF-8&nencoding=UTF-8'), 25 | CacheBase('dynamo'), 26 | ] 27 | 28 | for scalar_store in 
scalar_stores: 29 | if os.path.exists('faiss.index'): 30 | os.remove('faiss.index') 31 | vector_base = VectorBase('faiss', dimension=d) 32 | data_manager = get_data_manager(scalar_store, vector_base) 33 | cache.init(embedding_func=mock_embeddings, 34 | data_manager=data_manager, 35 | similarity_evaluation=SearchDistanceEvaluation(), 36 | ) 37 | cache.set_openai_key() 38 | 39 | answer = openai.ChatCompletion.create( 40 | model='gpt-3.5-turbo', 41 | messages=[ 42 | {'role': 'user', 'content': 'what is chatgpt'} 43 | ], 44 | ) 45 | print('answer:', answer) 46 | 47 | answer = openai.ChatCompletion.create( 48 | model='gpt-3.5-turbo', 49 | messages=[ 50 | {'role': 'user', 'content': 'what is chatgpt'} 51 | ], 52 | ) 53 | print('answer cached:', answer) 54 | 55 | 56 | if __name__ == '__main__': 57 | run() 58 | -------------------------------------------------------------------------------- /examples/data_manager/vector_store.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache import cache 4 | from gptcache.adapter import openai 5 | from gptcache.manager import CacheBase, VectorBase, get_data_manager 6 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 7 | 8 | d = 8 9 | 10 | 11 | def mock_embeddings(data, **kwargs): 12 | return np.random.random((d, )).astype('float32') 13 | 14 | 15 | def run(): 16 | vector_stores = [ 17 | 'faiss', 18 | 'milvus', 19 | 'chromadb', 20 | 'docarray', 21 | 'redis', 22 | 'weaviate', 23 | ] 24 | for vector_store in vector_stores: 25 | cache_base = CacheBase('sqlite') 26 | vector_base = VectorBase(vector_store, dimension=d) 27 | data_manager = get_data_manager(cache_base, vector_base) 28 | 29 | cache.init( 30 | embedding_func=mock_embeddings, 31 | data_manager=data_manager, 32 | similarity_evaluation=SearchDistanceEvaluation(), 33 | ) 34 | cache.set_openai_key() 35 | 36 | answer = openai.ChatCompletion.create( 37 | model='gpt-3.5-turbo', 38 | messages=[{'role': 'user', 'content': 'what is chatgpt'}], 39 | ) 40 | print(answer) 41 | 42 | 43 | if __name__ == '__main__': 44 | run() 45 | -------------------------------------------------------------------------------- /examples/embedding/default.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.embedding.string import to_embeddings as string_embedding 4 | 5 | 6 | def run(): 7 | cache.init(embedding_func=string_embedding) 8 | cache.set_openai_key() 9 | 10 | answer = openai.ChatCompletion.create( 11 | model='gpt-3.5-turbo', 12 | messages=[ 13 | {'role': 'user', 'content': 'what is chatgpt'} 14 | ], 15 | ) 16 | print(answer) 17 | 18 | 19 | if __name__ == '__main__': 20 | run() 21 | -------------------------------------------------------------------------------- /examples/embedding/onnx.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager.factory import get_data_manager 4 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 5 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 6 | from gptcache.embedding import Onnx 7 | 8 | 9 | def run(): 10 | onnx = Onnx() 11 | 12 | cache_base = CacheBase('sqlite') 13 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 14 | data_manager = get_data_manager(cache_base, vector_base) 15 | 16 | 
cache.init(embedding_func=onnx.to_embeddings, 17 | data_manager=data_manager, 18 | similarity_evaluation=SearchDistanceEvaluation(), 19 | ) 20 | cache.set_openai_key() 21 | 22 | answer = openai.ChatCompletion.create( 23 | model='gpt-3.5-turbo', 24 | messages=[ 25 | {'role': 'user', 'content': 'what is chatgpt'} 26 | ], 27 | ) 28 | print(answer) 29 | 30 | 31 | if __name__ == '__main__': 32 | run() 33 | -------------------------------------------------------------------------------- /examples/embedding/paddlenlp.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager.factory import get_data_manager 4 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 5 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 6 | from gptcache.embedding import PaddleNLP 7 | 8 | 9 | def run(): 10 | paddlenlp = PaddleNLP() 11 | 12 | cache_base = CacheBase('sqlite') 13 | vector_base = VectorBase('faiss', dimension=paddlenlp.dimension) 14 | data_manager = get_data_manager(cache_base, vector_base) 15 | 16 | cache.init(embedding_func=paddlenlp.to_embeddings, 17 | data_manager=data_manager, 18 | similarity_evaluation=SearchDistanceEvaluation(), 19 | ) 20 | cache.set_openai_key() 21 | 22 | answer = openai.ChatCompletion.create( 23 | model='gpt-3.5-turbo', 24 | messages=[ 25 | {'role': 'user', 'content': 'what is chatgpt'} 26 | ], 27 | ) 28 | print(answer) 29 | 30 | 31 | if __name__ == '__main__': 32 | run() 33 | -------------------------------------------------------------------------------- /examples/embedding/random.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 4 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 5 | import numpy as np 6 | 7 | 8 | d = 8 9 | 10 | 11 | def mock_embeddings(data, **kwargs): 12 | return np.random.random((d, )).astype('float32') 13 | 14 | 15 | def run(): 16 | cache_base = CacheBase('sqlite') 17 | vector_base = VectorBase('faiss', dimension=d) 18 | data_manager = get_data_manager(cache_base, vector_base) 19 | cache.init(embedding_func=mock_embeddings, 20 | data_manager=data_manager, 21 | similarity_evaluation=SearchDistanceEvaluation(), 22 | ) 23 | cache.set_openai_key() 24 | 25 | answer = openai.ChatCompletion.create( 26 | model='gpt-3.5-turbo', 27 | messages=[ 28 | {'role': 'user', 'content': 'what is chatgpt'} 29 | ], 30 | ) 31 | print(answer) 32 | 33 | 34 | if __name__ == '__main__': 35 | run() 36 | -------------------------------------------------------------------------------- /examples/integrate/diffusers/stable_diffusion.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | from PIL import ImageChops 5 | from diffusers import DPMSolverMultistepScheduler 6 | 7 | from gptcache.adapter.diffusers import StableDiffusionPipeline 8 | from gptcache.processor.pre import get_prompt 9 | from gptcache import cache 10 | 11 | from gptcache.embedding import Onnx 12 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 13 | from gptcache.manager import get_data_manager, CacheBase, VectorBase, ObjectBase 14 | 15 | 16 | # onnx = Onnx() 17 | # cache_base = CacheBase('sqlite') 18 | # vector_base = VectorBase('milvus', 
host='localhost', port='19530', collection_name='gptcache_image', dimension=onnx.dimension) 19 | # object_base = ObjectBase('local', path='./images') 20 | # data_manager = get_data_manager(cache_base, vector_base, object_base) 21 | 22 | cache.init( 23 | pre_embedding_func=get_prompt, 24 | # embedding_func=onnx.to_embeddings, 25 | # data_manager=data_manager, 26 | # similarity_evaluation=SearchDistanceEvaluation(), 27 | ) 28 | 29 | 30 | model_id = "stabilityai/stable-diffusion-2-1" 31 | 32 | # Use the DPMSolverMultistepScheduler (DPM-Solver++) scheduler here instead 33 | pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16) 34 | pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) 35 | pipe = pipe.to("cuda") 36 | 37 | prompt = "a photo of an astronaut riding a horse on mars" 38 | start = time.time() 39 | image1 = pipe(prompt=prompt).images[0] 40 | print("First time generation:", time.time() - start) 41 | 42 | start = time.time() 43 | image2 = pipe(prompt=prompt).images[0] 44 | print("Second time generation:", time.time() - start) 45 | 46 | # Compare generated images 47 | diff = ImageChops.difference(image1, image2) 48 | assert not diff.getbbox(), "Got different images." -------------------------------------------------------------------------------- /examples/integrate/dolly/basic_usage.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | from transformers import pipeline 4 | from gptcache.processor.pre import get_inputs 5 | from gptcache.manager import manager_factory 6 | from gptcache import Cache 7 | from gptcache.embedding import Onnx 8 | from gptcache.adapter.dolly import Dolly 9 | 10 | 11 | def dolly_base_usage(): 12 | onnx = Onnx() 13 | m = manager_factory("sqlite,faiss,local", data_dir="./dolly", vector_params={"dimension": onnx.dimension}) 14 | llm_cache = Cache() 15 | llm_cache.init( 16 | pre_embedding_func=get_inputs, 17 | data_manager=m, 18 | embedding_func=onnx.to_embeddings 19 | ) 20 | 21 | llm = Dolly.from_model(model="databricks/dolly-v2-3b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0) 22 | 23 | context = """George Washington (February 22, 1732[b] – December 14, 1799) was an American military officer, statesman, 24 | and Founding Father who served as the first president of the United States from 1789 to 1797.""" 25 | 26 | for _ in range(2): 27 | start_time = time.time() 28 | answer = llm(context, cache_obj=llm_cache) 29 | print("Time consuming: {:.2f}s".format(time.time() - start_time)) 30 | print(f"Received: {answer[0]['generated_text']}") 31 | print(f"Hit cache: {answer[0].get('gptcache', False)}") 32 | 33 | 34 | def dolly_from_hugggingface(): 35 | onnx = Onnx() 36 | m = manager_factory("sqlite,faiss,local", data_dir="./dolly_hg", vector_params={"dimension": onnx.dimension}) 37 | llm_cache = Cache() 38 | llm_cache.init( 39 | pre_embedding_func=get_inputs, 40 | data_manager=m, 41 | embedding_func=onnx.to_embeddings 42 | ) 43 | 44 | pipe = pipeline(model="databricks/dolly-v2-3b", torch_dtype=torch.bfloat16, 45 | trust_remote_code=True, device=0, return_full_text=True) 46 | llm = Dolly(pipe) 47 | 48 | context = """George Washington (February 22, 1732[b] – December 14, 1799) was an American military officer, statesman, 49 | and Founding Father who served as the first president of the United States from 1789 to 1797.""" 50 | 51 | for _ in range(2): 52 | start_time = time.time() 53 | answer = llm(context, cache_obj=llm_cache) 54 | 
print("Time consuming: {:.2f}s".format(time.time() - start_time)) 55 | print(f"Received: {answer[0]['generated_text']}") 56 | print(f"Hit cache: {answer[0].get('gptcache', False)}") 57 | 58 | 59 | if __name__ == '__main__': 60 | dolly_base_usage() 61 | dolly_from_hugggingface() 62 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_llms_mock.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from langchain import Cohere 4 | from langchain.llms import OpenAI 5 | 6 | from gptcache.adapter.langchain_models import LangChainLLMs 7 | from gptcache import cache, Cache 8 | from gptcache.processor.pre import get_prompt 9 | 10 | OpenAI.api_key = os.getenv("OPENAI_API_KEY") 11 | Cohere.cohere_api_key = os.getenv("COHERE_API_KEY") 12 | 13 | 14 | def run(): 15 | data_file = "data_map.txt" 16 | has_data = os.path.isfile(data_file) 17 | llm_cache = Cache() 18 | llm_cache.init( 19 | pre_embedding_func=get_prompt, 20 | ) 21 | 22 | if not has_data: 23 | for i in range(10): 24 | question = f"foo{i}" 25 | answer = f"receiver the foo {i}" 26 | cache.data_manager.save(question, answer, cache.embedding_func(question)) 27 | 28 | question = "foo0" 29 | 30 | langchain_openai = OpenAI(model_name="text-ada-001") 31 | llm = LangChainLLMs(llm=langchain_openai) 32 | answer = llm(prompt=question, cache_obj=llm_cache) 33 | print(answer) 34 | answer = llm(prompt=question, cache_obj=llm_cache) 35 | print(answer) 36 | 37 | # TODO install cohere auto 38 | langchain_cohere = Cohere() 39 | llm = LangChainLLMs(llm=langchain_cohere) 40 | answer = llm(prompt=question, cache_obj=llm_cache) 41 | print(answer) 42 | 43 | 44 | if __name__ == '__main__': 45 | run() 46 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_prompt_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import openai 4 | import time 5 | from langchain.llms import OpenAI 6 | from langchain import PromptTemplate, LLMChain 7 | 8 | from gptcache.adapter.langchain_models import LangChainLLMs 9 | from gptcache import Cache 10 | from gptcache.processor.pre import get_prompt 11 | 12 | openai.api_key = os.getenv("OPENAI_API_KEY") 13 | 14 | template = """Question: {question} 15 | 16 | Answer: Let's think step by step.""" 17 | 18 | prompt = PromptTemplate(template=template, input_variables=["question"]) 19 | 20 | llm = OpenAI() 21 | 22 | question = "What NFL team won the Super Bowl in the year Justin Bieber was born?" 
23 | 24 | llm_cache = Cache() 25 | llm_cache.init( 26 | pre_embedding_func=get_prompt, 27 | ) 28 | 29 | before = time.time() 30 | cached_llm = LangChainLLMs(llm=llm) 31 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 32 | print(answer) 33 | print("Read through Time Spent =", time.time() - before) 34 | 35 | before = time.time() 36 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 37 | print(answer) 38 | print("Cache Hit Time Spent =", time.time() - before) 39 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_qa_chain.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from langchain import OpenAI 4 | from langchain.chains.question_answering import load_qa_chain 5 | from langchain.schema import Document 6 | 7 | from gptcache import cache 8 | from gptcache.adapter.api import init_similar_cache 9 | from gptcache.adapter.langchain_models import LangChainLLMs 10 | 11 | 12 | def get_content_func(data, **_): 13 | return data.get("prompt").split("Question:")[-1] 14 | 15 | 16 | init_similar_cache(pre_func=get_content_func) 17 | cache.set_openai_key() 18 | 19 | mkt_qa = load_qa_chain(llm=LangChainLLMs(llm=OpenAI(temperature=0)), chain_type="stuff") 20 | 21 | msg = "What is Traditional marketing?" 22 | 23 | 24 | before = time.time() 25 | answer = mkt_qa.run(question=msg, input_documents=[Document(page_content="marketing is hello world")]) 26 | print(answer) 27 | print("Time Spent:", time.time() - before) 28 | 29 | before = time.time() 30 | answer = mkt_qa.run(question=msg, input_documents=[Document(page_content="marketing is hello world")]) 31 | print(answer) 32 | print("Time Spent:", time.time() - before) 33 | -------------------------------------------------------------------------------- /examples/integrate/langchain/langchain_similaritycache_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import openai 5 | from langchain import PromptTemplate 6 | from langchain.llms import OpenAI 7 | 8 | from gptcache import Cache 9 | from gptcache.adapter.api import init_similar_cache 10 | from gptcache.adapter.langchain_models import LangChainLLMs 11 | from gptcache.processor.pre import get_prompt 12 | 13 | openai.api_key = os.getenv("OPENAI_API_KEY") 14 | 15 | template = """Question: {question} 16 | 17 | Answer: Let's think step by step.""" 18 | 19 | prompt = PromptTemplate(template=template, input_variables=["question"]) 20 | 21 | llm = OpenAI() 22 | 23 | question = "What NFL team won the Super Bowl in the year Justin Bieber was born?" 24 | 25 | llm_cache = Cache() 26 | init_similar_cache(pre_func=get_prompt, cache_obj=llm_cache) 27 | 28 | 29 | before = time.time() 30 | cached_llm = LangChainLLMs(llm=llm) 31 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 32 | print(answer) 33 | print("Read through Time Spent =", time.time() - before) 34 | 35 | before = time.time() 36 | question = "What is the winner Super Bowl in the year Justin Bieber was born?" 
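# The reworded question above is intended to hit the entry stored for the original
# question, since init_similar_cache matches semantically rather than exactly. A sketch
# of adjusting how strict that match is, modelled on the commented-out Config line in
# examples/processor/temperature_example.py (the 0.9 value is illustrative only):
#
#     from gptcache import Config
#     llm_cache.config = Config(similarity_threshold=0.9)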
37 | answer = cached_llm(prompt=question, cache_obj=llm_cache) 38 | print(answer) 39 | print("Cache Hit Time Spent =", time.time() - before) 40 | -------------------------------------------------------------------------------- /examples/integrate/llama_cpp/basic_usage.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache.adapter.llama_cpp import Llama 4 | from gptcache.manager import manager_factory 5 | from gptcache import Cache 6 | from gptcache.embedding import Onnx 7 | from gptcache.processor.pre import get_prompt 8 | 9 | 10 | def llama_cpp_base_usage(): 11 | onnx = Onnx() 12 | m = manager_factory("sqlite,faiss,local", data_dir="./llamacpp_basic", vector_params={"dimension": onnx.dimension}) 13 | llm_cache = Cache() 14 | llm_cache.init( 15 | pre_embedding_func=get_prompt, 16 | data_manager=m, 17 | embedding_func=onnx.to_embeddings 18 | ) 19 | llm = Llama("./ggml-model-q4_0.bin") 20 | for _ in range(2): 21 | start_time = time.time() 22 | answer = llm(prompt="Q: Name the planets in the solar system? A: ", stop=["Q:", "\n"], cache_obj=llm_cache) 23 | print("Time consuming: {:.2f}s".format(time.time() - start_time)) 24 | print(f"Received: {answer['choices'][0]['text']}") 25 | print(f"Hit cache: {answer.get('gptcache', False)}") 26 | 27 | 28 | def llama_cpp_stream_usage(): 29 | onnx = Onnx() 30 | m = manager_factory("sqlite,faiss,local", data_dir="./llamacpp_stream", vector_params={"dimension": onnx.dimension}) 31 | llm_cache = Cache() 32 | llm_cache.init( 33 | pre_embedding_func=get_prompt, 34 | data_manager=m, 35 | embedding_func=onnx.to_embeddings 36 | ) 37 | llm = Llama("./ggml-model-q4_0.bin") 38 | for _ in range(2): 39 | start_time = time.time() 40 | ret = llm(prompt="Q: Name the planets in the solar system? A: ", stop=["Q:", "\n"], stream=True, cache_obj=llm_cache) 41 | answer = '' 42 | for chunk in ret: 43 | answer += chunk['choices'][0]['text'] 44 | print("Time consuming: {:.2f}s".format(time.time() - start_time)) 45 | print(f"Received: {answer}") 46 | 47 | 48 | if __name__ == "__main__": 49 | llama_cpp_base_usage() 50 | llama_cpp_stream_usage() 51 | 52 | -------------------------------------------------------------------------------- /examples/integrate/replicate/vqa.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache 4 | from gptcache.adapter import replicate 5 | from gptcache.embedding import Timm, Onnx 6 | from gptcache.manager import get_data_manager, CacheBase, VectorBase, ObjectBase 7 | from gptcache.processor.pre import get_input_image_file_name 8 | from gptcache.similarity_evaluation.np import NumpyNormEvaluation 9 | 10 | timm = Timm('resnet18') 11 | onnx = Onnx() 12 | cache_base = CacheBase('sqlite') 13 | vector_base = VectorBase('faiss', dimension=timm.dimension) 14 | object_base = ObjectBase('local', path='./objects') 15 | data_manager = get_data_manager(cache_base, vector_base, object_base) 16 | 17 | cache.init( 18 | pre_embedding_func=get_input_image_file_name, 19 | data_manager=data_manager, 20 | embedding_func=timm.to_embeddings, 21 | similarity_evaluation=NumpyNormEvaluation(enable_normal=True, question_embedding_function=onnx.to_embeddings) 22 | ) 23 | 24 | 25 | image_path = '../../../docs/GPTCache.png' 26 | 27 | 28 | # run replicate clinet with gptcache 29 | start = time.time() 30 | question1 = "what is in the image?" 31 | question2 = "What can you see in the image?" 
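# How a hit is decided here: get_input_image_file_name keys the request by the image
# file, timm.to_embeddings turns the image into the stored vector, and
# NumpyNormEvaluation compares the two questions through onnx.to_embeddings. The second
# run below therefore asks a paraphrased question about the same image and is expected
# to be answered from the cache, which is what the two elapsed-time prints illustrate.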
32 | 33 | output = replicate.run( 34 | "andreasjansson/blip-2:4b32258c42e9efd4288bb9910bc532a69727f9acd26aa08e175713a0a857a608", 35 | input={ 36 | "image": open(image_path, 'rb'), 37 | "question": question1} 38 | ) 39 | end = time.time() 40 | print('Answer:', output) 41 | print('Time elapsed 1:', end - start) 42 | 43 | start = time.time() 44 | output = replicate.run( 45 | "andreasjansson/blip-2:4b32258c42e9efd4288bb9910bc532a69727f9acd26aa08e175713a0a857a608", 46 | input={ 47 | "image": open(image_path, 'rb'), 48 | "question": question2} 49 | ) 50 | end = time.time() 51 | print('Answer:', output) 52 | print('Time elapsed 2:', end - start) -------------------------------------------------------------------------------- /examples/integrate/stability/text_to_image.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import time 4 | 5 | from PIL import Image 6 | 7 | from gptcache import cache 8 | from gptcache.adapter.stability_sdk import StabilityInference, generation 9 | from gptcache.embedding import Onnx 10 | from gptcache.manager.factory import manager_factory 11 | from gptcache.processor.pre import get_prompt 12 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 13 | 14 | # init gptcache 15 | onnx = Onnx() 16 | data_manager = manager_factory('sqlite,faiss,local', 17 | data_dir='/', 18 | vector_params={'dimension': onnx.dimension}, 19 | object_params={'path': './images'} 20 | ) 21 | cache.init( 22 | pre_embedding_func=get_prompt, 23 | embedding_func=onnx.to_embeddings, 24 | data_manager=data_manager, 25 | similarity_evaluation=SearchDistanceEvaluation() 26 | ) 27 | 28 | # run with gptcache 29 | api_key = os.getenv('STABILITY_KEY', 'key-goes-here') 30 | 31 | stability_api = StabilityInference( 32 | key=os.environ['STABILITY_KEY'], # API Key reference. 33 | verbose=False, # Print debug messages. 34 | engine='stable-diffusion-xl-beta-v2-2-2', # Set the engine to use for generation. 
35 | ) 36 | 37 | start = time.time() 38 | answers = stability_api.generate( 39 | prompt='a cat sitting besides a dog', 40 | width=256, 41 | height=256 42 | ) 43 | 44 | for resp in answers: 45 | for artifact in resp.artifacts: 46 | if artifact.type == generation.ARTIFACT_IMAGE: 47 | img = Image.open(io.BytesIO(artifact.binary)) 48 | assert img.size == (256, 256) 49 | print('Time elapsed 1:', time.time() - start) 50 | 51 | start = time.time() 52 | answers = stability_api.generate( 53 | prompt='a dog and a dog sitting together', 54 | width=512, 55 | height=512 56 | ) 57 | 58 | for resp in answers: 59 | for artifact in resp.artifacts: 60 | if artifact.type == generation.ARTIFACT_IMAGE: 61 | img = Image.open(io.BytesIO(artifact.binary)) 62 | assert img.size == (512, 512) 63 | print('Time elapsed 2:', time.time() - start) -------------------------------------------------------------------------------- /examples/processor/temperature_example.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache 4 | from gptcache.adapter import openai 5 | from gptcache.embedding import Onnx 6 | from gptcache.manager import manager_factory 7 | from gptcache.processor.post import temperature_softmax 8 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 9 | 10 | cache.set_openai_key() 11 | 12 | # Init cache with vector store 13 | # if os.path.exists("faiss.index"): 14 | # os.remove("faiss.index") 15 | # if os.path.exists("sqlite.db"): 16 | # os.remove("sqlite.db") 17 | 18 | onnx = Onnx() 19 | data_manager = manager_factory("sqlite,faiss", vector_params={"dimension": onnx.dimension}) 20 | 21 | cache.init( 22 | embedding_func=onnx.to_embeddings, 23 | data_manager=data_manager, 24 | similarity_evaluation=SearchDistanceEvaluation(), 25 | post_process_messages_func=temperature_softmax 26 | ) 27 | # cache.config = Config(similarity_threshold=0.2) 28 | 29 | question = 'what is github' 30 | 31 | for _ in range(3): 32 | start = time.time() 33 | response = openai.ChatCompletion.create( 34 | model='gpt-3.5-turbo', 35 | temperature = 1.0, # Change temperature here 36 | messages=[{ 37 | 'role': 'user', 38 | 'content': question 39 | }], 40 | ) 41 | print(round(time.time() - start, 3)) 42 | print(response["choices"][0]["message"]["content"]) -------------------------------------------------------------------------------- /examples/session/session.py: -------------------------------------------------------------------------------- 1 | from gptcache import cache 2 | from gptcache.session import Session 3 | from gptcache.adapter import openai 4 | 5 | # init gptcache 6 | cache.init() 7 | cache.set_openai_key() 8 | 9 | 10 | def run_session(): 11 | session = Session() 12 | response = openai.ChatCompletion.create( 13 | model="gpt-3.5-turbo", 14 | messages=[ 15 | { 16 | "role": "user", 17 | "content": "what's github?" 
18 | }], 19 | session=session 20 | ) 21 | response_content = response["choices"][0]["message"]["content"] 22 | print(response_content) 23 | 24 | 25 | def run_custom_session(): 26 | def my_check_hit(cur_session_id, cache_session_ids, cache_questions, cache_answer): 27 | print(cur_session_id, cache_session_ids, cache_questions, cache_answer) 28 | if "GitHub" in cache_answer: 29 | return True 30 | return False 31 | session = Session(name="my-session", check_hit_func=my_check_hit) 32 | response = openai.ChatCompletion.create( 33 | model="gpt-3.5-turbo", 34 | messages=[ 35 | { 36 | "role": "user", 37 | "content": "what's github?" 38 | }], 39 | session=session 40 | ) 41 | response_content = response["choices"][0]["message"]["content"] 42 | print(response_content) 43 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/exact_match.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.similarity_evaluation.exact_match import ExactMatchEvaluation 4 | 5 | 6 | def run(): 7 | cache.init(similarity_evaluation=ExactMatchEvaluation()) 8 | cache.set_openai_key() 9 | 10 | answer = openai.ChatCompletion.create( 11 | model='gpt-3.5-turbo', 12 | messages=[ 13 | {'role': 'user', 'content': 'what is chatgpt'} 14 | ], 15 | ) 16 | print(answer) 17 | 18 | 19 | if __name__ == '__main__': 20 | run() 21 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/onnx.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, CacheBase, VectorBase 4 | from gptcache.embedding import Onnx as EmbeddingOnnx 5 | from gptcache.similarity_evaluation import OnnxModelEvaluation 6 | 7 | 8 | def run(): 9 | onnx = EmbeddingOnnx() 10 | evaluation_onnx = OnnxModelEvaluation() 11 | 12 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 13 | data_manager = get_data_manager('sqlite', vector_base) 14 | 15 | cache.init(embedding_func=onnx.to_embeddings, 16 | data_manager=data_manager, 17 | similarity_evaluation=evaluation_onnx, 18 | ) 19 | cache.set_openai_key() 20 | 21 | answer = openai.ChatCompletion.create( 22 | model='gpt-3.5-turbo', 23 | messages=[ 24 | {'role': 'user', 'content': 'what is chatgpt'} 25 | ], 26 | ) 27 | print(answer) 28 | 29 | 30 | if __name__ == '__main__': 31 | run() 32 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/search_distance.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, VectorBase 4 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 5 | from gptcache.embedding import Onnx 6 | 7 | 8 | def run(): 9 | onnx = Onnx() 10 | 11 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 12 | data_manager = get_data_manager('sqlite', vector_base) 13 | 14 | cache.init(embedding_func=onnx.to_embeddings, 15 | data_manager=data_manager, 16 | similarity_evaluation=SearchDistanceEvaluation(), 17 | ) 18 | cache.set_openai_key() 19 | 20 | answer = openai.ChatCompletion.create( 21 | model='gpt-3.5-turbo', 22 | messages=[ 23 | {'role': 'user', 'content': 'what is chatgpt'} 24 | ], 25 
| ) 26 | print(answer) 27 | 28 | 29 | if __name__ == '__main__': 30 | run() 31 | -------------------------------------------------------------------------------- /examples/similarity_evaluation/sequence_match.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter import openai 2 | from gptcache import cache 3 | from gptcache.manager import get_data_manager, VectorBase 4 | from gptcache.similarity_evaluation import SequenceMatchEvaluation 5 | from gptcache.processor.pre import concat_all_queries 6 | from gptcache.embedding import Onnx 7 | from gptcache import Config 8 | 9 | 10 | def run(): 11 | onnx = Onnx() 12 | 13 | vector_base = VectorBase('faiss', dimension=onnx.dimension) 14 | data_manager = get_data_manager('sqlite', vector_base) 15 | 16 | cache.init(embedding_func=onnx.to_embeddings, 17 | pre_embedding_func=concat_all_queries, 18 | data_manager=data_manager, 19 | similarity_evaluation=SequenceMatchEvaluation([0.1, 0.2, 0.7], 'onnx'), 20 | config=Config(context_len=3, skip_list=['system', 'assistant']) 21 | ) 22 | cache.set_openai_key() 23 | 24 | answer = openai.ChatCompletion.create( 25 | model='gpt-3.5-turbo', 26 | messages=[ 27 | {'role': 'system', 'content': 'you are a helpful chatbot.'}, 28 | {'role': 'user', 'content': 'query1'}, 29 | {'role': 'assistant', 'content': 'answer1'}, 30 | {'role': 'user', 'content': 'query2'}, 31 | {'role': 'assistant', 'content': 'answer2'}, 32 | {'role': 'user', 'content': 'query3'}, 33 | {'role': 'assistant', 'content': 'answer3'} 34 | ] 35 | ) 36 | print(answer) 37 | 38 | 39 | if __name__ == '__main__': 40 | run() 41 | -------------------------------------------------------------------------------- /gptcache/__init__.py: -------------------------------------------------------------------------------- 1 | """gptcache version""" 2 | __version__ = "0.1.44" 3 | 4 | from gptcache.config import Config 5 | from gptcache.core import Cache 6 | from gptcache.core import cache 7 | -------------------------------------------------------------------------------- /gptcache/adapter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/gptcache/adapter/__init__.py -------------------------------------------------------------------------------- /gptcache/adapter/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from typing import Any, Dict, Callable, Optional 3 | 4 | 5 | class BaseCacheLLM(metaclass=ABCMeta): 6 | """Base LLM, When you have enhanced llm without using the original llm api, 7 | you can use this class as a proxy to use the ability of the cache. 8 | 9 | NOTE: Please make sure that the custom llm returns the same value as the original llm. 10 | 11 | For example, if you use the openai proxy, you perform delay statistics before sending the openai request, 12 | and then you package this part of the function, so you may have a separate package, which is different from openai. 13 | If the api request parameters and return results you wrap are the same as the original ones, 14 | then you can use this class to obtain cache-related capabilities. 15 | 16 | Example: 17 | .. 
code-block:: python 18 | 19 | import time 20 | 21 | import openai 22 | 23 | from gptcache import Cache 24 | from gptcache.adapter import openai as cache_openai 25 | 26 | 27 | def proxy_openai_chat_complete(*args, **kwargs): 28 | start_time = time.time() 29 | res = openai.ChatCompletion.create(*args, **kwargs) 30 | print("Consume Time Spent =", round((time.time() - start_time), 2)) 31 | return res 32 | 33 | 34 | llm_cache = Cache() 35 | 36 | cache_openai.ChatCompletion.llm = proxy_openai_chat_complete 37 | cache_openai.ChatCompletion.cache_args = {"cache_obj": llm_cache} 38 | 39 | cache_openai.ChatCompletion.create( 40 | model="gpt-3.5-turbo", 41 | messages=[ 42 | { 43 | "role": "user", 44 | "content": "What's GitHub?", 45 | } 46 | ], 47 | ) 48 | """ 49 | 50 | llm: Optional[Callable] = None 51 | """ 52 | On a cache miss, if that variable is set, it will be called; 53 | if not, it will call the original llm. 54 | """ 55 | 56 | cache_args: Dict[str, Any] = {} 57 | """ 58 | It can be used to set some cache-related public parameters. 59 | If you don't want to set the same parameters every time when using cache, say cache_obj, you can use it. 60 | """ 61 | 62 | @classmethod 63 | def fill_base_args(cls, **kwargs): 64 | """ Fill the base args to the cache args 65 | """ 66 | for key, value in cls.cache_args.items(): 67 | if key not in kwargs: 68 | kwargs[key] = value 69 | 70 | return kwargs 71 | -------------------------------------------------------------------------------- /gptcache/adapter/dolly.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | from gptcache.adapter.adapter import adapt 4 | from gptcache.manager.scalar_data.base import Answer, DataType 5 | from gptcache.utils import import_huggingface, import_torch 6 | 7 | import_torch() 8 | import_huggingface() 9 | 10 | from transformers import pipeline # pylint: disable=wrong-import-position 11 | 12 | 13 | class Dolly: 14 | """Wrapper for Dolly (https://github.com/databrickslabs/dolly.git). 15 | 16 | Example using from_model: 17 | .. code-block:: python 18 | 19 | from gptcache import cache 20 | from gptcache.processor.pre import get_inputs 21 | cache.init(pre_embedding_func=get_inputs) 22 | 23 | from gptcache.adapter.dolly import Dolly 24 | dolly = Dolly.from_model( 25 | model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0 26 | ) 27 | 28 | Example passing pipeline in directly: 29 | .. 
code-block:: python 30 | 31 | import torch 32 | from transformers import pipeline 33 | from gptcache import cache 34 | from gptcache.processor.pre import get_inputs 35 | cache.init(pre_embedding_func=get_inputs) 36 | from gptcache.adapter.dolly import Dolly 37 | 38 | pipe = pipeline( 39 | model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0 40 | ) 41 | dolly = Dolly(pipe) 42 | """ 43 | 44 | def __init__(self, dolly_pipeline: Any): 45 | self._dolly_pipeline = dolly_pipeline 46 | 47 | @classmethod 48 | def from_model(cls, model: str, **kwargs): 49 | pipe = pipeline(model=model, **kwargs) 50 | return cls(pipe) 51 | 52 | def __call__(self, prompt: str, **kwargs): 53 | return adapt( 54 | self._dolly_pipeline, 55 | _cache_data_convert, 56 | _update_cache_callback, 57 | inputs=prompt, 58 | **kwargs 59 | ) 60 | 61 | 62 | def _cache_data_convert(cache_data): 63 | return [{"generated_text": cache_data, "gptcache": True}] 64 | 65 | 66 | def _update_cache_callback(llm_data, update_cache_func, *args, **kwargs): # pylint: disable=unused-argument 67 | update_cache_func(Answer(llm_data[0]["generated_text"], DataType.STR)) 68 | return llm_data 69 | -------------------------------------------------------------------------------- /gptcache/client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | 4 | from gptcache.utils import import_httpx 5 | 6 | import_httpx() 7 | 8 | import httpx # pylint: disable=C0413 9 | 10 | 11 | _CLIENT_HEADER = {"Content-Type": "application/json", "Accept": "application/json"} 12 | 13 | 14 | class Client: 15 | """GPTCache client to send requests to GPTCache server. 16 | 17 | :param uri: the uri leads to the server, defaults to "http://localhost:8000". 18 | :type uri: str 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache import client 24 | 25 | client = Client(uri="http://localhost:8000") 26 | client.put("Hi", "Hi back") 27 | ans = client.get("Hi") 28 | """ 29 | 30 | def __init__(self, uri: str = "http://localhost:8000"): 31 | self._uri = uri 32 | 33 | async def _put(self, question: str, answer: str): 34 | async with httpx.AsyncClient() as client: 35 | data = { 36 | "prompt": question, 37 | "answer": answer, 38 | } 39 | 40 | response = await client.post( 41 | f"{self._uri}/put", headers=_CLIENT_HEADER, data=json.dumps(data) 42 | ) 43 | 44 | return response.status_code 45 | 46 | async def _get(self, question: str): 47 | async with httpx.AsyncClient() as client: 48 | data = { 49 | "prompt": question, 50 | } 51 | 52 | response = await client.post( 53 | f"{self._uri}/get", headers=_CLIENT_HEADER, data=json.dumps(data) 54 | ) 55 | 56 | return response.json().get("answer") 57 | 58 | def put(self, question: str, answer: str): 59 | """ 60 | :param question: the question to be put. 61 | :type question: str 62 | :param answer: the answer to the question to be put. 63 | :type answer: str 64 | :return: status code. 65 | """ 66 | return asyncio.run(self._put(question, answer)) 67 | 68 | def get(self, question: str): 69 | """ 70 | :param question: the question to get an answer. 71 | :type question: str 72 | :return: answer to the question. 
73 | """ 74 | return asyncio.run(self._get(question)) 75 | -------------------------------------------------------------------------------- /gptcache/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "OpenAI", 3 | "Huggingface", 4 | "SBERT", 5 | "Cohere", 6 | "Onnx", 7 | "FastText", 8 | "Data2VecAudio", 9 | "Timm", 10 | "ViT", 11 | "LangChain", 12 | "Rwkv", 13 | "PaddleNLP", 14 | "UForm", 15 | ] 16 | 17 | 18 | from gptcache.utils.lazy_import import LazyImport 19 | 20 | openai = LazyImport("openai", globals(), "gptcache.embedding.openai") 21 | huggingface = LazyImport("huggingface", globals(), "gptcache.embedding.huggingface") 22 | sbert = LazyImport("sbert", globals(), "gptcache.embedding.sbert") 23 | onnx = LazyImport("onnx", globals(), "gptcache.embedding.onnx") 24 | cohere = LazyImport("cohere", globals(), "gptcache.embedding.cohere") 25 | fasttext = LazyImport("fasttext", globals(), "gptcache.embedding.fasttext") 26 | data2vec = LazyImport("data2vec", globals(), "gptcache.embedding.data2vec") 27 | timm = LazyImport("timm", globals(), "gptcache.embedding.timm") 28 | vit = LazyImport("vit", globals(), "gptcache.embedding.vit") 29 | langchain = LazyImport("langchain", globals(), "gptcache.embedding.langchain") 30 | rwkv = LazyImport("rwkv", globals(), "gptcache.embedding.rwkv") 31 | paddlenlp = LazyImport("paddlenlp", globals(), "gptcache.embedding.paddlenlp") 32 | uform = LazyImport("uform", globals(), "gptcache.embedding.uform") 33 | 34 | 35 | def Cohere(model="large", api_key=None): 36 | return cohere.Cohere(model, api_key) 37 | 38 | 39 | def OpenAI(model="text-embedding-ada-002", api_key=None): 40 | return openai.OpenAI(model, api_key) 41 | 42 | 43 | def Huggingface(model="distilbert-base-uncased"): 44 | return huggingface.Huggingface(model) 45 | 46 | 47 | def SBERT(model="all-MiniLM-L6-v2"): 48 | return sbert.SBERT(model) 49 | 50 | 51 | def Onnx(model="GPTCache/paraphrase-albert-onnx"): 52 | return onnx.Onnx(model) 53 | 54 | 55 | def FastText(model="en", dim=None): 56 | return fasttext.FastText(model, dim) 57 | 58 | 59 | def Data2VecAudio(model="facebook/data2vec-audio-base-960h"): 60 | return data2vec.Data2VecAudio(model) 61 | 62 | 63 | def Timm(model="resnet50", device="default"): 64 | return timm.Timm(model, device) 65 | 66 | 67 | def ViT(model="google/vit-base-patch16-384"): 68 | return vit.ViT(model) 69 | 70 | 71 | def LangChain(embeddings, dimension=0): 72 | return langchain.LangChain(embeddings, dimension) 73 | 74 | 75 | def Rwkv(model="sgugger/rwkv-430M-pile"): 76 | return rwkv.Rwkv(model) 77 | 78 | 79 | def PaddleNLP(model="ernie-3.0-medium-zh"): 80 | return paddlenlp.PaddleNLP(model) 81 | 82 | 83 | def UForm(model="unum-cloud/uform-vl-multilingual", embedding_type="text"): 84 | return uform.UForm(model, embedding_type) 85 | -------------------------------------------------------------------------------- /gptcache/embedding/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseEmbedding(metaclass=ABCMeta): 5 | """ 6 | Base Embedding interface. 
7 | """ 8 | 9 | @abstractmethod 10 | def to_embeddings(self, data, **kwargs): 11 | pass 12 | 13 | @property 14 | @abstractmethod 15 | def dimension(self) -> int: 16 | return 0 17 | -------------------------------------------------------------------------------- /gptcache/embedding/cohere.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.utils import import_cohere 4 | from gptcache.embedding.base import BaseEmbedding 5 | 6 | import_cohere() 7 | 8 | import cohere # pylint: disable=C0413 9 | 10 | 11 | class Cohere(BaseEmbedding): 12 | """Generate text embedding for given text using Cohere. 13 | 14 | :param model: model name (size), defaults to 'large'. 15 | :type model: str 16 | :param api_key: Cohere API Key. 17 | :type api_key: str 18 | 19 | Example: 20 | .. code-block:: python 21 | 22 | from gptcache.embedding import Cohere 23 | 24 | test_sentence = 'Hello, world.' 25 | encoder = Cohere(model='small', api_key='your_cohere_key') 26 | embed = encoder.to_embeddings(test_sentence) 27 | """ 28 | 29 | def __init__(self, model: str = "large", api_key: str = None): 30 | self.co = cohere.Client(api_key) 31 | self.model = model 32 | 33 | if model in self.dim_dict(): 34 | self.__dimension = self.dim_dict()[model] 35 | else: 36 | self.__dimension = None 37 | 38 | def to_embeddings(self, data, **_): 39 | """Generate embedding given text input 40 | 41 | :param data: text in string. 42 | :type data: str 43 | 44 | :return: a text embedding in shape of (dim,). 45 | """ 46 | if not isinstance(data, list): 47 | data = [data] 48 | response = self.co.embed(texts=data, model=self.model) 49 | embeddings = response.embeddings 50 | return np.array(embeddings).astype("float32").squeeze(0) 51 | 52 | @property 53 | def dimension(self): 54 | """Embedding dimension. 55 | 56 | :return: embedding dimension 57 | """ 58 | if not self.__dimension: 59 | foo_emb = self.to_embeddings("foo") 60 | self.__dimension = len(foo_emb) 61 | return self.__dimension 62 | 63 | @staticmethod 64 | def dim_dict(): 65 | return {"large": 4096, "small": 1024} 66 | -------------------------------------------------------------------------------- /gptcache/embedding/data2vec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.utils import import_huggingface, import_torch, import_torchaudio 4 | from gptcache.embedding.base import BaseEmbedding 5 | 6 | import_torch() 7 | import_huggingface() 8 | import_torchaudio() 9 | 10 | import torch # pylint: disable=C0413 11 | import torchaudio # pylint: disable=C0413 12 | from transformers import Data2VecAudioModel, Wav2Vec2Processor # pylint: disable=C0413 13 | 14 | 15 | class Data2VecAudio(BaseEmbedding): 16 | """Generate audio embedding for given audio using pretrained models from Data2Vec. 17 | 18 | :param model: model name, defaults to 'facebook/data2vec-audio-base-960h'. 19 | :type model: str 20 | 21 | Example: 22 | .. 
code-block:: python 23 | 24 | from gptcache.embedding import Data2VecAudio 25 | 26 | audio_file = 'test.wav' 27 | encoder = Data2VecAudio(model='facebook/data2vec-audio-base-960h') 28 | embed = encoder.to_embeddings(audio_file) 29 | """ 30 | def __init__(self, model_name = "facebook/data2vec-audio-base-960h"): 31 | self.model = Data2VecAudioModel.from_pretrained(model_name) 32 | self.processor = Wav2Vec2Processor.from_pretrained(model_name) 33 | self.__dimension = self.model.config.hidden_size 34 | self.sr = self.processor.feature_extractor.sampling_rate 35 | 36 | def to_embeddings(self, data, **_): 37 | """Generate embedding given text input 38 | 39 | :param data: path to audio file. 40 | :type data: str 41 | 42 | :return: a text embedding in shape of (dim,). 43 | """ 44 | audio = self.load_audio(data, self.sr) 45 | inputs = self.processor(audio, sampling_rate=self.sr, return_tensors="pt") 46 | with torch.no_grad(): 47 | outputs = self.model(**inputs) 48 | last_hidden_states = outputs.last_hidden_state 49 | feat = last_hidden_states[:,-1,:].flatten().detach().cpu().numpy() 50 | return np.array(feat).astype("float32") 51 | 52 | def load_audio(self, audio_path, target_sr): 53 | waveform, sample_rate = torchaudio.load(audio_path) 54 | waveform = torch.mean(waveform, axis=0) 55 | transform = torchaudio.transforms.Resample(sample_rate, target_sr) 56 | waveform = transform(waveform) 57 | return waveform 58 | 59 | 60 | @property 61 | def dimension(self): 62 | """Embedding dimension. 63 | 64 | :return: embedding dimension 65 | """ 66 | return self.__dimension 67 | -------------------------------------------------------------------------------- /gptcache/embedding/fasttext.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | from gptcache.utils import import_fasttext 5 | from gptcache.embedding.base import BaseEmbedding 6 | 7 | import_fasttext() 8 | 9 | import fasttext.util # pylint: disable=C0413 10 | 11 | 12 | class FastText(BaseEmbedding): 13 | """Generate sentence embedding for given text using pretrained models of different languages from fastText. 14 | 15 | :param model: model name, defaults to 'en'. 16 | :type model: str 17 | :param dim: reduced dimension of embedding. If this parameter is not provided, the embedding dimension (300) will not change. 18 | :type dim: int 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.embedding import FastText 24 | 25 | test_sentence = 'Hello, world.' 26 | encoder = FastText(model='en', dim=100) 27 | embed = encoder.to_embeddings(test_sentence) 28 | """ 29 | 30 | def __init__(self, model: str = "en", dim: int = None): 31 | self.model_path = os.path.abspath(fasttext.util.download_model(model)) 32 | self.ft = fasttext.load_model(self.model_path) 33 | 34 | if dim: 35 | fasttext.util.reduce_model(self.ft, dim) 36 | self.__dimension = self.ft.get_dimension() 37 | 38 | def to_embeddings(self, data, **_): 39 | """Generate embedding given text input 40 | 41 | :param data: text in string. 42 | :type data: str 43 | 44 | :return: a text embedding in shape of (dim,). 45 | """ 46 | assert isinstance(data, str), "Only allow string as input." 47 | emb = self.ft.get_sentence_vector(data) 48 | return np.array(emb).astype("float32") 49 | 50 | @property 51 | def dimension(self): 52 | """Embedding dimension. 
53 | 54 | :return: embedding dimension 55 | """ 56 | return self.__dimension 57 | -------------------------------------------------------------------------------- /gptcache/embedding/langchain.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding.base import BaseEmbedding 4 | from gptcache.utils import import_langchain 5 | 6 | import_langchain() 7 | 8 | from langchain.embeddings.base import Embeddings # pylint: disable=C0413 9 | 10 | 11 | class LangChain(BaseEmbedding): 12 | """Generate text embedding for given text using LangChain 13 | 14 | :param embeddings: the LangChain Embeddings object. 15 | :type embeddings: Embeddings 16 | :param dimension: The vector dimension after embedding is calculated by calling embed once by default. 17 | If you confirm the dimension, you can assign a value to this parameter to reduce this request. 18 | :type dimension: int 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.embedding import LangChain 24 | from langchain.embeddings.openai import OpenAIEmbeddings 25 | 26 | test_sentence = 'Hello, world.' 27 | embeddings = OpenAIEmbeddings(model="your-embeddings-deployment-name") 28 | encoder = LangChain(embeddings=embeddings) 29 | embed = encoder.to_embeddings(test_sentence) 30 | """ 31 | 32 | def __init__(self, embeddings: Embeddings, dimension: int = 0): 33 | self._embeddings: Embeddings = embeddings 34 | self._dimension: int = ( 35 | dimension if dimension != 0 else len(self._embeddings.embed_query("foo")) 36 | ) 37 | 38 | def to_embeddings(self, data, **kwargs): 39 | vector_data = self._embeddings.embed_query(data) 40 | return np.array(vector_data).astype("float32") 41 | 42 | @property 43 | def dimension(self) -> int: 44 | return self._dimension 45 | -------------------------------------------------------------------------------- /gptcache/embedding/openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | from gptcache.embedding.base import BaseEmbedding 6 | from gptcache.utils import import_openai 7 | 8 | import_openai() 9 | 10 | import openai # pylint: disable=C0413 11 | 12 | class OpenAI(BaseEmbedding): 13 | """Generate text embedding for given text using OpenAI. 14 | 15 | :param model: model name, defaults to 'text-embedding-ada-002'. 16 | :type model: str 17 | :param api_key: OpenAI API Key. When the parameter is not specified, it will load the key by default if it is available. 18 | :type api_key: str 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.embedding import OpenAI 24 | 25 | test_sentence = 'Hello, world.' 
26 | encoder = OpenAI(api_key='your_openai_key') 27 | embed = encoder.to_embeddings(test_sentence) 28 | """ 29 | 30 | def __init__(self, model: str = "text-embedding-ada-002", api_key: str = None, api_base: str = None): 31 | if not api_key: 32 | if openai.api_key: 33 | api_key = openai.api_key 34 | else: 35 | api_key = os.getenv("OPENAI_API_KEY") 36 | if not api_base: 37 | if openai.api_base: 38 | api_base = openai.api_base 39 | else: 40 | api_base = os.getenv("OPENAI_API_BASE") 41 | openai.api_key = api_key 42 | self.api_base = api_base # don't override all of openai as we may just want to override for say embeddings 43 | self.model = model 44 | if model in self.dim_dict(): 45 | self.__dimension = self.dim_dict()[model] 46 | else: 47 | self.__dimension = None 48 | 49 | def to_embeddings(self, data, **_): 50 | """Generate embedding given text input 51 | 52 | :param data: text in string. 53 | :type data: str 54 | 55 | :return: a text embedding in shape of (dim,). 56 | """ 57 | sentence_embeddings = openai.Embedding.create(model=self.model, input=data, api_base=self.api_base) 58 | return np.array(sentence_embeddings["data"][0]["embedding"]).astype("float32") 59 | 60 | @property 61 | def dimension(self): 62 | """Embedding dimension. 63 | 64 | :return: embedding dimension 65 | """ 66 | if not self.__dimension: 67 | foo_emb = self.to_embeddings("foo") 68 | self.__dimension = len(foo_emb) 69 | return self.__dimension 70 | 71 | @staticmethod 72 | def dim_dict(): 73 | return {"text-embedding-ada-002": 1536} 74 | -------------------------------------------------------------------------------- /gptcache/embedding/paddlenlp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding.base import BaseEmbedding 4 | from gptcache.utils import import_paddlenlp, import_paddle 5 | 6 | import_paddle() 7 | import_paddlenlp() 8 | 9 | 10 | import paddle # pylint: disable=C0413 11 | from paddlenlp.transformers import AutoModel, AutoTokenizer # pylint: disable=C0413 12 | 13 | class PaddleNLP(BaseEmbedding): 14 | """Generate sentence embedding for given text using pretrained models from PaddleNLP transformers. 15 | 16 | :param model: model name, defaults to 'ernie-3.0-medium-zh'. 17 | :type model: str 18 | 19 | Example: 20 | .. code-block:: python 21 | 22 | from gptcache.embedding import PaddleNLP 23 | 24 | test_sentence = 'Hello, world.' 25 | encoder = PaddleNLP(model='ernie-3.0-medium-zh') 26 | embed = encoder.to_embeddings(test_sentence) 27 | """ 28 | 29 | def __init__(self, model: str = "ernie-3.0-medium-zh"): 30 | self.model = AutoModel.from_pretrained(model) 31 | self.model.eval() 32 | 33 | self.tokenizer = AutoTokenizer.from_pretrained(model) 34 | if not self.tokenizer.pad_token: 35 | self.tokenizer.pad_token = "" 36 | self.__dimension = None 37 | 38 | def to_embeddings(self, data, **_): 39 | """Generate embedding given text input 40 | 41 | :param data: text in string. 42 | :type data: str 43 | 44 | :return: a text embedding in shape of (dim,). 
45 | """ 46 | if not isinstance(data, list): 47 | data = [data] 48 | inputs = self.tokenizer( 49 | data, padding=True, truncation=True, return_tensors="pd" 50 | ) 51 | outs = self.model(**inputs)[0] 52 | emb = self.post_proc(outs, inputs).squeeze(0).detach().numpy() 53 | return np.array(emb).astype("float32") 54 | 55 | def post_proc(self, token_embeddings, inputs): 56 | attention_mask = paddle.ones(inputs["token_type_ids"].shape) 57 | input_mask_expanded = ( 58 | attention_mask.unsqueeze(-1).expand(token_embeddings.shape).astype("float32") 59 | ) 60 | sentence_embs = paddle.sum( 61 | token_embeddings * input_mask_expanded, 1 62 | ) / paddle.clip(input_mask_expanded.sum(1), min=1e-9) 63 | return sentence_embs 64 | 65 | @property 66 | def dimension(self): 67 | """Embedding dimension. 68 | 69 | :return: embedding dimension 70 | """ 71 | if not self.__dimension: 72 | self.__dimension = len(self.to_embeddings("foo")) 73 | return self.__dimension 74 | 75 | -------------------------------------------------------------------------------- /gptcache/embedding/rwkv.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding.base import BaseEmbedding 4 | from gptcache.utils import import_huggingface 5 | 6 | import_huggingface() 7 | 8 | from transformers import AutoTokenizer, RwkvModel # pylint: disable=C0413 9 | 10 | 11 | class Rwkv(BaseEmbedding): 12 | """Generate sentence embedding for given text using RWKV models. 13 | 14 | :param model: model name, defaults to 'sgugger/rwkv-430M-pile'. Check 15 | https://huggingface.co/docs/transformers/model_doc/rwkv for more avaliable models. 16 | :type model: str 17 | 18 | Example: 19 | .. code-block:: python 20 | 21 | from gptcache.embedding import Rwkv 22 | 23 | test_sentence = 'Hello, world.' 24 | encoder = Rwkv(model='sgugger/rwkv-430M-pile') 25 | embed = encoder.to_embeddings(test_sentence) 26 | """ 27 | 28 | def __init__(self, model: str = "sgugger/rwkv-430M-pile"): 29 | self.model = RwkvModel.from_pretrained(model) 30 | self.model.eval() 31 | 32 | self.tokenizer = AutoTokenizer.from_pretrained(model) 33 | try: 34 | self.__dimension = self.model.config.hidden_size 35 | except Exception: # pylint: disable=W0703 36 | from transformers import AutoConfig # pylint: disable=C0415 37 | 38 | config = AutoConfig.from_pretrained(model) 39 | self.__dimension = config.hidden_size 40 | 41 | def to_embeddings(self, data, **_): 42 | """Generate embedding given text input 43 | 44 | :param data: text in string. 45 | :type data: str 46 | 47 | :return: a text embedding in shape of (dim,). 48 | """ 49 | inputs = self.tokenizer(data, return_tensors="pt") 50 | outputs = self.model(inputs["input_ids"]) 51 | emb = outputs.last_hidden_state[0, 0, :].detach().numpy() 52 | return np.array(emb).astype("float32") 53 | 54 | @property 55 | def dimension(self): 56 | """Embedding dimension. 
57 | 58 | :return: embedding dimension 59 | """ 60 | return self.__dimension 61 | -------------------------------------------------------------------------------- /gptcache/embedding/sbert.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gptcache.utils import import_sbert 3 | from gptcache.embedding.base import BaseEmbedding 4 | 5 | import_sbert() 6 | 7 | from sentence_transformers import SentenceTransformer # pylint: disable=C0413 8 | 9 | 10 | class SBERT(BaseEmbedding): 11 | """Generate sentence embedding for given text using pretrained models of Sentence Transformers. 12 | 13 | :param model: model name, defaults to 'all-MiniLM-L6-v2'. 14 | :type model: str 15 | 16 | Example: 17 | .. code-block:: python 18 | 19 | from gptcache.embedding import SBERT 20 | 21 | test_sentence = 'Hello, world.' 22 | encoder = SBERT('all-MiniLM-L6-v2') 23 | embed = encoder.to_embeddings(test_sentence) 24 | """ 25 | 26 | def __init__(self, model: str = "all-MiniLM-L6-v2"): 27 | self.model = SentenceTransformer(model) 28 | self.model.eval() 29 | self.__dimension = None 30 | 31 | def to_embeddings(self, data, **_): 32 | """Generate embedding given text input 33 | 34 | :param data: text in string. 35 | :type data: str 36 | 37 | :return: a text embedding in shape of (dim,). 38 | """ 39 | if not isinstance(data, list): 40 | data = [data] 41 | emb = self.model.encode(data).squeeze(0) 42 | 43 | if not self.__dimension: 44 | self.__dimension = len(emb) 45 | return np.array(emb).astype("float32") 46 | 47 | @property 48 | def dimension(self): 49 | """Embedding dimension. 50 | 51 | :return: embedding dimension 52 | """ 53 | if not self.__dimension: 54 | self.__dimension = len(self.to_embeddings("foo")) 55 | return self.__dimension 56 | -------------------------------------------------------------------------------- /gptcache/embedding/string.py: -------------------------------------------------------------------------------- 1 | def to_embeddings(data, **_): 2 | """Nothing to do, return the origin data""" 3 | return data 4 | -------------------------------------------------------------------------------- /gptcache/embedding/vit.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils import import_huggingface, import_torch, import_torchvision 2 | from gptcache.embedding.base import BaseEmbedding 3 | 4 | import_torch() 5 | import_huggingface() 6 | import_torchvision() 7 | 8 | import torch # pylint: disable=C0413 9 | from transformers import AutoImageProcessor # pylint: disable=C0413 10 | from transformers import ViTModel # pylint: disable=C0413 11 | 12 | 13 | class ViT(BaseEmbedding): 14 | """Generate sentence embedding for given text using pretrained models from Huggingface transformers. 15 | 16 | :param model: model name, defaults to 'google/vit-base-patch16-384'. 17 | :type model: str 18 | 19 | Example: 20 | .. 
code-block:: python 21 | 22 | import io 23 | from PIL import Image 24 | from gptcache.embedding import ImageEmbedding 25 | 26 | def prepare_image(image_data: str = None): 27 | if not image_data: 28 | image_data = io.BytesIO() 29 | Image.new('RGB', (244, 244), color=(255, 0, 0)).save(image_data, format='JPEG') 30 | image_data.seek(0) 31 | image = Image.open(image_data) 32 | return image 33 | 34 | image = prepare_image() 35 | encoder = ImageEmbeddings(model="google/vit-base-patch16-384") 36 | embed = encoder.to_embeddings(image) 37 | """ 38 | 39 | def __init__(self, model: str = "google/vit-base-patch16-384"): 40 | 41 | self.model_name = model 42 | model = ViTModel.from_pretrained(model) 43 | self.model = model.eval() 44 | config = self.model.config 45 | self.__dimension = config.hidden_size 46 | 47 | def to_embeddings(self, data, **__): 48 | """Generate embedding given text input 49 | 50 | :param data: text in string. 51 | :type data: str 52 | 53 | :return: a text embedding in shape of (dim,). 54 | """ 55 | inputs = self.preprocess(data) 56 | 57 | with torch.no_grad(): 58 | outputs = self.model(**inputs) 59 | 60 | last_hidden_states = outputs.last_hidden_state 61 | features = last_hidden_states[:, 0, :] 62 | features = features.squeeze() 63 | return features.detach().numpy() 64 | 65 | def preprocess(self, data): 66 | image_processor = AutoImageProcessor.from_pretrained(self.model_name) 67 | inputs = image_processor(data, return_tensors="pt") 68 | return inputs 69 | 70 | @property 71 | def dimension(self): 72 | """Embedding dimension. 73 | 74 | :return: embedding dimension 75 | """ 76 | return self.__dimension 77 | 78 | -------------------------------------------------------------------------------- /gptcache/manager/__init__.py: -------------------------------------------------------------------------------- 1 | from gptcache.manager.scalar_data import CacheBase 2 | from gptcache.manager.vector_data import VectorBase 3 | from gptcache.manager.object_data import ObjectBase 4 | from gptcache.manager.factory import get_data_manager, manager_factory 5 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["EvictionBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | eviction_manager = LazyImport( 6 | "eviction_manager", globals(), "gptcache.manager.eviction.manager" 7 | ) 8 | 9 | 10 | def EvictionBase(name: str, **kwargs): 11 | """Generate specific CacheStorage with the configuration. 12 | 13 | :param name: the name of the eviction, like: memory 14 | :type name: str 15 | 16 | :param policy: eviction strategy 17 | :type policy: str 18 | :param maxsize: the maxsize of cache data 19 | :type maxsize: int 20 | :param clean_size: will clean the size of data when the size of cache data reaches the max size 21 | :type clean_size: int 22 | :param on_evict: the function for cleaning the data in the store 23 | :type on_evict: Callable[[List[Any]], None] 24 | 25 | Example: 26 | .. 
code-block:: python 27 | 28 | from gptcache.manager import EvictionBase 29 | 30 | cache_base = EvictionBase('memory', policy='lru', maxsize=10, clean_size=2, on_evict=lambda x: print(x)) 31 | """ 32 | return eviction_manager.EvictionBase.get(name, **kwargs) 33 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Any, List 3 | 4 | 5 | class EvictionBase(metaclass=ABCMeta): 6 | """ 7 | Eviction base. 8 | """ 9 | 10 | @abstractmethod 11 | def put(self, objs: List[Any]): 12 | pass 13 | 14 | @abstractmethod 15 | def get(self, obj: Any): 16 | pass 17 | 18 | @property 19 | @abstractmethod 20 | def policy(self) -> str: 21 | pass 22 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/distributed_cache.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=wrong-import-position 2 | from abc import ABC, abstractmethod 3 | from typing import List 4 | 5 | from gptcache.manager.eviction.base import EvictionBase 6 | 7 | 8 | class DistributedEviction(EvictionBase, ABC): 9 | """ 10 | Base class for Distributed Eviction Strategy. 11 | """ 12 | 13 | @abstractmethod 14 | def put(self, objs: List[str]): 15 | pass 16 | 17 | @abstractmethod 18 | def get(self, obj: str): 19 | pass 20 | 21 | @property 22 | @abstractmethod 23 | def policy(self) -> str: 24 | pass 25 | 26 | 27 | class NoOpEviction(EvictionBase): 28 | """eviction: No Op Eviction Strategy. This is used when Eviction is managed internally 29 | by the Databases such as Redis or memcached and no eviction is required to perform. 30 | 31 | """ 32 | 33 | @property 34 | def policy(self) -> str: 35 | return "" 36 | 37 | def __init__(self, **kwargs): 38 | pass 39 | 40 | def put(self, objs: List[str]): 41 | pass 42 | 43 | def get(self, obj: str): 44 | pass 45 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/manager.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=import-outside-toplevel 2 | from typing import Callable, List, Any 3 | 4 | from gptcache.utils.error import NotFoundError 5 | 6 | 7 | class EvictionBase: 8 | """ 9 | EvictionBase to evict the cache data. 10 | """ 11 | 12 | def __init__(self): 13 | raise EnvironmentError( 14 | "EvictionBase is designed to be instantiated, " 15 | "please using the `EvictionBase.get(name, policy, maxsize, clean_size)`." 
16 | ) 17 | 18 | @staticmethod 19 | def get( 20 | name: str, 21 | policy: str = "LRU", 22 | maxsize: int = 1000, 23 | clean_size: int = 0, 24 | on_evict: Callable[[List[Any]], None] = None, 25 | **kwargs 26 | ): 27 | if not clean_size: 28 | clean_size = int(maxsize * 0.2) 29 | if name in "memory": 30 | from gptcache.manager.eviction.memory_cache import MemoryCacheEviction 31 | 32 | eviction_base = MemoryCacheEviction( 33 | policy, maxsize, clean_size, on_evict, **kwargs 34 | ) 35 | return eviction_base 36 | if name == "redis": 37 | from gptcache.manager.eviction.redis_eviction import RedisCacheEviction 38 | if policy == "LRU": 39 | policy = None 40 | eviction_base = RedisCacheEviction(policy=policy, **kwargs) 41 | return eviction_base 42 | if name == "no_op_eviction": 43 | from gptcache.manager.eviction.distributed_cache import NoOpEviction 44 | eviction_base = NoOpEviction() 45 | return eviction_base 46 | 47 | else: 48 | raise NotFoundError("eviction base", name) 49 | -------------------------------------------------------------------------------- /gptcache/manager/eviction/memory_cache.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, List 2 | 3 | import cachetools 4 | 5 | from gptcache.manager.eviction.base import EvictionBase 6 | 7 | 8 | def popitem_wrapper(func, wrapper_func, clean_size): 9 | def wrapper(*args, **kwargs): 10 | keys = [] 11 | try: 12 | keys = [func(*args, **kwargs)[0] for _ in range(clean_size)] 13 | except KeyError: 14 | pass 15 | wrapper_func(keys) 16 | 17 | return wrapper 18 | 19 | 20 | class MemoryCacheEviction(EvictionBase): 21 | """eviction: Memory Cache 22 | 23 | :param policy: eviction strategy 24 | :type policy: str 25 | :param maxsize: the maxsize of cache data 26 | :type maxsize: int 27 | :param clean_size: will clean the size of data when the size of cache data reaches the max size 28 | :type clean_size: int 29 | :param on_evict: the function for cleaning the data in the store 30 | :type on_evict: Callable[[List[Any]], None] 31 | 32 | 33 | """ 34 | 35 | def __init__( 36 | self, 37 | policy: str = "LRU", 38 | maxsize: int = 1000, 39 | clean_size: int = 0, 40 | on_evict: Callable[[List[Any]], None] = None, 41 | **kwargs, 42 | ): 43 | self._policy = policy.upper() 44 | if self._policy == "LRU": 45 | self._cache = cachetools.LRUCache(maxsize=maxsize, **kwargs) 46 | elif self._policy == "LFU": 47 | self._cache = cachetools.LFUCache(maxsize=maxsize, **kwargs) 48 | elif self._policy == "FIFO": 49 | self._cache = cachetools.FIFOCache(maxsize=maxsize, **kwargs) 50 | elif self._policy == "RR": 51 | self._cache = cachetools.RRCache(maxsize=maxsize, **kwargs) 52 | else: 53 | raise ValueError(f"Unknown policy {policy}") 54 | 55 | self._cache.popitem = popitem_wrapper(self._cache.popitem, on_evict, clean_size) 56 | 57 | def put(self, objs: List[Any]): 58 | for obj in objs: 59 | self._cache[obj] = True 60 | 61 | def get(self, obj: Any): 62 | return self._cache.get(obj) 63 | 64 | @property 65 | def policy(self) -> str: 66 | return self._policy 67 | -------------------------------------------------------------------------------- /gptcache/manager/eviction_manager.py: -------------------------------------------------------------------------------- 1 | class EvictionManager: 2 | """ 3 | EvictionManager to manager the eviction policy. 4 | 5 | :param scalar_storage: CacheStorage to manager the scalar data. 
6 | :type scalar_storage: :class:`CacheStorage` 7 | :param vector_base: VectorBase to manager the vector data. 8 | :type vector_base: :class:`VectorBase` 9 | """ 10 | 11 | MAX_MARK_COUNT = 5000 12 | MAX_MARK_RATE = 0.1 13 | BATCH_SIZE = 100000 14 | REBUILD_CONDITION = 5 15 | 16 | def __init__(self, scalar_storage, vector_base): 17 | self._scalar_storage = scalar_storage 18 | self._vector_base = vector_base 19 | self.delete_count = 0 20 | 21 | def check_evict(self): 22 | mark_count = self._scalar_storage.count(state=-1) 23 | all_count = self._scalar_storage.count(is_all=True) 24 | if ( 25 | mark_count > self.MAX_MARK_COUNT 26 | or mark_count / all_count > self.MAX_MARK_RATE 27 | ): 28 | return True 29 | return False 30 | 31 | def delete(self): 32 | mark_ids = self._scalar_storage.get_ids(deleted=True) 33 | self._scalar_storage.clear_deleted_data() 34 | self._vector_base.delete(mark_ids) 35 | self.delete_count += 1 36 | if self.delete_count >= self.REBUILD_CONDITION: 37 | self.rebuild() 38 | 39 | def rebuild(self): 40 | self._scalar_storage.clear_deleted_data() 41 | ids = self._scalar_storage.get_ids(deleted=False) 42 | self._vector_base.rebuild(ids) 43 | self.delete_count = 0 44 | 45 | def soft_evict(self, marked_keys): 46 | self._scalar_storage.mark_deleted(marked_keys) 47 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["ObjectBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | object_manager = LazyImport( 6 | "object_manager", globals(), "gptcache.manager.object_data.manager" 7 | ) 8 | 9 | 10 | def ObjectBase(name: str, **kwargs): 11 | """Generate specific ObjectStorage with the configuration. For example, setting for 12 | `ObjectBase` (with `name`) to manage LocalObjectStorage, S3 object storage. 13 | """ 14 | return object_manager.ObjectBase.get(name, **kwargs) 15 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, List 3 | 4 | 5 | class ObjectBase(ABC): 6 | """ 7 | Object storage base. 
8 | """ 9 | 10 | @abstractmethod 11 | def put(self, obj: Any) -> str: 12 | pass 13 | 14 | @abstractmethod 15 | def get(self, obj: str) -> Any: 16 | pass 17 | 18 | @abstractmethod 19 | def get_access_link(self, obj: str) -> str: 20 | pass 21 | 22 | @abstractmethod 23 | def delete(self, to_delete: List[str]): 24 | pass 25 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/local_storage.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | import os 3 | import uuid 4 | from pathlib import Path 5 | from gptcache.manager.object_data.base import ObjectBase 6 | from gptcache.utils.log import gptcache_log 7 | 8 | 9 | class LocalObjectStorage(ObjectBase): 10 | """Local object storage 11 | """ 12 | 13 | def __init__(self, local_root: str): 14 | self._local_root = Path(local_root) 15 | self._local_root.mkdir(exist_ok=True) 16 | 17 | def put(self, obj: Any) -> str: 18 | f_path = self._local_root / str(uuid.uuid4()) 19 | with open(f_path, "wb") as f: 20 | f.write(obj) 21 | return str(f_path.absolute()) 22 | 23 | def get(self, obj: str) -> Any: 24 | try: 25 | with open(obj, "rb") as f: 26 | return f.read() 27 | except Exception: # pylint: disable=broad-except 28 | return None 29 | 30 | def get_access_link(self, obj: str, _: int = 3600): 31 | return obj 32 | 33 | def delete(self, to_delete: List[str]): 34 | assert isinstance(to_delete, list) 35 | for obj in to_delete: 36 | try: 37 | os.remove(obj) 38 | except Exception: # pylint: disable=broad-except 39 | gptcache_log.warning("Can not find obj: %s", obj) 40 | pass 41 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/manager.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.error import NotFoundError 2 | 3 | 4 | class ObjectBase: 5 | """ 6 | ObjectBase to manage the object storage. 7 | 8 | Generate a specific ObjectStorage with the configuration. For example, set `name` for 9 | `ObjectBase` to manage LocalObjectStorage or S3 object storage. 10 | 11 | :param name: the name of the object storage; supported values are 'local' and 's3'. 12 | :type name: str 13 | :param path: the cache root of the LocalObjectStorage. 14 | :type path: str 15 | 16 | :param bucket: the bucket of s3. 17 | :type bucket: str 18 | :param path_prefix: s3 object prefix. 19 | :type path_prefix: str 20 | :param access_key: the access_key of s3. 21 | :type access_key: str 22 | :param secret_key: the secret_key of s3. 23 | :type secret_key: str 24 | 25 | :return: ObjectStorage. 26 | 27 | Example: 28 | .. code-block:: python 29 | 30 | from gptcache.manager import ObjectBase 31 | 32 | obj_storage = ObjectBase('local', path='./') 33 | """ 34 | 35 | def __init__(self): 36 | raise EnvironmentError( 37 | "ObjectBase is not designed to be instantiated directly, please use `ObjectBase.get(name)`."
38 | ) 39 | 40 | @staticmethod 41 | def get(name, **kwargs): 42 | if name == "local": 43 | from gptcache.manager.object_data.local_storage import LocalObjectStorage # pylint: disable=import-outside-toplevel 44 | object_base = LocalObjectStorage(kwargs.get("path", "./local_obj")) 45 | elif name == "s3": 46 | from gptcache.manager.object_data.s3_storage import S3Storage # pylint: disable=import-outside-toplevel 47 | object_base = S3Storage(kwargs.get("path_prefix"), kwargs.get("bucket"), 48 | kwargs.get("access_key"), kwargs.get("secret_key"), 49 | kwargs.get("endpoint")) 50 | else: 51 | raise NotFoundError("object store", name) 52 | return object_base 53 | -------------------------------------------------------------------------------- /gptcache/manager/object_data/s3_storage.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | import uuid 3 | import os 4 | 5 | from gptcache.manager.object_data.base import ObjectBase 6 | 7 | from gptcache.utils import import_boto3 8 | from gptcache.utils.log import gptcache_log 9 | 10 | import_boto3() 11 | import boto3 # pylint: disable=wrong-import-position 12 | 13 | 14 | class S3Storage(ObjectBase): 15 | """S3 storage 16 | """ 17 | 18 | def __init__(self, bucket: str, path_prefix: str, access_key: str, secret_key: str, endpoint: str = None): 19 | self._session = boto3.Session( 20 | aws_access_key_id=access_key, 21 | aws_secret_access_key=secret_key 22 | ) 23 | self._s3 = self._session.resource("s3") 24 | self._bucket = bucket 25 | self._path_prefix = path_prefix 26 | self._endpoint = endpoint 27 | 28 | def put(self, obj: Any) -> str: 29 | f_path = os.path.join(self._path_prefix, str(uuid.uuid4())) 30 | self._s3.Bucket(self._bucket).put_object(Key=str(f_path), Body=obj) 31 | return f_path 32 | 33 | def get(self, obj: str) -> Any: 34 | try: 35 | return self._s3.Bucket(self._bucket).Object(obj).get()["Body"].read() 36 | except Exception: # pylint: disable=broad-except 37 | gptcache_log.error("obj:%s not exist", obj) 38 | return None 39 | 40 | def get_access_link(self, obj: str, expires: int = 3600) -> str: 41 | s3 = self._session.client("s3") 42 | link = s3.generate_presigned_url( 43 | ClientMethod="get_object", 44 | ExpiresIn=expires, 45 | Params={ 46 | "Bucket": self._bucket, 47 | "Key": obj 48 | } 49 | ) 50 | if self._endpoint: 51 | link = link.replace("s3.amazonaws.com/" + self._bucket, self._endpoint) 52 | return link 53 | 54 | def delete(self, to_delete: List[str]): 55 | self._s3.Bucket(self._bucket).delete_objects(Delete={"Objects": [{"Key": k} for k in to_delete]}) 56 | -------------------------------------------------------------------------------- /gptcache/manager/scalar_data/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["CacheBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | scalar_manager = LazyImport( 6 | "scalar_manager", globals(), "gptcache.manager.scalar_data.manager" 7 | ) 8 | 9 | 10 | def CacheBase(name: str, **kwargs): 11 | """Generate specific CacheStorage with the configuration. For example, setting for 12 | `SQLDataBase` (with `name`, `sql_url` and `table_name` params) to manage SQLite, PostgreSQL, MySQL, MariaDB, SQL Server and Oracle. 
13 | """ 14 | return scalar_manager.CacheBase.get(name, **kwargs) 15 | -------------------------------------------------------------------------------- /gptcache/manager/vector_data/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["VectorBase"] 2 | 3 | from gptcache.utils.lazy_import import LazyImport 4 | 5 | vector_manager = LazyImport( 6 | "vector_manager", globals(), "gptcache.manager.vector_data.manager" 7 | ) 8 | 9 | 10 | def VectorBase(name: str, **kwargs): 11 | """Generate specific VectorBase with the configuration. 12 | """ 13 | return vector_manager.VectorBase.get(name, **kwargs) 14 | -------------------------------------------------------------------------------- /gptcache/manager/vector_data/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from dataclasses import dataclass 3 | from typing import List, Optional, Union 4 | 5 | import numpy as np 6 | 7 | 8 | @dataclass 9 | class VectorData: 10 | id: int 11 | data: np.ndarray 12 | 13 | 14 | class VectorBase(ABC): 15 | """VectorBase: base vector store interface""" 16 | 17 | @abstractmethod 18 | def mul_add(self, datas: List[VectorData]): 19 | pass 20 | 21 | @abstractmethod 22 | def search(self, data: np.ndarray, top_k: int): 23 | pass 24 | 25 | @abstractmethod 26 | def rebuild(self, ids=None) -> bool: 27 | pass 28 | 29 | @abstractmethod 30 | def delete(self, ids) -> bool: 31 | pass 32 | 33 | def flush(self): 34 | pass 35 | 36 | def close(self): 37 | pass 38 | 39 | def get_embeddings(self, data_id: Union[int, str]) -> Optional[np.ndarray]: 40 | raise NotImplementedError 41 | 42 | def update_embeddings(self, data_id: Union[int, str], emb: np.ndarray): 43 | pass 44 | -------------------------------------------------------------------------------- /gptcache/manager/vector_data/faiss.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | 4 | import numpy as np 5 | 6 | from gptcache.manager.vector_data.base import VectorBase, VectorData 7 | from gptcache.utils import import_faiss 8 | 9 | import_faiss() 10 | 11 | import faiss # pylint: disable=C0413 12 | 13 | 14 | class Faiss(VectorBase): 15 | """vector store: Faiss 16 | 17 | :param index_path: the path to Faiss index, defaults to 'faiss.index'. 18 | :type index_path: str 19 | :param dimension: the dimension of the vector, defaults to 0. 20 | :type dimension: int 21 | :param top_k: the number of the vectors results to return, defaults to 1. 
22 | :type top_k: int 23 | """ 24 | 25 | def __init__(self, index_file_path, dimension, top_k): 26 | self._index_file_path = index_file_path 27 | self._dimension = dimension 28 | self._index = faiss.index_factory(self._dimension, "IDMap,Flat", faiss.METRIC_L2) 29 | self._top_k = top_k 30 | if os.path.isfile(index_file_path): 31 | self._index = faiss.read_index(index_file_path) 32 | 33 | def mul_add(self, datas: List[VectorData]): 34 | data_array, id_array = map(list, zip(*((data.data, data.id) for data in datas))) 35 | np_data = np.array(data_array).astype("float32") 36 | ids = np.array(id_array) 37 | self._index.add_with_ids(np_data, ids) 38 | 39 | def search(self, data: np.ndarray, top_k: int = -1): 40 | if self._index.ntotal == 0: 41 | return None 42 | if top_k == -1: 43 | top_k = self._top_k 44 | np_data = np.array(data).astype("float32").reshape(1, -1) 45 | dist, ids = self._index.search(np_data, top_k) 46 | ids = [int(i) for i in ids[0]] 47 | return list(zip(dist[0], ids)) 48 | 49 | def rebuild(self, ids=None): 50 | return True 51 | 52 | def delete(self, ids): 53 | ids_to_remove = np.array(ids) 54 | self._index.remove_ids(faiss.IDSelectorBatch(ids_to_remove.size, faiss.swig_ptr(ids_to_remove))) 55 | 56 | def flush(self): 57 | faiss.write_index(self._index, self._index_file_path) 58 | 59 | def close(self): 60 | self.flush() 61 | 62 | def count(self): 63 | return self._index.ntotal 64 | -------------------------------------------------------------------------------- /gptcache/processor/__init__.py: -------------------------------------------------------------------------------- 1 | from gptcache.processor.context.context import ContextProcess 2 | -------------------------------------------------------------------------------- /gptcache/processor/check_hit.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=unused-argument 2 | def check_hit_session(cur_session_id: str, cache_session_ids: list, cache_questions: list, cache_answer: str): 3 | """ 4 | Check if the session result meets the hit requirement. 5 | 6 | :param cur_session_id: the name of the current session. 7 | :type cur_session_id: str 8 | :param cache_session_ids: a list of session names that cached the same question if you are using a map as the data management method. 9 | Otherwise, a list of session names for similar questions that share the same answer. 10 | :type cache_session_ids: list 11 | :param cache_questions: a list with one question, identical to the one you asked, if you use a map as the data management method. 12 | Otherwise, a list of questions similar to the one you asked that share the same answer, 13 | corresponding one-to-one with cache_session_ids. 14 | :type cache_questions: list 15 | :param cache_answer: the content of the cached answer.
16 | :type cache_answer: str 17 | 18 | :return: True or False 19 | """ 20 | return cur_session_id not in cache_session_ids 21 | -------------------------------------------------------------------------------- /gptcache/processor/context/__init__.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.lazy_import import LazyImport 2 | 3 | summarization = LazyImport( 4 | "summarization_context", 5 | globals(), 6 | "gptcache.processor.context.summarization_context", 7 | ) 8 | selective = LazyImport( 9 | "selective_context", globals(), "gptcache.processor.context.selective_context" 10 | ) 11 | concat = LazyImport( 12 | "concat_context", globals(), "gptcache.processor.context.concat_context" 13 | ) 14 | 15 | 16 | __all__ = [ 17 | "SummarizationContextProcess", 18 | "SelectiveContextProcess", 19 | "ConcatContextProcess", 20 | ] 21 | 22 | 23 | def SummarizationContextProcess(model_name=None, tokenizer=None, target_length=512): 24 | return summarization.SummarizationContextProcess( 25 | model_name, tokenizer, target_length 26 | ) 27 | 28 | 29 | def SelectiveContextProcess( 30 | model_type: str = "gpt2", 31 | lang: str = "en", 32 | reduce_ratio: float = 0.35, 33 | reduce_level: str = "phrase", 34 | ): 35 | return selective.SelectiveContextProcess( 36 | model_type=model_type, 37 | lang=lang, 38 | reduce_ratio=reduce_ratio, 39 | reduce_level=reduce_level, 40 | ) 41 | 42 | 43 | def ConcatContextProcess(): 44 | return concat.ConcatContextProcess() 45 | -------------------------------------------------------------------------------- /gptcache/processor/context/concat_context.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from gptcache.processor import ContextProcess 4 | 5 | 6 | class ConcatContextProcess(ContextProcess): 7 | """A concat context processor that simply concatenates the context. 8 | Generally used with the Rwkv embedding, because RWKV can accept almost arbitrarily long input. 9 | 10 | Example: 11 | .. code-block:: python 12 | 13 | from gptcache.manager import manager_factory 14 | from gptcache.processor.context.concat_context import ConcatContextProcess 15 | 16 | context_process = ConcatContextProcess() 17 | rwkv_embedding = Rwkv() 18 | data_manager = manager_factory( 19 | "sqlite,faiss", 20 | vector_params={"dimension": rwkv_embedding.dimension}, 21 | ) 22 | cache.init( 23 | pre_embedding_func=context_process.pre_process, 24 | embedding_func=rwkv_embedding.to_embeddings, 25 | data_manager=data_manager, 26 | ) 27 | """ 28 | 29 | content: str = "" 30 | 31 | def __init__( 32 | self 33 | ): 34 | self.content = "" 35 | self.concat_content = "" 36 | 37 | def format_all_content(self, data: Dict[str, Any], **params: Dict[str, Any]): 38 | for query in data["messages"]: 39 | self.content += f"{query['role']}: {query['content']} \n" 40 | self.concat_content += query["content"] 41 | 42 | def process_all_content(self) -> (Any, Any): 43 | return self.content, self.concat_content 44 | -------------------------------------------------------------------------------- /gptcache/processor/context/context.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Dict, Any 3 | 4 | 5 | class ContextProcess(metaclass=ABCMeta): 6 | """ContextProcess: the context process interface, which is used to pre-process a long conversation.
7 | By the way, the GPTCache will acquire more information and get a more accurate embedding vector. 8 | 9 | Example: 10 | .. code-block:: python 11 | 12 | from gptcache.processor.context import SummarizationContextProcess 13 | 14 | context_process = SummarizationContextProcess() 15 | cache.init(pre_embedding_func=context_process.pre_process) 16 | """ 17 | 18 | @abstractmethod 19 | def format_all_content(self, data: Dict[str, Any], **params: Dict[str, Any]): 20 | """format all content of the llm request data as a string 21 | 22 | :param data: the user llm request data 23 | :type data: Dict[str, Any] 24 | """ 25 | pass 26 | 27 | @abstractmethod 28 | def process_all_content(self) -> (Any, Any): 29 | """process all content of the llm request data, for extracting key information in context. 30 | In order to achieve this goal, you can pass the summary model and so on 31 | """ 32 | pass 33 | 34 | def pre_process(self, data: Dict[str, Any], **params: Dict[str, Any]) -> (Any, Any): 35 | """ pre-process function, it's used as the GPTCache initialization param -- pre_embedding_func. 36 | 37 | :param data: the user llm request data 38 | :type data: Dict[str, Any] 39 | """ 40 | self.format_all_content(data, **params) 41 | return self.process_all_content() 42 | -------------------------------------------------------------------------------- /gptcache/processor/context/selective_context.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from gptcache.processor import ContextProcess 4 | from gptcache.utils import import_selective_context 5 | 6 | import_selective_context() 7 | 8 | from selective_context import SelectiveContext # pylint: disable=C0413 9 | 10 | 11 | class SelectiveContextProcess(ContextProcess): 12 | """A context processor for selecting context 13 | 14 | Need to download the corresponding model before use, the default English model is: en_core_web_sm 15 | 16 | `pip install spacy && python -m spacy download en_core_web_sm` 17 | 18 | :param model_type: the selective context model name, default value is 'gpt2' 19 | :type model_type: str 20 | :param lang: the content lang type, default value is 'en'. 21 | :type lang: str 22 | :param reduce_ratio: selective context ratio. The range for the value is between 0 and 1, with a default value of 0.35. 23 | :type reduce_ratio: float 24 | :param reduce_level: selective context level. The valid values include 'sent', 'phrase', and 'token', with the default value being 'phrase'. 25 | :type reduce_level: str 26 | 27 | more details: https://github.com/liyucheng09/Selective_Context 28 | 29 | Example: 30 | .. 
code-block:: python 31 | 32 | from gptcache.processor.context.selective_context import SelectiveContextProcess 33 | 34 | context_process = SelectiveContextProcess() 35 | cache.init(pre_embedding_func=context_process.pre_process) 36 | """ 37 | 38 | content: str = "" 39 | 40 | def __init__( 41 | self, 42 | model_type: str = "gpt2", 43 | lang: str = "en", 44 | reduce_ratio: float = 0.35, 45 | reduce_level: str = "phrase", 46 | ): 47 | self.sc = SelectiveContext(model_type=model_type, lang=lang) 48 | self.reduce_ratio = reduce_ratio 49 | self.reduce_level = reduce_level 50 | 51 | def format_all_content(self, data: Dict[str, Any], **params: Dict[str, Any]): 52 | for query in data["messages"]: 53 | self.content += f"{query['role']}: {query['content']} \n" 54 | 55 | def process_all_content(self) -> (Any, Any): 56 | selective_content, _ = self.sc( 57 | self.content, reduce_ratio=self.reduce_ratio, reduce_level=self.reduce_level 58 | ) 59 | return self.content, selective_content 60 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/cohere_rerank.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Any 2 | 3 | from gptcache.similarity_evaluation import SimilarityEvaluation 4 | from gptcache.utils import import_cohere 5 | 6 | import_cohere() 7 | 8 | import cohere # pylint: disable=C0413 9 | 10 | 11 | class CohereRerank(SimilarityEvaluation): 12 | """Use the Cohere Rerank API to evaluate relevance of question and answer. 13 | 14 | Reference: https://docs.cohere.com/reference/rerank-1 15 | 16 | :param model: model name, defaults to 'rerank-english-v2.0', and multilingual option: rerank-multilingual-v2.0. 17 | :type model: str 18 | :param api_key: cohere api key, defaults to None. 19 | :type api_key: str 20 | 21 | Example: 22 | .. code-block:: python 23 | 24 | from gptcache.similarity_evaluation import CohereRerankEvaluation 25 | 26 | evaluation = CohereRerankEvaluation() 27 | score = evaluation.evaluation( 28 | { 29 | 'question': 'What is the color of sky?' 30 | }, 31 | { 32 | 'answer': 'the color of sky is blue' 33 | } 34 | ) 35 | """ 36 | 37 | def __init__(self, model: str = "rerank-english-v2.0", api_key: str = None): 38 | self.co = cohere.Client(api_key) 39 | self.model = model 40 | 41 | def evaluation(self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **kwargs) -> float: 42 | response = self.co.rerank( 43 | model=self.model, 44 | query=src_dict["question"], 45 | documents=cache_dict["answer"], 46 | top_n=1, 47 | ) 48 | if len(response.results) == 0: 49 | return 0 50 | return response.results[0].relevance_score 51 | 52 | def range(self) -> Tuple[float, float]: 53 | return 0.0, 1.0 54 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/distance.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Any 2 | 3 | from gptcache.similarity_evaluation import SimilarityEvaluation 4 | 5 | 6 | class SearchDistanceEvaluation(SimilarityEvaluation): 7 | """Using search distance to evaluate sentences pair similarity. 8 | 9 | This is the evaluator to compare two embeddings according to their distance computed in embedding retrieval stage. 10 | In the retrieval stage, `search_result` is the distance used for approximate nearest neighbor search and have been 11 | put into `cache_dict`. 
`max_distance` is used to bound this distance so that it lies within [0, `max_distance`]. `positive` is 12 | used to indicate that this distance is directly proportional to the similarity of the two entities. If `positive` is set to `False`, 13 | the distance is subtracted from `max_distance` to get the final score. 14 | 15 | :param max_distance: the bound of maximum distance. 16 | :type max_distance: float 17 | :param positive: set to True if a larger distance indicates that two entities are more similar; otherwise set to False. 18 | :type positive: bool 19 | 20 | Example: 21 | .. code-block:: python 22 | 23 | from gptcache.similarity_evaluation import SearchDistanceEvaluation 24 | 25 | evaluation = SearchDistanceEvaluation() 26 | score = evaluation.evaluation( 27 | {}, 28 | { 29 | "search_result": (1, None) 30 | } 31 | ) 32 | """ 33 | 34 | def __init__(self, max_distance=4.0, positive=False): 35 | self.max_distance = max_distance 36 | self.positive = positive 37 | 38 | def evaluation( 39 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_ 40 | ) -> float: 41 | """Evaluate the similarity score of pair. 42 | :param src_dict: the query dictionary to evaluate with cache. 43 | :type src_dict: Dict 44 | :param cache_dict: the cache dictionary. 45 | :type cache_dict: Dict 46 | 47 | :return: evaluation score. 48 | """ 49 | distance, _ = cache_dict["search_result"] 50 | if distance < 0: 51 | distance = 0 52 | elif distance > self.max_distance: 53 | distance = self.max_distance 54 | if self.positive: 55 | return distance 56 | return self.max_distance - distance 57 | 58 | def range(self) -> Tuple[float, float]: 59 | """Range of similarity score. 60 | 61 | :return: minimum and maximum of similarity score. 62 | """ 63 | return 0.0, self.max_distance 64 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/exact_match.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Dict, Any 2 | 3 | from gptcache.similarity_evaluation.similarity_evaluation import SimilarityEvaluation 4 | 5 | 6 | class ExactMatchEvaluation(SimilarityEvaluation): 7 | """Using an exact-match metric to evaluate sentence pair similarity. 8 | 9 | This evaluator directly compares two `question` texts. If every single character in the two questions matches, then this evaluator 10 | will return 1, otherwise 0. 11 | 12 | Example: 13 | .. code-block:: python 14 | 15 | from gptcache.similarity_evaluation import ExactMatchEvaluation 16 | 17 | evaluation = ExactMatchEvaluation() 18 | score = evaluation.evaluation( 19 | { 20 | "question": "What is the color of sky?" 21 | }, 22 | { 23 | "question": "What is the color of sky?" 24 | } 25 | ) 26 | """ 27 | 28 | def __init__(self): 29 | pass 30 | 31 | def evaluation( 32 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_ 33 | ) -> float: 34 | """Evaluate the similarity score of pair. 35 | 36 | :param src_dict: the query dictionary to evaluate with cache_dict. 37 | :type src_dict: Dict 38 | :param cache_dict: the cache dictionary. 39 | :type cache_dict: Dict 40 | 41 | :return: evaluation score. 42 | """ 43 | return 1 if cache_dict["question"] == src_dict["question"] else 0 44 | 45 | def range(self) -> Tuple[float, float]: 46 | """Range of similarity score. 47 | 48 | :return: minimum and maximum of similarity score.
49 | """ 50 | return 0, 1 51 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/sbert_crossencoder.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Tuple, Any 2 | from gptcache.utils import import_sbert 3 | from gptcache.similarity_evaluation import SimilarityEvaluation 4 | import_sbert() 5 | from sentence_transformers import CrossEncoder # pylint: disable=C0413 6 | 7 | class SbertCrossencoderEvaluation(SimilarityEvaluation): 8 | """Using SBERT crossencoders to evaluate sentences pair similarity. 9 | 10 | This evaluator use the crossencoder model to evaluate the similarity of two sentences. 11 | 12 | :param model: model name of SbertCrossencoderEvaluation. Default is 'cross-encoder/quora-distilroberta-base'. 13 | Check more please refer to https://www.sbert.net/docs/pretrained_cross-encoders.html#quora-duplicate-questions. 14 | :type model: str 15 | 16 | Example: 17 | .. code-block:: python 18 | 19 | from gptcache.similarity_evaluation import SbertCrossencoderEvaluation 20 | 21 | evaluation = SbertCrossencoderEvaluation() 22 | score = evaluation.evaluation( 23 | { 24 | 'question': 'What is the color of sky?' 25 | }, 26 | { 27 | 'question': 'hello' 28 | } 29 | ) 30 | """ 31 | def __init__(self, model: str="cross-encoder/quora-distilroberta-base"): 32 | self.model = CrossEncoder(model) 33 | 34 | def evaluation( 35 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_ 36 | ) -> float: 37 | """Evaluate the similarity score of pair. 38 | 39 | :param src_dict: the query dictionary to evaluate with cache. 40 | :type src_dict: Dict 41 | :param cache_dict: the cache dictionary. 42 | :type cache_dict: Dict 43 | 44 | :return: evaluation score. 45 | """ 46 | try: 47 | src_question = src_dict["question"] 48 | cache_question = cache_dict["question"] 49 | if src_question.lower() == cache_question.lower(): 50 | return 1 51 | return self.model.predict([(src_question, cache_question)])[0] 52 | except Exception: # pylint: disable=W0703 53 | return 0 54 | 55 | def range(self) -> Tuple[float, float]: 56 | """Range of similarity score. 57 | 58 | :return: minimum and maximum of similarity score. 59 | """ 60 | return 0.0, 1.0 61 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/similarity_evaluation.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import Tuple, Dict, Any 3 | 4 | 5 | class SimilarityEvaluation(metaclass=ABCMeta): 6 | """Similarity Evaluation interface, 7 | determine the similarity between the input request and the requests from the Vector Store. 8 | Based on this similarity, it determines whether a request matches the cache. 9 | 10 | Example: 11 | .. code-block:: python 12 | 13 | from gptcache import cache 14 | from gptcache.similarity_evaluation import SearchDistanceEvaluation 15 | 16 | cache.init( 17 | similarity_evaluation=SearchDistanceEvaluation() 18 | ) 19 | """ 20 | 21 | @abstractmethod 22 | def evaluation( 23 | self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **kwargs 24 | ) -> float: 25 | """Evaluate the similarity score of the user and cache requests pair. 26 | 27 | :param src_dict: the user request params. 28 | :type src_dict: Dict 29 | :param cache_dict: the cache request params. 
30 | :type cache_dict: Dict 31 | """ 32 | pass 33 | 34 | @abstractmethod 35 | def range(self) -> Tuple[float, float]: 36 | """Range of similarity score. 37 | 38 | :return: the range of similarity score, which is the min and max values 39 | :rtype: Tuple[float, float] 40 | """ 41 | pass 42 | -------------------------------------------------------------------------------- /gptcache/similarity_evaluation/time.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Tuple, Dict, Any 3 | 4 | from gptcache.adapter.api import _get_eval 5 | from gptcache.similarity_evaluation import SimilarityEvaluation 6 | 7 | 8 | class TimeEvaluation(SimilarityEvaluation): 9 | """Add time dimension restrictions on the basis of other Evaluation, 10 | for example, only use the cache within 1 day from the current time, 11 | and filter out the previous cache. 12 | 13 | :param evaluation: Similarity evaluation, like distance/onnx. 14 | :param evaluation_config: Similarity evaluation config. 15 | :param time_range: Time range, time unit: s 16 | 17 | Example: 18 | .. code-block:: python 19 | 20 | import datetime 21 | 22 | from gptcache.manager.scalar_data.base import CacheData 23 | from gptcache.similarity_evaluation import TimeEvaluation 24 | 25 | evaluation = TimeEvaluation(evaluation="distance", time_range=86400) 26 | 27 | similarity = eval.evaluation( 28 | {}, 29 | { 30 | "search_result": (3.5, None), 31 | "cache_data": CacheData("a", "b", create_on=datetime.datetime.now()), 32 | }, 33 | ) 34 | # 0.5 35 | 36 | """ 37 | 38 | def __init__(self, evaluation: str, evaluation_config=None, time_range: float = 86400.0): 39 | if evaluation_config is None: 40 | evaluation_config = {} 41 | self._eval = _get_eval(evaluation, evaluation_config) 42 | self._time_range = time_range 43 | 44 | def evaluation(self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **kwargs) -> float: 45 | cache_data = cache_dict.get("cache_data", None) 46 | if not cache_data or not cache_data.create_on: 47 | return self.range()[0] 48 | delta_time = datetime.now().timestamp() - cache_data.create_on.timestamp() 49 | if delta_time > self._time_range: 50 | return self.range()[0] 51 | return self._eval.evaluation(src_dict, cache_dict, **kwargs) 52 | 53 | def range(self) -> Tuple[float, float]: 54 | return self._eval.range() 55 | 56 | -------------------------------------------------------------------------------- /gptcache/utils/cache_func.py: -------------------------------------------------------------------------------- 1 | def cache_all(*_, **__): 2 | return True 3 | -------------------------------------------------------------------------------- /gptcache/utils/dependency_control.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | from gptcache.utils.error import PipInstallError 4 | from gptcache.utils.log import gptcache_log 5 | 6 | 7 | def prompt_install(package: str, warn: bool = False): # pragma: no cover 8 | """ 9 | Function used to prompt user to install a package. 10 | """ 11 | cmd = f"pip install -q {package}" 12 | try: 13 | if warn and input(f"Install {package}? 
Y/n: ") != "Y": 14 | raise ModuleNotFoundError(f"No module named {package}") 15 | print(f"start to install package: {package}") 16 | subprocess.check_call(cmd, shell=True) 17 | print(f"successfully installed package: {package}") 18 | gptcache_log.info("%s installed successfully!", package) 19 | except subprocess.CalledProcessError as e: 20 | raise PipInstallError(package) from e 21 | -------------------------------------------------------------------------------- /gptcache/utils/error.py: -------------------------------------------------------------------------------- 1 | class CacheError(Exception): 2 | """GPTCache base error""" 3 | 4 | 5 | class NotInitError(CacheError): 6 | """Raise when the cache is used before it has been initialized""" 7 | def __init__(self): 8 | super().__init__("The cache should be inited before using") 9 | 10 | 11 | class NotFoundError(CacheError): 12 | """Raise when getting an unsupported store.""" 13 | def __init__(self, store_type, current_type_name): 14 | super().__init__(f"Unsupported {store_type}: {current_type_name}") 15 | 16 | 17 | class ParamError(CacheError): 18 | """Raise when receiving an invalid param.""" 19 | 20 | 21 | class PipInstallError(CacheError): 22 | """Raise when a package fails to install.""" 23 | def __init__(self, package): 24 | super().__init__(f"Ran into error installing {package}.") 25 | 26 | 27 | def wrap_error(e: Exception) -> Exception: 28 | """Add a type to exception `e` while ensuring that the original type is not changed 29 | 30 | Example: 31 | .. code-block:: python 32 | 33 | import openai 34 | 35 | from gptcache.utils.error import wrap_error 36 | 37 | 38 | def raise_error(): 39 | try: 40 | raise openai.error.OpenAIError(message="test") 41 | except openai.error.OpenAIError as e: 42 | raise wrap_error(e) 43 | 44 | 45 | try: 46 | raise_error() 47 | except openai.error.OpenAIError as e: 48 | print("exception:") 49 | print(e) 50 | 51 | print("over") 52 | """ 53 | e.__class__ = type(e.__class__.__name__, (CacheError, e.__class__), {}) 54 | return e 55 | -------------------------------------------------------------------------------- /gptcache/utils/lazy_import.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from types import ModuleType 3 | 4 | 5 | class LazyImport(ModuleType): 6 | """ 7 | Lazily import a module.
8 | """ 9 | 10 | def __init__(self, local_name, parent_module_globals, name): 11 | self._local_name = local_name 12 | self._parent_module_globals = parent_module_globals 13 | super().__init__(name) 14 | 15 | def _load(self): 16 | module = importlib.import_module(self.__name__) 17 | self._parent_module_globals[self._local_name] = module 18 | self.__dict__.update(module.__dict__) 19 | return module 20 | 21 | def __getattr__(self, item): 22 | module = self._load() 23 | return getattr(module, item) 24 | 25 | def __dir__(self): 26 | module = self._load() 27 | return dir(module) 28 | -------------------------------------------------------------------------------- /gptcache/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gptcache 4 | 5 | FORMAT = '%(asctime)s - %(thread)d - %(filename)s-%(module)s:%(lineno)s - %(levelname)s: %(message)s' 6 | logging.basicConfig(format=FORMAT) 7 | 8 | gptcache_log = logging.getLogger(f'gptcache:{gptcache.__version__}') 9 | -------------------------------------------------------------------------------- /gptcache/utils/response.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import requests 3 | 4 | 5 | def get_message_from_openai_answer(openai_resp): 6 | return openai_resp["choices"][0]["message"]["content"] 7 | 8 | 9 | def get_stream_message_from_openai_answer(openai_data): 10 | return openai_data["choices"][0]["delta"].get("content", "") 11 | 12 | 13 | def get_text_from_openai_answer(openai_resp): 14 | return openai_resp["choices"][0]["text"] 15 | 16 | 17 | def get_image_from_openai_b64(openai_resp): 18 | return openai_resp["data"][0]["b64_json"] 19 | 20 | 21 | def get_image_from_openai_url(openai_resp): 22 | url = openai_resp["data"][0]["url"] 23 | img_content = requests.get(url).content 24 | img_data = base64.b64encode(img_content) 25 | return img_data 26 | 27 | 28 | def get_image_from_path(openai_resp): 29 | img_path = openai_resp["data"][0]["url"] 30 | with open(img_path, "rb") as f: 31 | img_data = base64.b64encode(f.read()) 32 | return img_data 33 | 34 | 35 | def get_audio_text_from_openai_answer(openai_resp): 36 | return openai_resp["text"] 37 | -------------------------------------------------------------------------------- /gptcache/utils/softmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def softmax(x: list): 5 | x = np.array(x) 6 | assert len(x.shape) == 1, f"Expect to get a shape of (len,) but got {x.shape}, x value: {x}." 
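# note: subtracting the max (next lines) before calling exp() keeps the exponentials in a safe numeric range -- the standard numerically stable softmax; the shift cancels out in the normalization, so the result is unchanged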
7 | max_val = x.max() 8 | e_x = np.exp(x - max_val) 9 | return e_x / e_x.sum() 10 | -------------------------------------------------------------------------------- /gptcache/utils/time.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache 4 | 5 | 6 | def time_cal(func, func_name=None, report_func=None): 7 | def inner(*args, **kwargs): 8 | time_start = time.time() 9 | res = func(*args, **kwargs) 10 | delta_time = time.time() - time_start 11 | if cache.config.log_time_func: 12 | cache.config.log_time_func( 13 | func.__name__ if func_name is None else func_name, delta_time 14 | ) 15 | if report_func is not None: 16 | report_func(delta_time) 17 | return res 18 | 19 | return inner 20 | -------------------------------------------------------------------------------- /gptcache/utils/token.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils import import_tiktoken 2 | 3 | _encoding = None 4 | 5 | 6 | def _get_encoding(): 7 | global _encoding 8 | if _encoding is None: 9 | import_tiktoken() 10 | import tiktoken # pylint: disable=C0415 11 | _encoding = tiktoken.get_encoding("cl100k_base") 12 | return _encoding 13 | 14 | 15 | def token_counter(text): 16 | """Token Counter""" 17 | num_tokens = len(_get_encoding().encode(text)) 18 | return num_tokens 19 | -------------------------------------------------------------------------------- /gptcache_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/GPTCache/48f8e768d7dcd7f66d948ad07914a630a382b45b/gptcache_server/__init__.py -------------------------------------------------------------------------------- /gptcache_server/dockerfiles/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim-bullseye 2 | 3 | ENV PYTHONDONTWRITEBYTECODE=1 4 | 5 | RUN pip3 install --upgrade pip 6 | 7 | RUN pip3 install --no-cache-dir gptcache 8 | 9 | WORKDIR /workspace 10 | 11 | CMD ["gptcache_server", "-s", "0.0.0.0"] 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | cachetools 3 | requests -------------------------------------------------------------------------------- /scripts/manage_conda_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | DEFAULT_ENV_NAME="gpt-cache" 5 | 6 | # Usage: ./manage_conda_env.sh create [env_name] 7 | # Usage: ./manage_conda_env.sh remove [env_name] 8 | 9 | if [[ "$1" == "create" ]]; then 10 | if [[ -n "$2" ]]; then 11 | env_name="$2" 12 | else 13 | env_name="$DEFAULT_ENV_NAME" 14 | fi 15 | if conda env list | grep -q "^$env_name "; then 16 | echo "conda environment '$env_name' already exists." 17 | else 18 | conda create --name "$env_name" python=3.8 19 | echo "conda environment '$env_name' created." 20 | fi 21 | conda activate "$env_name" 22 | echo "conda environment '$env_name' activated." 23 | elif [[ "$1" == "remove" ]]; then 24 | conda deactivate 25 | if [[ -n "$2" ]]; then 26 | env_name="$2" 27 | else 28 | env_name="$DEFAULT_ENV_NAME" 29 | fi 30 | conda remove --name "$env_name" --all 31 | echo "conda environment '$env_name' removed." 
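# note: "conda activate" / "conda deactivate" only persist if this script is sourced, e.g. "source scripts/manage_conda_env.sh create"; a child shell cannot change its parent shell's environment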
32 | else 33 | echo "Usage: ./manage_conda_env.sh [create|remove] [env_name]" 34 | exit 1 35 | fi -------------------------------------------------------------------------------- /scripts/remove_example_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | parent_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)" 4 | find "$parent_dir/examples" \( -path "$parent_dir/examples/benchmark" -path "$parent_dir/examples/sqlite_milvus_mock" \) -prune -o \( -type f \( -name 'data_map*.txt' -or -name 'faiss.index' -or -name '*.db' \) -delete \) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import os 3 | import re 4 | from typing import List 5 | 6 | import setuptools 7 | from setuptools import find_packages 8 | 9 | here = os.path.abspath(os.path.dirname(__file__)) 10 | 11 | 12 | with open("README.md", "r") as fh: 13 | long_description = fh.read() 14 | 15 | 16 | def parse_requirements(file_name: str) -> List[str]: 17 | with open(file_name) as f: 18 | return [ 19 | require.strip() for require in f 20 | if require.strip() and not require.startswith('#') 21 | ] 22 | 23 | 24 | def read(*parts): 25 | with codecs.open(os.path.join(here, *parts), "r") as fp: 26 | return fp.read() 27 | 28 | 29 | def find_version(*file_paths): 30 | version_file = read(*file_paths) 31 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) 32 | if version_match: 33 | return version_match.group(1) 34 | raise RuntimeError("Unable to find version string.") 35 | 36 | 37 | setuptools.setup( 38 | name="gptcache", 39 | packages=find_packages(), 40 | version=find_version("gptcache", "__init__.py"), 41 | author="SimFG", 42 | author_email="bang.fu@zilliz.com", 43 | description="GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat " 44 | "applications that rely on the LLM service. GPTCache works as a memcache for AIGC applications, " 45 | "similar to how Redis works for traditional applications.", 46 | long_description=long_description, 47 | long_description_content_type="text/markdown", 48 | install_requires=parse_requirements('requirements.txt'), 49 | url="https://github.com/zilliztech/GPTCache", 50 | license='https://opensource.org/license/mit/', 51 | python_requires='>=3.8.1', 52 | entry_points={ 53 | 'console_scripts': [ 54 | 'gptcache_server=gptcache_server.server:main', 55 | ], 56 | }, 57 | ) 58 | -------------------------------------------------------------------------------- /tests/integration_tests/base/client_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from utils.util_log import test_log as log 4 | from common import common_type as ct 5 | from common import common_func as cf 6 | 7 | 8 | class Base: 9 | def setup_method(self, method): 10 | log.info(("*" * 35) + " setup " + ("*" * 35)) 11 | log.info("[setup_method] Start setup test case %s." % method.__name__) 12 | log.info("[setup_method] Clean up tmp files.") 13 | cf.remove_file() 14 | 15 | def teardown_method(self, method): 16 | log.info(("*" * 35) + " teardown " + ("*" * 35)) 17 | log.info("[teardown_method] Start teardown test case %s..." 
% method.__name__) 18 | log.info("[teardown_method] Clean up tmp files.") 19 | cf.remove_file() 20 | -------------------------------------------------------------------------------- /tests/integration_tests/common/common_func.py: -------------------------------------------------------------------------------- 1 | """" Methods of processing data """ 2 | import os 3 | from common import common_type as ct 4 | from utils.util_log import test_log as log 5 | 6 | 7 | def remove_file(file_names=[ct.sqlite_file, ct.faiss_file]): 8 | """ 9 | delete files 10 | :param file_names: file name list 11 | :return: None 12 | """ 13 | for file in file_names: 14 | if os.path.isfile(file): 15 | os.remove(file) 16 | log.info("%s is removed" % file) 17 | 18 | 19 | def log_time_func(func_name, delta_time): 20 | """ 21 | print function time 22 | :param func_name: function name 23 | :param delta_time: consumed time 24 | :return: None 25 | """ 26 | log.info("func `{}` consume time: {:.2f}s".format(func_name, delta_time)) 27 | 28 | 29 | def disable_cache(*args, **kwargs): 30 | """ 31 | disable cache 32 | """ 33 | return False 34 | -------------------------------------------------------------------------------- /tests/integration_tests/common/common_type.py: -------------------------------------------------------------------------------- 1 | """ Initialized parameters """ 2 | sqlite_file = "sqlite.db" 3 | faiss_file = "faiss.index" 4 | -------------------------------------------------------------------------------- /tests/integration_tests/config/log_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | 4 | 5 | class LogConfig: 6 | def __init__(self): 7 | self.log_debug = "" 8 | self.log_err = "" 9 | self.log_info = "" 10 | self.log_worker = "" 11 | self.get_default_config() 12 | 13 | @staticmethod 14 | def get_env_variable(var="CI_LOG_PATH"): 15 | """get log path for testing""" 16 | try: 17 | log_path = os.environ[var] 18 | return str(log_path) 19 | except Exception as e: 20 | # now = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') 21 | log_path = f"/tmp/ci_logs" 22 | print( 23 | "[get_env_variable] failed to get environment variables : %s, use default path : %s" 24 | % (str(e), log_path) 25 | ) 26 | return log_path 27 | 28 | @staticmethod 29 | def create_path(log_path): 30 | if not os.path.isdir(str(log_path)): 31 | print("[create_path] folder(%s) is not exist." 
% log_path) 32 | print("[create_path] create path now...") 33 | os.makedirs(log_path) 34 | 35 | def get_default_config(self): 36 | """Make sure the path exists""" 37 | log_dir = self.get_env_variable() 38 | self.log_debug = "%s/ci_test_log.debug" % log_dir 39 | self.log_info = "%s/ci_test_log.log" % log_dir 40 | self.log_err = "%s/ci_test_log.err" % log_dir 41 | work_log = os.environ.get("PYTEST_XDIST_WORKER") 42 | if work_log is not None: 43 | self.log_worker = f"{log_dir}/{work_log}.log" 44 | 45 | self.create_path(log_dir) 46 | 47 | 48 | log_config = LogConfig() 49 | -------------------------------------------------------------------------------- /tests/integration_tests/examples/map/test_example_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.utils.response import get_message_from_openai_answer 4 | from gptcache.manager.factory import get_data_manager 5 | from gptcache.adapter import openai 6 | from gptcache import cache, Cache 7 | 8 | 9 | def test_map(): 10 | dir_name, _ = os.path.split(os.path.abspath(__file__)) 11 | bak_cache = Cache() 12 | bak_data_file = dir_name + "/data_map_bak.txt" 13 | bak_cache.init(data_manager=get_data_manager(data_path=bak_data_file, max_size=10)) 14 | data_file = dir_name + "/data_map.txt" 15 | cache.init( 16 | data_manager=get_data_manager(data_path=data_file, max_size=10), 17 | next_cache=bak_cache, 18 | ) 19 | 20 | cache.set_openai_key() 21 | mock_messages = [ 22 | {"role": "system", "content": "You are a helpful assistant."}, 23 | {"role": "user", "content": "foo15"}, 24 | ] 25 | 26 | if not os.path.isfile(bak_data_file): 27 | cache.import_data( 28 | [f"foo{i}" for i in range(10)], [f"receiver the foo {i}" for i in range(10)] 29 | ) 30 | if not os.path.isfile(data_file): 31 | bak_cache.import_data( 32 | [f"foo{i}" for i in range(10, 20)], 33 | [f"receiver the foo {i}" for i in range(10, 20)], 34 | ) 35 | 36 | expect_answer = "receiver the foo 15" 37 | answer = openai.ChatCompletion.create( 38 | model="gpt-3.5-turbo", 39 | messages=mock_messages, 40 | ) 41 | assert get_message_from_openai_answer(answer) == expect_answer 42 | 43 | cache.flush() 44 | 45 | bak_cache2 = Cache() 46 | bak_cache2.init(data_manager=get_data_manager(data_path=bak_data_file, max_size=10)) 47 | cache.init( 48 | data_manager=get_data_manager(data_path=data_file, max_size=10), 49 | next_cache=bak_cache2, 50 | ) 51 | answer = openai.ChatCompletion.create( 52 | model="gpt-3.5-turbo", 53 | messages=mock_messages, 54 | ) 55 | assert get_message_from_openai_answer(answer) == expect_answer 56 | -------------------------------------------------------------------------------- /tests/integration_tests/examples/sqlite_faiss_mock/test_example_sqlite_faiss.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.utils.response import get_message_from_openai_answer 4 | from gptcache.adapter import openai 5 | from gptcache import cache, Config 6 | from gptcache.manager import get_data_manager, VectorBase 7 | from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation 8 | import numpy as np 9 | 10 | 11 | d = 8 12 | 13 | 14 | def mock_embeddings(data, **kwargs): # pylint: disable=W0613 15 | return np.random.random((d,)).astype("float32") 16 | 17 | 18 | def test_sqlite_faiss(): 19 | sqlite_file = "sqlite.db" 20 | faiss_file = "faiss.index" 21 | 22 | if os.path.isfile(sqlite_file): 23 | os.remove(sqlite_file) 24 | if os.path.isfile(faiss_file): 
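# the stale faiss index file is removed as well (next line); otherwise Faiss.__init__ would read the old index from disk and the cache would not start empty for this test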
25 | os.remove(faiss_file) 26 | 27 | vector_base = VectorBase("faiss", dimension=d, top_k=3) 28 | data_manager = get_data_manager("sqlite", vector_base, max_size=8, clean_size=2) 29 | cache.init( 30 | embedding_func=mock_embeddings, 31 | data_manager=data_manager, 32 | similarity_evaluation=SearchDistanceEvaluation(), 33 | config=Config( 34 | similarity_threshold=0, 35 | ), 36 | ) 37 | 38 | mock_messages = [ 39 | {"role": "system", "content": "You are a helpful assistant."}, 40 | {"role": "user", "content": "foo"}, 41 | ] 42 | cache.import_data( 43 | [f"foo{i}" for i in range(10)], [f"receiver the foo {i}" for i in range(10)] 44 | ) 45 | 46 | answer = openai.ChatCompletion.create( 47 | model="gpt-3.5-turbo", 48 | messages=mock_messages, 49 | ) 50 | assert get_message_from_openai_answer(answer) 51 | 52 | cache.flush() 53 | vector_base = VectorBase("faiss", dimension=d, top_k=3) 54 | data_manager = get_data_manager("sqlite", vector_base, max_size=8, clean_size=2) 55 | cache.init( 56 | embedding_func=mock_embeddings, 57 | data_manager=data_manager, 58 | similarity_evaluation=SearchDistanceEvaluation(), 59 | config=Config( 60 | similarity_threshold=0, 61 | ), 62 | ) 63 | answer = openai.ChatCompletion.create( 64 | model="gpt-3.5-turbo", 65 | messages=mock_messages, 66 | ) 67 | assert get_message_from_openai_answer(answer) 68 | -------------------------------------------------------------------------------- /tests/integration_tests/processor/pre/test_pre_without_prompt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache import Cache, Config 4 | from gptcache.adapter import openai 5 | from gptcache.manager import get_data_manager 6 | from gptcache.processor.pre import last_content_without_prompt 7 | from gptcache.utils.response import get_message_from_openai_answer 8 | 9 | 10 | def test_pre_without_prompt(): 11 | cache_obj = Cache() 12 | data_file = "data_map_prompt.txt" 13 | cache_obj.init( 14 | pre_embedding_func=last_content_without_prompt, 15 | data_manager=get_data_manager(data_path=data_file), 16 | config=Config(prompts=["foo"]), 17 | ) 18 | 19 | if not os.path.isfile(data_file): 20 | cache_obj.import_data( 21 | [f"{i}" for i in range(10)], 22 | [f"receiver the foo {i}" for i in range(10)], 23 | ) 24 | 25 | answer = openai.ChatCompletion.create( 26 | model="gpt-3.5-turbo", 27 | messages=[ 28 | {"role": "system", "content": "You are a helpful assistant."}, 29 | {"role": "user", "content": "foo5"}, 30 | ], 31 | cache_obj=cache_obj, 32 | ) 33 | assert get_message_from_openai_answer(answer) == "receiver the foo 5" 34 | -------------------------------------------------------------------------------- /tests/integration_tests/utils/util_log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | from config.log_config import log_config 5 | 6 | 7 | class TestLog: 8 | def __init__(self, logger, log_debug, log_file, log_err, log_worker): 9 | self.logger = logger 10 | self.log_debug = log_debug 11 | self.log_file = log_file 12 | self.log_err = log_err 13 | self.log_worker = log_worker 14 | 15 | self.log = logging.getLogger(self.logger) 16 | self.log.setLevel(logging.DEBUG) 17 | 18 | try: 19 | formatter = logging.Formatter( 20 | "[%(asctime)s - %(levelname)s - %(name)s]: " 21 | "%(message)s (%(filename)s:%(lineno)s)" 22 | ) 23 | dh = logging.FileHandler(self.log_debug) 24 | dh.setLevel(logging.DEBUG) 25 | dh.setFormatter(formatter) 26 | self.log.addHandler(dh) 27 
| 28 | fh = logging.FileHandler(self.log_file) 29 | fh.setLevel(logging.INFO) 30 | fh.setFormatter(formatter) 31 | self.log.addHandler(fh) 32 | 33 | eh = logging.FileHandler(self.log_err) 34 | eh.setLevel(logging.ERROR) 35 | eh.setFormatter(formatter) 36 | self.log.addHandler(eh) 37 | 38 | if self.log_worker != "": 39 | wh = logging.FileHandler(self.log_worker) 40 | wh.setLevel(logging.DEBUG) 41 | wh.setFormatter(formatter) 42 | self.log.addHandler(wh) 43 | 44 | ch = logging.StreamHandler(sys.stdout) 45 | ch.setLevel(logging.DEBUG) 46 | ch.setFormatter(formatter) 47 | 48 | except Exception as e: 49 | print( 50 | "Can not use %s or %s or %s to log. error : %s" 51 | % (log_debug, log_file, log_err, str(e)) 52 | ) 53 | 54 | 55 | """All modules share this unified log""" 56 | log_debug = log_config.log_debug 57 | log_info = log_config.log_info 58 | log_err = log_config.log_err 59 | log_worker = log_config.log_worker 60 | test_log = TestLog("ci_test", log_debug, log_info, log_err, log_worker).log 61 | -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | 3 | addopts = --html=/tmp/ci_logs/report.html --self-contained-html -v -s 4 | # python3 -W ignore -m pytest 5 | 6 | log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s) 7 | log_date_format = %Y-%m-%d %H:%M:%S 8 | 9 | 10 | filterwarnings = 11 | ignore::DeprecationWarning 12 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://test.pypi.org/simple/ 2 | loguru==0.5.3 3 | pytest-cov==4.1.0 4 | pytest==7.2.0 5 | coverage==7.2.3 6 | pytest-assume==2.4.3 7 | pytest-timeout==1.3.3 8 | pytest-repeat==0.8.0 9 | pytest-level==0.1.1 10 | pytest-xdist==2.5.0 11 | pytest-loguru==0.2.0 12 | pytest-rerunfailures==9.1.1 13 | git+https://github.com/Projectplace/pytest-tags 14 | pytest-html==3.1.1 15 | pytest-sugar==0.9.5 16 | pytest-parallel 17 | psycopg2-binary 18 | transformers==4.29.2 19 | anyio==3.6.2 20 | torch 21 | mock 22 | pexpect 23 | spacy 24 | safetensors 25 | typing_extensions<4.6.0 26 | stability-sdk 27 | grpcio==1.53.0 28 | protobuf==3.20.0 29 | milvus==2.2.8 30 | pymilvus==2.2.8 31 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_cohere.py: -------------------------------------------------------------------------------- 1 | import os 2 | import types 3 | from unittest.mock import patch 4 | from gptcache.utils import import_cohere 5 | from gptcache.embedding import Cohere 6 | from gptcache.adapter.api import _get_model 7 | 8 | import_cohere() 9 | 10 | 11 | def test_embedding(): 12 | os.environ["CO_API_KEY"] = "API" 13 | 14 | with patch("cohere.Client.embed") as mock_create: 15 | dimension = 4096 16 | mock_create.return_value = types.SimpleNamespace(embeddings=[[0] * dimension]) 17 | c1 = Cohere() 18 | assert c1.dimension == dimension 19 | assert len(c1.to_embeddings("foo")) == dimension 20 | 21 | with patch("cohere.Client.embed") as mock_create: 22 | dimension = 512 23 | mock_create.return_value = types.SimpleNamespace(embeddings=[[0] * dimension]) 24 | c1 = Cohere("foo") 25 | assert c1.dimension == dimension 26 | assert len(c1.to_embeddings("foo")) == dimension 27 | 28 | with patch("cohere.Client.embed") as mock_create: 29 | dimension = 
4096 30 | mock_create.return_value = types.SimpleNamespace(embeddings=[[0] * dimension]) 31 | c1 = _get_model("cohere") 32 | assert c1.dimension == dimension 33 | assert len(c1.to_embeddings("foo")) == dimension 34 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_data2vec.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.embedding import Data2VecAudio 7 | 8 | 9 | def test_data2vec_audio(): 10 | url = "https://github.com/towhee-io/examples/releases/download/data/ah_yes.wav" 11 | req = requests.get(url) 12 | audio = BytesIO(req.content) 13 | t = Data2VecAudio(model="facebook/data2vec-audio-base-960h") 14 | data = t.to_embeddings(audio) 15 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 16 | 17 | req = requests.get(url) 18 | audio = BytesIO(req.content) 19 | t = _get_model("data2vecaudio") 20 | data = t.to_embeddings(audio) 21 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 22 | 23 | 24 | if __name__ == "__main__": 25 | test_data2vec_audio() 26 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_embedding_openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | from gptcache.embedding import OpenAI 5 | from gptcache.adapter.api import _get_model 6 | 7 | 8 | def test_embedding(): 9 | os.environ["OPENAI_API_KEY"] = "API" 10 | 11 | def get_return_value(d): 12 | return { 13 | "object": "list", 14 | "data": [ 15 | { 16 | "object": "embedding", 17 | "embedding": [0] * d, 18 | "index": 0 19 | } 20 | ], 21 | "model": "text-embedding-ada-002", 22 | "usage": { 23 | "prompt_tokens": 8, 24 | "total_tokens": 8 25 | } 26 | } 27 | 28 | with patch("openai.Embedding.create") as mock_create: 29 | dimension = 1536 30 | mock_create.return_value = get_return_value(dimension) 31 | oa = OpenAI() 32 | assert oa.dimension == dimension 33 | assert len(oa.to_embeddings("foo")) == dimension 34 | 35 | with patch("openai.Embedding.create") as mock_create: 36 | dimension = 1536 37 | mock_create.return_value = get_return_value(dimension) 38 | oa = OpenAI(api_key="openai") 39 | assert oa.dimension == dimension 40 | assert len(oa.to_embeddings("foo")) == dimension 41 | 42 | with patch("openai.Embedding.create") as mock_create: 43 | dimension = 512 44 | mock_create.return_value = get_return_value(dimension) 45 | oa = OpenAI(model="test_embedding") 46 | assert oa.dimension == dimension 47 | assert len(oa.to_embeddings("foo")) == dimension 48 | 49 | with patch("openai.Embedding.create") as mock_create: 50 | dimension = 1536 51 | mock_create.return_value = get_return_value(dimension) 52 | oa = _get_model("openai") 53 | assert oa.dimension == dimension 54 | assert len(oa.to_embeddings("foo")) == dimension 55 | 56 | 57 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_embedding_string.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding.string import to_embeddings 2 | 3 | 4 | def test_embedding(): 5 | message = to_embeddings("foo") 6 | assert message == "foo" 7 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_fasttext.py: 
-------------------------------------------------------------------------------- 1 | # from unittest.mock import patch 2 | 3 | # from gptcache.embedding import FastText 4 | 5 | # from gptcache.utils import import_fasttext 6 | # from gptcache.adapter.api import _get_model 7 | 8 | # import_fasttext() 9 | 10 | # import fasttext 11 | 12 | 13 | # def test_embedding(): 14 | # with patch("fasttext.util.download_model") as download_model_mock: 15 | # download_model_mock.return_value = "fastttext.bin" 16 | # with patch("fasttext.load_model") as load_model_mock: 17 | # load_model_mock.return_value = fasttext.FastText._FastText() 18 | # with patch("fasttext.util.reduce_model") as reduce_model_mock: 19 | # reduce_model_mock.return_value = None 20 | # with patch("fasttext.FastText._FastText.get_dimension") as dimension_mock: 21 | # dimension_mock.return_value = 128 22 | # with patch("fasttext.FastText._FastText.get_sentence_vector") as vector_mock: 23 | # vector_mock.return_value = [0] * 128 24 | 25 | # ft = FastText(dim=128) 26 | # assert len(ft.to_embeddings("foo")) == 128 27 | # assert ft.dimension == 128 28 | 29 | # ft1 = _get_model("fasttext", model_config={"dim": 128}) 30 | # assert len(ft1.to_embeddings("foo")) == 128 31 | # assert ft1.dimension == 128 32 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_huggingface.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import Huggingface 2 | from gptcache.adapter.api import _get_model 3 | 4 | 5 | def test_huggingface(): 6 | t = Huggingface("distilbert-base-uncased") 7 | data = t.to_embeddings("foo") 8 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 9 | 10 | t = _get_model(model_src="huggingface", model_config={"model": "distilbert-base-uncased"}) 11 | data = t.to_embeddings("foo") 12 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 13 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_langchain.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import LangChain 2 | from gptcache.utils import import_langchain, prompt_install 3 | 4 | import_langchain() 5 | from langchain.embeddings import FakeEmbeddings 6 | 7 | 8 | def test_langchain_embedding(): 9 | size = 10 10 | l = LangChain(embeddings=FakeEmbeddings(size=size)) 11 | data = l.to_embeddings("foo") 12 | assert len(data) == size 13 | 14 | l = LangChain(embeddings=FakeEmbeddings(size=size), dimension=size) 15 | data = l.to_embeddings("foo") 16 | assert len(data) == size 17 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_onnx.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import Onnx 2 | from gptcache.adapter.api import _get_model 3 | 4 | 5 | def test_onnx(): 6 | t = Onnx() 7 | data = t.to_embeddings("foo") 8 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 9 | 10 | t = _get_model("onnx") 11 | data = t.to_embeddings("foo") 12 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 13 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_paddlenlp.py: -------------------------------------------------------------------------------- 1 | from gptcache.embedding import PaddleNLP 2 | from 
gptcache.adapter.api import _get_model 3 | 4 | 5 | def test_paddlenlp(): 6 | t = PaddleNLP("ernie-3.0-nano-zh") 7 | dimension = t.dimension 8 | data = t.to_embeddings("中国") 9 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 10 | 11 | t = _get_model(model_src="paddlenlp", model_config={"model": "ernie-3.0-nano-zh"}) 12 | dimension = t.dimension 13 | data = t.to_embeddings("中国") 14 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 15 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_rwkv.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter.api import _get_model 2 | from gptcache.embedding import Rwkv 3 | 4 | 5 | def test_rwkv(): 6 | t = Rwkv("sgugger/rwkv-430M-pile") 7 | data = t.to_embeddings("foo") 8 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 9 | 10 | t = _get_model(model_src="rwkv", model_config={"model": "sgugger/rwkv-430M-pile"}) 11 | data = t.to_embeddings("foo") 12 | assert len(data) == t.dimension, f"{len(data)}, {t.dimension}" 13 | 14 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_sbert.py: -------------------------------------------------------------------------------- 1 | from gptcache.adapter.api import _get_model 2 | from gptcache.embedding import SBERT 3 | 4 | 5 | def test_sbert(): 6 | t = SBERT("all-MiniLM-L6-v2") 7 | dimension = t.dimension 8 | data = t.to_embeddings("foo") 9 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 10 | 11 | t = _get_model(model_src="sbert", model_config={"model": "all-MiniLM-L6-v2"}) 12 | dimension = t.dimension 13 | data = t.to_embeddings("foo") 14 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 15 | 16 | question = [ 17 | "what is apple?", 18 | "what is intel?", 19 | "what is openai?", 20 | ] 21 | answer = ["apple", "intel", "openai"] 22 | for q, _ in zip(question, answer): 23 | data = t.to_embeddings(q) 24 | assert len(data) == dimension, f"{len(data)}, {t.dimension}" 25 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_timm.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.embedding import Timm 7 | 8 | 9 | def test_timm(): 10 | url = 'https://raw.githubusercontent.com/zilliztech/GPTCache/main/docs/GPTCache.png' 11 | image_bytes = requests.get(url).content 12 | image_file = BytesIO(image_bytes) # Convert image to file-like object 13 | 14 | encoder = Timm(model='resnet50') 15 | embed = encoder.to_embeddings(image_file) 16 | assert len(embed) == encoder.dimension 17 | 18 | encoder = _get_model(model_src="timm", model_config={"model": "resnet50"}) 19 | embed = encoder.to_embeddings(image_file) 20 | assert len(embed) == encoder.dimension 21 | 22 | 23 | if __name__ == "__main__": 24 | test_timm() -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_uform.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.utils import import_uform, import_pillow 7 | from gptcache.utils.error import ParamError 8 | 9 | import_uform() 10 | import_pillow() 11 | 12 | 13 | def 
test_uform(): 14 | encoder = _get_model("uform") 15 | embed = encoder.to_embeddings("Hello, world.") 16 | assert len(embed) == encoder.dimension 17 | 18 | url = "https://raw.githubusercontent.com/zilliztech/GPTCache/main/docs/GPTCache.png" 19 | image_bytes = requests.get(url).content 20 | image_file = BytesIO(image_bytes) 21 | 22 | encoder = _get_model("uform", model_config={"embedding_type": "image"}) 23 | embed = encoder.to_embeddings(image_file) 24 | assert len(embed) == encoder.dimension 25 | 26 | is_exception = False 27 | try: 28 | _get_model("uform", model_config={"embedding_type": "foo"}) 29 | except ParamError: 30 | is_exception = True 31 | assert is_exception 32 | -------------------------------------------------------------------------------- /tests/unit_tests/embedding/test_vit.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | import requests 4 | 5 | from gptcache.adapter.api import _get_model 6 | from gptcache.utils import import_pillow, import_vit 7 | 8 | 9 | def test_timm(): 10 | import_vit() 11 | import_pillow() 12 | 13 | from PIL import Image 14 | from gptcache.embedding import ViT 15 | 16 | url = 'https://raw.githubusercontent.com/zilliztech/GPTCache/main/docs/GPTCache.png' 17 | image_bytes = requests.get(url).content 18 | image_data = BytesIO(image_bytes) # Convert image to file-like object 19 | image = Image.open(image_data) 20 | encoder = ViT(model="google/vit-base-patch16-384") 21 | embed = encoder.to_embeddings(image) 22 | assert len(embed) == encoder.dimension 23 | 24 | encoder = _get_model(model_src="vit") 25 | embed = encoder.to_embeddings(image) 26 | assert len(embed) == encoder.dimension 27 | 28 | if __name__ == "__main__": 29 | test_timm() -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_base.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from gptcache.utils.error import NotFoundError 4 | from gptcache.manager import CacheBase, VectorBase 5 | from gptcache.manager.scalar_data.manager import CacheBase as InnerCacheBase 6 | from gptcache.manager.vector_data.manager import VectorBase as InnerVectorBase 7 | 8 | 9 | class TestBaseStore(unittest.TestCase): 10 | def test_cache_base(self): 11 | with self.assertRaises(EnvironmentError): 12 | InnerCacheBase() 13 | 14 | with self.assertRaises(NotFoundError): 15 | CacheBase("test_cache_base") 16 | 17 | def test_vector_base(self): 18 | with self.assertRaises(EnvironmentError): 19 | InnerVectorBase() 20 | 21 | with self.assertRaises(NotFoundError): 22 | VectorBase("test_cache_base") 23 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_chromadb.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from gptcache.manager import VectorBase 6 | from gptcache.manager.vector_data.base import VectorData 7 | 8 | 9 | class TestChromadb(unittest.TestCase): 10 | def test_normal(self): 11 | db = VectorBase("chromadb", client_settings={}, top_k=3) 12 | db.mul_add([VectorData(id=i, data=np.random.sample(10)) for i in range(100)]) 13 | search_res = db.search(np.random.sample(10)) 14 | self.assertEqual(len(search_res), 3) 15 | db.delete(["1", "3", "5", "7"]) 16 | self.assertEqual(db._collection.count(), 96) 17 | -------------------------------------------------------------------------------- 
/tests/unit_tests/manager/test_map.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from gptcache.manager.data_manager import MapDataManager 4 | 5 | data_map_path = "data_map.txt" 6 | 7 | 8 | def test_map(): 9 | if os.path.isfile(data_map_path): 10 | os.remove(data_map_path) 11 | 12 | data_manager = MapDataManager(data_map_path, 3) 13 | a = "a" 14 | for i in range(4): 15 | data_manager.save(chr(ord(a) + i), str(i), chr(ord(a) + i)) 16 | assert len(data_manager.search("a")) == 0 17 | question, answer, emb, _ = data_manager.search("b")[0] 18 | assert question == "b", question 19 | assert answer == "1", answer 20 | assert emb == "b", emb 21 | data_manager.close() 22 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_milvusdb.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from tempfile import TemporaryDirectory 4 | 5 | from gptcache.manager.vector_data import VectorBase 6 | from gptcache.manager.vector_data.base import VectorData 7 | 8 | 9 | class TestMilvusDB(unittest.TestCase): 10 | def test_normal(self): 11 | with TemporaryDirectory(dir="./") as root: 12 | size = 1000 13 | dim = 512 14 | top_k = 10 15 | 16 | db = VectorBase( 17 | "milvus", 18 | top_k=top_k, 19 | dimension=dim, 20 | port="10086", 21 | local_mode=True, 22 | local_data=str(root), 23 | index_params={ 24 | "metric_type": "L2", 25 | "index_type": "IVF_FLAT", 26 | "params": {"nlist": 128}, 27 | }, 28 | ) 29 | data = np.random.randn(size, dim).astype(np.float32) 30 | db.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 31 | self.assertEqual(len(db.search(data[0])), top_k) 32 | db.mul_add([VectorData(id=size, data=data[0])]) 33 | ret = db.search(data[0]) 34 | self.assertIn(ret[0][1], [0, size]) 35 | self.assertIn(ret[1][1], [0, size]) 36 | db.delete([0, 1, 2, 3, 4, 5, size]) 37 | ret = db.search(data[0]) 38 | self.assertNotIn(ret[0][1], [0, size]) 39 | db.rebuild() 40 | db.close() 41 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_object_storage.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import mock 3 | import os 4 | import requests 5 | from pathlib import Path 6 | import numpy as np 7 | from tempfile import TemporaryDirectory 8 | 9 | from gptcache.manager.object_data.local_storage import LocalObjectStorage 10 | from gptcache.manager.object_data.s3_storage import S3Storage 11 | from gptcache.manager import ObjectBase 12 | 13 | 14 | class TestLocal(unittest.TestCase): 15 | def test_normal(self): 16 | with TemporaryDirectory(dir="./") as root: 17 | o = LocalObjectStorage(root) 18 | data = b'My test' 19 | fp = o.put(data) 20 | self.assertTrue(Path(fp).is_file()) 21 | self.assertEqual(o.get(fp), data) 22 | self.assertEqual(o.get_access_link(fp), fp) 23 | o.delete([fp]) 24 | self.assertFalse(Path(fp).is_file()) 25 | 26 | 27 | class TestS3(unittest.TestCase): 28 | def test_normal(self): 29 | access_key = os.environ.get('AWS_ACCESS_KEY_ID') 30 | secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY') 31 | bucket = os.environ.get('BUCKET') 32 | endpoint = os.environ.get('ENDPOINT') 33 | if access_key is None or secret_key is None or bucket is None: 34 | return 35 | o = S3Storage(bucket, 'gptcache', access_key, secret_key, endpoint) 36 | data = b'My test' 37 | fp = o.put(data) 38 | 
self.assertEqual(o.get(fp), data) 39 | link = o.get_access_link(fp) 40 | self.assertEqual(requests.get(link, verify=False).content, data) 41 | o.delete([fp]) 42 | self.assertIsNone(o.get(fp)) 43 | 44 | class TestBase(unittest.TestCase): 45 | def test_local(self): 46 | with TemporaryDirectory(dir="./") as root: 47 | o = ObjectBase("local", path = root) 48 | data = b'My test' 49 | fp = o.put(data) 50 | self.assertTrue(Path(fp).is_file()) 51 | self.assertEqual(o.get(fp), data) 52 | self.assertEqual(o.get_access_link(fp), fp) 53 | o.delete([fp]) 54 | self.assertFalse(Path(fp).is_file()) 55 | 56 | def test_s3(self): 57 | with mock.patch("boto3.Session") as mock_session: 58 | o = ObjectBase("s3", bucket="", path_prefix="", 59 | access_key="", secret_key="") 60 | data = b"My test" 61 | fp = o.put(data) 62 | o.get(fp) 63 | o.get_access_link(fp) 64 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_pgvector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | from gptcache.manager.vector_data import VectorBase 7 | from gptcache.manager.vector_data.base import VectorData 8 | 9 | 10 | class TestPgvector(unittest.TestCase): 11 | def test_normal(self): 12 | size = 1000 13 | dim = 10 14 | top_k = 10 15 | 16 | url = os.getenv("POSTGRES_URL", "postgresql://postgres:postgres@localhost:5432/postgres") 17 | 18 | db = VectorBase( 19 | "pgvector", 20 | top_k=top_k, 21 | dimension=dim, 22 | url=url, 23 | index_params={ 24 | "index_type": "L2", 25 | "params": {"lists": 100, "probes": 10}, 26 | }, 27 | ) 28 | db.delete([i for i in range(size)]) 29 | data = np.random.randn(size, dim).astype(np.float32) 30 | db.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 31 | self.assertEqual(len(db.search(data[0])), top_k) 32 | db.mul_add([VectorData(id=size, data=data[0])]) 33 | ret = db.search(data[0]) 34 | print(ret) 35 | self.assertIn(ret[0][1], [0, size]) 36 | self.assertIn(ret[1][1], [0, size]) 37 | db.delete([0, 1, 2, 3, 4, 5, size]) 38 | ret = db.search(data[0]) 39 | print(ret) 40 | self.assertNotIn(ret[0][1], [0, size]) 41 | db.rebuild() 42 | db.close() 43 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_qdrant.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | import numpy as np 5 | 6 | from gptcache.manager.vector_data import VectorBase 7 | from gptcache.manager.vector_data.base import VectorData 8 | 9 | 10 | class TestQdrant(unittest.TestCase): 11 | def test_normal(self): 12 | size = 10 13 | dim = 2 14 | top_k = 10 15 | qdrant = VectorBase( 16 | "qdrant", 17 | top_k=top_k, 18 | dimension=dim, 19 | location=":memory:" 20 | ) 21 | data = np.random.randn(size, dim).astype(np.float32) 22 | qdrant.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 23 | search_result = qdrant.search(data[0], top_k) 24 | self.assertEqual(len(search_result), top_k) 25 | qdrant.mul_add([VectorData(id=size, data=data[0])]) 26 | ret = qdrant.search(data[0]) 27 | self.assertIn(ret[0][1], [0, size]) 28 | self.assertIn(ret[1][1], [0, size]) 29 | qdrant.delete([0, 1, 2, 3, 4, 5, size]) 30 | ret = qdrant.search(data[0]) 31 | self.assertNotIn(ret[0][1], [0, size]) 32 | qdrant.rebuild() 33 | qdrant.close() 34 | -------------------------------------------------------------------------------- 
/tests/unit_tests/manager/test_redis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.embedding import Onnx 4 | from gptcache.manager import VectorBase 5 | from gptcache.manager.vector_data.base import VectorData 6 | 7 | 8 | def test_redis_vector_store(): 9 | encoder = Onnx() 10 | dim = encoder.dimension 11 | vector_base = VectorBase("redis", dimension=dim) 12 | vector_base.mul_add([VectorData(id=i, data=np.random.rand(dim)) for i in range(10)]) 13 | 14 | search_res = vector_base.search(np.random.rand(dim)) 15 | print(search_res) 16 | assert len(search_res) == 1 17 | 18 | search_res = vector_base.search(np.random.rand(dim), top_k=10) 19 | print(search_res) 20 | assert len(search_res) == 10 21 | 22 | vector_base.delete([i for i in range(5)]) 23 | 24 | search_res = vector_base.search(np.random.rand(dim), top_k=10) 25 | print(search_res) 26 | assert len(search_res) == 5 27 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_usearch.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | from gptcache.manager.vector_data import VectorBase 6 | from gptcache.manager.vector_data.base import VectorData 7 | 8 | 9 | class TestUSearchDB(unittest.TestCase): 10 | def test_normal(self): 11 | size = 1000 12 | dim = 512 13 | top_k = 10 14 | 15 | db = VectorBase( 16 | "usearch", 17 | index_file_path='./index.usearch', 18 | dimension=dim, 19 | top_k=top_k, 20 | metric='cos', 21 | dtype='f32', 22 | ) 23 | db.mul_add([VectorData(id=i, data=np.random.rand(dim)) 24 | for i in range(size)]) 25 | self.assertEqual(len(db.search(np.random.rand(dim))), top_k) 26 | self.assertEqual(db.count(), size) 27 | db.close() 28 | -------------------------------------------------------------------------------- /tests/unit_tests/manager/test_weaviate.py: -------------------------------------------------------------------------------- 1 | # import unittest 2 | # import numpy as np 3 | 4 | # from gptcache.manager.vector_data import VectorBase 5 | # from gptcache.manager.vector_data.base import VectorData 6 | 7 | 8 | # class TestWeaviateDB(unittest.TestCase): 9 | # def test_normal(self): 10 | # size = 1000 11 | # dim = 512 12 | # top_k = 10 13 | # class_name = "Vectorcache" 14 | 15 | # db = VectorBase( 16 | # "weaviate", 17 | # class_name=class_name, 18 | # top_k=top_k 19 | # ) 20 | 21 | # created_class_name = db._create_class() 22 | # self.assertEqual(class_name, created_class_name) 23 | # data = np.random.randn(size, dim).astype(np.float32) 24 | # db.mul_add([VectorData(id=i, data=v) for v, i in zip(data, range(size))]) 25 | # self.assertEqual(len(db.search(data[0])), top_k) 26 | # db.mul_add([VectorData(id=size, data=data[0])]) 27 | # ret = db.search(data[0]) 28 | # self.assertIn(ret[0][1], [0, size]) 29 | # db.delete([0, 1, 2, 3, 4, 5, size]) 30 | # ret = db.search(data[0]) 31 | # self.assertNotIn(ret[0][1], [0, size]) 32 | # db.rebuild() 33 | # db.update_embeddings(6, data[7]) 34 | # emb = db.get_embeddings(6) 35 | # self.assertEqual(emb.tolist(), data[7].tolist()) 36 | # emb = db.get_embeddings(0) 37 | # self.assertIsNone(emb) 38 | # db.close() 39 | 40 | # custom_class_name = "Customcache" 41 | # class_schema = { 42 | # "class": custom_class_name, 43 | # "description": "LLM response cache", 44 | # "properties": [ 45 | # { 46 | # "name": "data_id", 47 | # "dataType": ["int"], 48 | # "description": "The 
data-id generated by GPTCache for vectors.", 49 | # } 50 | # ], 51 | # "vectorIndexConfig": {"distance": "cosine"}, 52 | # } 53 | 54 | # db = VectorBase( 55 | # "weaviate", 56 | # class_schema=class_schema, 57 | # top_k=top_k 58 | # ) 59 | # created_class_name = db._create_class() 60 | # self.assertEqual(custom_class_name, created_class_name) 61 | # db.close() 62 | -------------------------------------------------------------------------------- /tests/unit_tests/processor/test_post.py: -------------------------------------------------------------------------------- 1 | from gptcache.processor.post import random_one, first, nop, temperature_softmax 2 | 3 | 4 | def test_random_one(): 5 | message = random_one(["foo", "foo2"]) 6 | assert message 7 | 8 | 9 | def test_first(): 10 | message = first(["foo", "foo2"]) 11 | assert message == "foo" 12 | 13 | 14 | def test_nop(): 15 | message = nop(["foo", "foo2"]) 16 | assert "foo" in message 17 | assert "foo2" in message 18 | 19 | 20 | def test_temperature_softmax(): 21 | message = temperature_softmax(messages=["foo", "foo2"], scores=[0.0, 1.0], temperature=0.5) 22 | assert message in ["foo", "foo2"] 23 | 24 | message = temperature_softmax(messages=["foo", "foo2"], scores=[0.9, 0.1], temperature=0.0) 25 | assert message == "foo" 26 | 27 | message = temperature_softmax(messages=["foo", "foo2"], scores=[0.1, 0.9], temperature=0.0) 28 | assert message == "foo2" 29 | 30 | 31 | if __name__ == "__main__": 32 | test_first() 33 | test_nop() 34 | test_random_one() 35 | test_temperature_softmax() -------------------------------------------------------------------------------- /tests/unit_tests/processor/test_pre.py: -------------------------------------------------------------------------------- 1 | from gptcache.processor.pre import ( 2 | last_content, 3 | all_content, 4 | nop, 5 | last_content_without_prompt, 6 | get_prompt, get_openai_moderation_input, 7 | concat_all_queries 8 | ) 9 | 10 | from gptcache.config import Config 11 | 12 | def test_last_content(): 13 | content = last_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]}) 14 | 15 | assert content == "foo2" 16 | 17 | 18 | def test_last_content_without_prompt(): 19 | content = last_content_without_prompt( 20 | {"messages": [{"content": "foo1"}, {"content": "foo2"}]} 21 | ) 22 | assert content == "foo2" 23 | 24 | content = last_content_without_prompt( 25 | {"messages": [{"content": "foo1"}, {"content": "foo2"}]}, prompts=None 26 | ) 27 | assert content == "foo2" 28 | 29 | content = last_content_without_prompt( 30 | {"messages": [{"content": "foo1"}, {"content": "foo2"}]}, prompts=["foo"] 31 | ) 32 | assert content == "2" 33 | 34 | 35 | def test_all_content(): 36 | content = all_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]}) 37 | 38 | assert content == "foo1\nfoo2" 39 | 40 | 41 | def test_nop(): 42 | content = nop({"str": "hello"}) 43 | assert content == {"str": "hello"} 44 | 45 | 46 | def test_get_prompt(): 47 | content = get_prompt({"prompt": "foo"}) 48 | assert content == "foo" 49 | 50 | 51 | def test_get_openai_moderation_input(): 52 | content = get_openai_moderation_input({"input": ["hello", "world"]}) 53 | assert content == "['hello', 'world']" 54 | 55 | 56 | def test_get_messages_last_content(): 57 | content = last_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]}) 58 | assert content == "foo2" 59 | 60 | def test_concat_all_queries(): 61 | config = Config() 62 | config.context_len = 2 63 | content = concat_all_queries({"messages":[{"role": "system", 
"content": "foo1"}, 64 | {"role": "user", "content": "foo2"}, 65 | {"role": "assistant","content": "foo3"}, 66 | {"role": "user", "content": "foo4"}, 67 | {"role": "assistant","content": "foo5"}, 68 | {"role": "user", "content": "foo6"}]}, **{'cache_config':config}) 69 | assert content == 'USER: foo4\nUSER: foo6' 70 | 71 | 72 | if __name__ == '__main__': 73 | test_concat_all_queries() 74 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_cohere_rerank.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | from gptcache.adapter.api import _get_eval 5 | from gptcache.utils import import_cohere 6 | 7 | import_cohere() 8 | 9 | from cohere.responses import Reranking 10 | 11 | 12 | def test_cohere_rerank(): 13 | os.environ["CO_API_KEY"] = "API" 14 | 15 | evaluation = _get_eval("cohere") 16 | 17 | min_value, max_value = evaluation.range() 18 | assert min_value < 0.001 19 | assert max_value > 0.999 20 | 21 | with patch("cohere.Client.rerank") as mock_create: 22 | mock_create.return_value = Reranking( 23 | response={ 24 | "meta": {"api_version": {"version": "2022-12-06"}}, 25 | "results": [], 26 | } 27 | ) 28 | evaluation = _get_eval("cohere") 29 | score = evaluation.evaluation( 30 | {"question": "What is the color of sky?"}, 31 | {"answer": "the color of sky is blue"}, 32 | ) 33 | assert score < 0.01 34 | 35 | with patch("cohere.Client.rerank") as mock_create: 36 | mock_create.return_value = Reranking( 37 | response={ 38 | "meta": {"api_version": {"version": "2022-12-06"}}, 39 | "results": [ 40 | { 41 | "relevance_score": 0.9871293, 42 | "index": 0, 43 | } 44 | ], 45 | } 46 | ) 47 | evaluation = _get_eval("cohere") 48 | score = evaluation.evaluation( 49 | {"question": "What is the color of sky?"}, 50 | {"answer": "the color of sky is blue"}, 51 | ) 52 | assert score > 0.9 53 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_kreciprocal.py: -------------------------------------------------------------------------------- 1 | from gptcache.similarity_evaluation import KReciprocalEvaluation 2 | from gptcache.manager.vector_data.faiss import Faiss 3 | from gptcache.manager.vector_data.base import VectorData 4 | from gptcache.adapter.api import _get_eval 5 | import numpy as np 6 | import math 7 | 8 | def normalize(vec): 9 | norm = np.linalg.norm(vec) 10 | return vec / norm 11 | 12 | faiss = Faiss('./none', 3, 10) 13 | 14 | 15 | def _test_evaluation(evaluation): 16 | narr1 = normalize(np.array([1.0, 2.0, 3.0])) 17 | faiss.mul_add([VectorData(id=0, data=narr1)]) 18 | narr2 = normalize(np.array([2.0, 3.0, 4.0])) 19 | faiss.mul_add([VectorData(id=1, data=narr2)]) 20 | narr3 = normalize(np.array([3.0, 4.0, 5.0])) 21 | faiss.mul_add([VectorData(id=2, data=narr3)]) 22 | evaluation = KReciprocalEvaluation(vectordb=faiss, top_k=2) 23 | query1 = normalize(np.array([1.1, 2.1, 3.1])) 24 | query2 = normalize(np.array([101.1, 102.1, 103.1])) 25 | 26 | score1 = evaluation.evaluation({'question': 'question1', 'embedding': query1}, {'question': 'question2', 'embedding': narr1}) 27 | score2 = evaluation.evaluation({'question': 'question1', 'embedding': query2}, {'question': 'question2', 'embedding': narr1}) 28 | 29 | assert score1 > 3.99 30 | assert math.isclose(score2, 0) 31 | 32 | def test_kreciprocal(): 33 | evaluation = KReciprocalEvaluation(vectordb=faiss, top_k=2) 
34 | _test_evaluation(evaluation) 35 | 36 | def test_get_eval(): 37 | evaluation = _get_eval(strategy="kreciprocal", kws={"vectordb": faiss, "top_k": 2}) 38 | _test_evaluation(evaluation) 39 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_onnx.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import OnnxModelEvaluation 5 | 6 | 7 | def _test_evaluation(evaluation): 8 | range_min, range_max = evaluation.range() 9 | assert math.isclose(range_min, 0.0) 10 | assert math.isclose(range_max, 1.0) 11 | 12 | score = evaluation.evaluation({"question": "hello"}, {"question": "hello"}) 13 | assert math.isclose(score, 1.0) 14 | 15 | query = "Can you pass a urine test for meth in 4 days?" 16 | candidate_1 = "Can meth be detected in a urine test if last used was Thursday night and the test was tuesday morning?" 17 | candidate_2 = "how old are you?" 18 | 19 | score = evaluation.evaluation({"question": query}, {"question": candidate_1}) 20 | assert isinstance(score, float), type(score) 21 | assert score > 0.8 22 | 23 | score = evaluation.evaluation({"question": query}, {"question": candidate_2}) 24 | assert score < 0.1 25 | 26 | 27 | def test_onnx(): 28 | evaluation = OnnxModelEvaluation() 29 | _test_evaluation(evaluation) 30 | 31 | 32 | def test_get_eval(): 33 | evaluation = _get_eval("onnx") 34 | _test_evaluation(evaluation) 35 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_sbert.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import SbertCrossencoderEvaluation 5 | 6 | 7 | def _test_evaluation(evaluation): 8 | range_min, range_max = evaluation.range() 9 | assert math.isclose(range_min, 0.0) 10 | assert math.isclose(range_max, 1.0) 11 | 12 | score = evaluation.evaluation({"question": "hello"}, {"question": "hello"}) 13 | assert math.isclose(score, 1.0) 14 | 15 | query = "Can you pass a urine test for meth in 4 days?" 16 | candidate_1 = "Can meth be detected in a urine test if last used was Thursday night and the test was tuesday morning?" 17 | candidate_2 = "how old are you?" 
18 | 19 | score = evaluation.evaluation({"question": query}, {"question": candidate_1}) 20 | assert score > 0.8 21 | 22 | score = evaluation.evaluation({"question": query}, {"question": candidate_2}) 23 | assert score < 0.1 24 | 25 | 26 | def test_sbert(): 27 | evaluation = SbertCrossencoderEvaluation() 28 | _test_evaluation(evaluation) 29 | 30 | 31 | def test_get_eval(): 32 | evaluation = _get_eval("sbert_crossencoder") 33 | _test_evaluation(evaluation) 34 | 35 | if __name__ == '__main__': 36 | test_sbert() 37 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_sequence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import SequenceMatchEvaluation 5 | from gptcache.similarity_evaluation.sequence_match import reweight 6 | 7 | 8 | def normalize(vec): 9 | norm = np.linalg.norm(vec) 10 | return vec / norm 11 | 12 | 13 | def _test_evaluation(evaluation): 14 | evaluation = SequenceMatchEvaluation([0.1, 0.2, 0.7], "onnx") 15 | score1 = evaluation.evaluation( 16 | {"question": "USER:foo1\nUSER:foo2\nUSER:foo3\n"}, 17 | {"question": "USER:foo1\nUSER:foo2\nUSER:foo3\n"}, 18 | ) 19 | score2 = evaluation.evaluation( 20 | {"question": "USER:foo1\nUSER:foo2\nUSER:foo3\n"}, 21 | {"question": "USER:foo1\nUSER:foo2\n"}, 22 | ) 23 | evaluation = SequenceMatchEvaluation([0.2, 0.8], "onnx") 24 | score2 = evaluation.evaluation( 25 | {"question": "USER:foo1\nUser:foo2\nUser:foo3\n"}, 26 | {"question": "USER:foo1\nUser:foo2\n"}, 27 | ) 28 | assert True 29 | 30 | 31 | def test_sequence_match(): 32 | evaluation = SequenceMatchEvaluation([0.1, 0.2, 0.7], "onnx") 33 | evaluation.range() 34 | _test_evaluation(evaluation) 35 | 36 | 37 | def test_get_eval(): 38 | evaluation = _get_eval( 39 | strategy="sequence_match", 40 | kws={ 41 | "embedding_extractor": "onnx", 42 | "weights": [0.1, 0.2, 0.7], 43 | "embedding_config": {"model": "GPTCache/paraphrase-albert-onnx"}, 44 | }, 45 | ) 46 | _test_evaluation(evaluation) 47 | 48 | 49 | def test_reweigth(): 50 | ws = reweight([0.7, 0.2, 0.1], 4) 51 | assert len(ws) == 3 52 | ws = reweight([0.7, 0.2, 0.1], 3) 53 | assert len(ws) == 3 54 | ws = reweight([0.7, 0.2, 0.1], 2) 55 | assert len(ws) == 2 56 | ws = reweight([0.7, 0.2, 0.1], 1) 57 | assert len(ws) == 1 58 | 59 | 60 | if __name__ == "__main__": 61 | test_sequence_match() 62 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evaluation_string.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import ExactMatchEvaluation 5 | 6 | def _test_evaluation(evaluation): 7 | range_min, range_max = evaluation.range() 8 | assert math.isclose(range_min, 0.0) 9 | assert math.isclose(range_max, 1.0) 10 | 11 | score = evaluation.evaluation({"question": "hello"}, {"question": "hello"}) 12 | assert math.isclose(score, 1.0) 13 | 14 | score = evaluation.evaluation({"question": "tello"}, {"question": "hello"}) 15 | assert math.isclose(score, 0.0) 16 | 17 | 18 | def test_exact_match_evaluation(): 19 | evaluation = ExactMatchEvaluation() 20 | _test_evaluation(evaluation) 21 | 22 | 23 | def test_get_eval(): 24 | evaluation = _get_eval("exact") 25 | _test_evaluation(evaluation) 26 | 
-------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_evalution_time.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from gptcache.manager.scalar_data.base import CacheData 4 | from gptcache.similarity_evaluation import TimeEvaluation 5 | 6 | 7 | def test_evaluation_time(): 8 | eval = TimeEvaluation("distance", {}, time_range=2) 9 | assert eval.range() == (0.0, 4.0) 10 | 11 | similarity = eval.evaluation({}, {"search_result": (3.5, None)}) 12 | assert similarity == 0.0 13 | 14 | similarity = eval.evaluation( 15 | {}, {"search_result": (3.5, None), "cache_data": CacheData("a", "b")} 16 | ) 17 | assert similarity == 0.0 18 | 19 | similarity = eval.evaluation( 20 | {}, 21 | { 22 | "search_result": (3.5, None), 23 | "cache_data": CacheData("a", "b", create_on=datetime.datetime(2022, 1, 1)), 24 | }, 25 | ) 26 | assert similarity == 0.0 27 | 28 | similarity = eval.evaluation( 29 | {}, 30 | { 31 | "search_result": (3.5, None), 32 | "cache_data": CacheData("a", "b", create_on=datetime.datetime.now()), 33 | }, 34 | ) 35 | assert similarity == 0.5 36 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_np.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | 5 | from gptcache.adapter.api import _get_eval 6 | from gptcache.similarity_evaluation import NumpyNormEvaluation 7 | 8 | 9 | embedding_func = lambda x: np.array([1, 1]) 10 | 11 | 12 | def _test_evaluation(evaluation): 13 | 14 | range_min, range_max = evaluation.range() 15 | # print(range_max) 16 | assert math.isclose(range_min, 0.0) 17 | assert math.isclose(range_max, 2.0) 18 | 19 | score = evaluation.evaluation( 20 | {"embedding": np.array([-0.5, -0.5])}, {"embedding": np.array([1, 1])} 21 | ) 22 | assert math.isclose(score, 0.0, abs_tol=0.001), score 23 | 24 | score = evaluation.evaluation( 25 | {"embedding": np.array([1, 2, 3, 4])}, 26 | {"embedding": np.array([0.1, 0.2, 0.3, 0.4])}, 27 | ) 28 | 29 | assert math.isclose(score, 2.0, abs_tol=0.001), score 30 | 31 | score = evaluation.evaluation( 32 | {"question": "test"}, 33 | {"question": "test"} 34 | ) 35 | assert math.isclose(score, 2.0), score 36 | 37 | score = evaluation.evaluation( 38 | {"question": "test1"}, 39 | {"question": "test2"} 40 | ) 41 | assert math.isclose(score, 2.0), score 42 | 43 | 44 | def test_norm(): 45 | evaluation = NumpyNormEvaluation(enable_normal=True, question_embedding_function=embedding_func) 46 | _test_evaluation(evaluation) 47 | 48 | 49 | def test_get_eval(): 50 | evaluation = _get_eval(strategy="numpy", kws={"enable_normal": True, "question_embedding_function": embedding_func}) 51 | 52 | 53 | if __name__ == "__main__": 54 | test_norm() 55 | -------------------------------------------------------------------------------- /tests/unit_tests/similarity_evaluation/test_simple.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from gptcache.adapter.api import _get_eval 4 | from gptcache.similarity_evaluation import SearchDistanceEvaluation 5 | 6 | 7 | def _test_evaluation_default(evaluation): 8 | range_min, range_max = evaluation.range() 9 | assert math.isclose(range_min, 0.0) 10 | assert math.isclose(range_max, 4.0) 11 | 12 | score = evaluation.evaluation({}, {"search_result": (1, None)}) 13 | assert math.isclose(score, 
3.0) 14 | 15 | score = evaluation.evaluation({}, {"search_result": (-1, None)}) 16 | assert math.isclose(score, 4.0) 17 | 18 | 19 | def _test_evaluation_config(evaluation): 20 | range_min, range_max = evaluation.range() 21 | assert math.isclose(range_min, 0.0) 22 | assert math.isclose(range_max, 10.0) 23 | 24 | score = evaluation.evaluation({}, {"search_result": (5, None)}) 25 | assert math.isclose(score, 5.0) 26 | score = evaluation.evaluation({}, {"search_result": (20, None)}) 27 | assert math.isclose(score, 10.0) 28 | 29 | 30 | def test_search_distance_evaluation(): 31 | evaluation = SearchDistanceEvaluation() 32 | _test_evaluation_default(evaluation) 33 | 34 | evaluation = SearchDistanceEvaluation(max_distance=10, positive=True) 35 | _test_evaluation_config(evaluation) 36 | 37 | 38 | def test_get_eval(): 39 | evaluation = _get_eval("distance") 40 | _test_evaluation_default(evaluation) 41 | 42 | evaluation = _get_eval(strategy="distance", kws = {"max_distance": 10, "positive": True}) 43 | _test_evaluation_config(evaluation) 44 | -------------------------------------------------------------------------------- /tests/unit_tests/test_client.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch, Mock 2 | 3 | from gptcache.utils import import_httpx 4 | 5 | import_httpx() 6 | from gptcache.client import Client 7 | 8 | 9 | def test_client(): 10 | client = Client() 11 | with patch("httpx.AsyncClient.post") as mock_response: 12 | mock_response.return_value = Mock(status_code=200) 13 | status_code = client.put("Hi", "Hi back") 14 | assert status_code == 200 15 | 16 | with patch("httpx.AsyncClient.post") as mock_response: 17 | m = Mock() 18 | attrs = {"json.return_value": {"answer": "Hi back"}} 19 | m.configure_mock(**attrs) 20 | mock_response.return_value = m 21 | ans = client.get("Hi") 22 | assert ans == "Hi back" 23 | -------------------------------------------------------------------------------- /tests/unit_tests/test_core.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from gptcache import cache, Config 4 | from gptcache.report import Report 5 | from gptcache.utils.cache_func import cache_all 6 | from gptcache.utils.time import time_cal 7 | 8 | 9 | def test_time_cal(): 10 | def log_time_func(fname, delta_time): 11 | assert fname == "unit_test" 12 | assert delta_time > 0.1 13 | 14 | cache.config = Config(log_time_func=log_time_func) 15 | 16 | @time_cal 17 | def time_cal_annotation(): 18 | time.sleep(0.2) 19 | 20 | func_name = "test_time_cal" 21 | 22 | def log_time_func(fname, delta_time): 23 | assert fname == func_name 24 | assert delta_time > 0.1 25 | 26 | cache.config = Config(log_time_func=log_time_func) 27 | 28 | def report_func(delta_time): 29 | assert delta_time > 0.1 30 | 31 | def time_cal_without_annotation(): 32 | time.sleep(0.2) 33 | 34 | time_cal( 35 | time_cal_without_annotation, func_name=func_name, report_func=report_func 36 | )() 37 | 38 | cache.config = None 39 | 40 | 41 | def test_cache_all(): 42 | assert cache_all() 43 | 44 | 45 | def test_report(): 46 | report = Report() 47 | report.embedding(1) 48 | report.embedding(3) 49 | report.search(2) 50 | report.search(4) 51 | report.hint_cache() 52 | report.hint_cache() 53 | 54 | assert report.average_embedding_time() == 2 55 | assert report.op_embedding.count == 2 56 | assert report.average_search_time() == 3 57 | assert report.op_search.count == 2 58 | assert report.hint_cache_count == 2 59 | 
-------------------------------------------------------------------------------- /tests/unit_tests/utils/test_error.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.error import ( 2 | CacheError, 3 | NotInitError, 4 | NotFoundError, 5 | ParamError, 6 | ) 7 | 8 | 9 | def test_error_type(): 10 | not_init_error = NotInitError() 11 | assert issubclass(type(not_init_error), CacheError) 12 | 13 | not_found_store_error = NotFoundError("unittest", "test_error_type") 14 | assert issubclass(type(not_found_store_error), CacheError) 15 | 16 | param_error = ParamError("unittest") 17 | assert issubclass(type(param_error), CacheError) 18 | 19 | 20 | def test_wrap(): 21 | import openai 22 | 23 | from gptcache.utils.error import wrap_error 24 | 25 | def raise_error(): 26 | try: 27 | raise openai.error.OpenAIError(message="test") 28 | except openai.error.OpenAIError as e: 29 | raise wrap_error(e) 30 | 31 | is_exception = False 32 | try: 33 | raise_error() 34 | except openai.error.OpenAIError as e: 35 | is_exception = True 36 | 37 | assert is_exception 38 | -------------------------------------------------------------------------------- /tests/unit_tests/utils/test_log.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.log import gptcache_log 2 | 3 | 4 | def test_error_type(): 5 | gptcache_log.setLevel("INFO") 6 | gptcache_log.error("Cache log error.") 7 | gptcache_log.warning("Cache log warning.") 8 | gptcache_log.info("Cache log info.") 9 | assert gptcache_log.level == 20 10 | -------------------------------------------------------------------------------- /tests/unit_tests/utils/test_response.py: -------------------------------------------------------------------------------- 1 | from gptcache.utils.response import ( 2 | get_message_from_openai_answer, 3 | get_stream_message_from_openai_answer, 4 | ) 5 | 6 | 7 | def test_get_message_from_openai_answer(): 8 | message = get_message_from_openai_answer( 9 | { 10 | "choices": [ 11 | { 12 | "finish_reason": "stop", 13 | "index": 0, 14 | "message": {"content": "hello", "role": "assistant"}, 15 | } 16 | ], 17 | "created": 1677825456, 18 | "id": "chatcmpl-6ptKqrhgRoVchm58Bby0UvJzq2ZuQ", 19 | "model": "gpt-3.5-turbo-0301", 20 | "object": "chat.completion", 21 | "usage": { 22 | "completion_tokens": 301, 23 | "prompt_tokens": 36, 24 | "total_tokens": 337, 25 | }, 26 | } 27 | ) 28 | assert message == "hello" 29 | 30 | 31 | def test_get_stream_message_from_openai_answer(): 32 | message = get_stream_message_from_openai_answer( 33 | { 34 | "choices": [ 35 | {"delta": {"role": "assistant"}, "finish_reason": None, "index": 0} 36 | ], 37 | "created": 1677825464, 38 | "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD", 39 | "model": "gpt-3.5-turbo-0301", 40 | "object": "chat.completion.chunk", 41 | } 42 | ) 43 | assert message == "" 44 | 45 | message = get_stream_message_from_openai_answer( 46 | { 47 | "choices": [{"delta": {"content": "2"}, "finish_reason": None, "index": 0}], 48 | "created": 1677825464, 49 | "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD", 50 | "model": "gpt-3.5-turbo-0301", 51 | "object": "chat.completion.chunk", 52 | } 53 | ) 54 | assert message == "2" 55 | --------------------------------------------------------------------------------