├── .env.example ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ ├── documentation.yml │ ├── feature-request.yml │ └── question.yml └── workflows │ ├── ci.yml │ └── comment.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── docs ├── Makefile ├── README.md ├── img │ ├── feedback.png │ └── table.png ├── requirements.txt └── source │ ├── conf.py │ ├── experiment.rst │ ├── harness.rst │ ├── index.rst │ ├── modules.rst │ ├── playground.rst │ ├── prompttools.experiment.experiments.rst │ ├── prompttools.experiment.rst │ ├── prompttools.experiment.widgets.rst │ ├── prompttools.harness.rst │ ├── prompttools.mock.rst │ ├── prompttools.prompttest.error.rst │ ├── prompttools.prompttest.rst │ ├── prompttools.prompttest.runner.rst │ ├── prompttools.requests.rst │ ├── prompttools.rst │ ├── prompttools.utils.rst │ ├── quickstart.rst │ ├── setup.rst │ ├── testing.rst │ ├── usage.rst │ └── utils.rst ├── examples ├── notebooks │ ├── AnthropicExperiment.ipynb │ ├── AutoEval.ipynb │ ├── AzureOpenAIServiceExperiment.ipynb │ ├── FineTuningExperiment.ipynb │ ├── GPT4RegressionTesting.ipynb │ ├── GPT4vsLlama2.ipynb │ ├── GoogleGeminiChatExperiment.ipynb │ ├── GoogleVertexChatExperiment.ipynb │ ├── HuggingFaceHub.ipynb │ ├── HumanFeedback.ipynb │ ├── LlamaCppExperiment.ipynb │ ├── LlamaHeadToHead.ipynb │ ├── MistralChatExperiment.ipynb │ ├── ModelComparison.ipynb │ ├── Moderation Evaluation.ipynb │ ├── OpenAIChatExperiment.ipynb │ ├── OpenAIChatFunctionExperiment.ipynb │ ├── PaLM2Experiment.ipynb │ ├── README.md │ ├── ReplicateLlama2.ipynb │ ├── SemanticSimilarity.ipynb │ ├── StructuredOutput.ipynb │ ├── audio_experiments │ │ ├── MusicGenExperiment.ipynb │ │ └── sample_audio_files │ │ │ └── 80s_billy_joel.wav │ ├── benchmark │ │ └── Benchmarking.ipynb │ ├── frameworks │ │ ├── LangChainRouterChainExperiment.ipynb │ │ ├── LangChainSequentialChainExperiment.ipynb │ │ └── MindsDBExperiment.ipynb │ ├── image_experiments │ │ ├── ReplicateStableDiffusion.ipynb │ │ ├── StableDiffusion.ipynb │ │ ├── compare_images_folder │ │ │ ├── A panda writing code in the Swiss Alps3.png │ │ │ └── A_fruit_basket_on_the_moon.png │ │ └── stablediffusion_images │ │ │ ├── An_apple_orchard.png │ │ │ └── Just_a_fruit_basket.png │ ├── remote │ │ ├── Logging.ipynb │ │ ├── Saving_and_Loading_ExperimentHarness_to_Remote.ipynb │ │ └── Saving_and_Loading_Experiment_to_Remote.ipynb │ └── vectordb_experiments │ │ ├── ChromaDBExperiment.ipynb │ │ ├── LanceDBExperiment.ipynb │ │ ├── PineconeExperiment.ipynb │ │ ├── QdrantExperiment.ipynb │ │ ├── RetrievalAugmentedGeneration.ipynb │ │ └── WeaviateExperiment.ipynb └── prompttests │ ├── test_chromadb.py │ ├── test_huggingface_hub.py │ ├── test_openai_chat.py │ └── test_qdrant.py ├── img ├── demo.gif ├── feedback.png ├── hegel_ai_logo.svg ├── hegel_ai_logo_dark.svg ├── playground.gif ├── prompttest.png └── table.png ├── prompttools ├── __init__.py ├── benchmarks │ ├── __init__.py │ └── benchmark.py ├── common.py ├── data │ └── benchmarking │ │ └── hellaswag │ │ ├── hellaswag_dataset.jsonl │ │ └── hellaswag_labels.lst ├── experiment │ ├── __init__.py │ ├── experiments │ │ ├── __init__.py │ │ ├── _utils.py │ │ ├── anthropic_completion_experiment.py │ │ ├── chromadb_experiment.py │ │ ├── error.py │ │ ├── experiment.py │ │ ├── google_gemini_chat_experiment.py │ │ ├── google_palm_experiment.py │ │ ├── google_vertex_chat_experiment.py │ │ ├── huggingface_endpoint_experiment.py │ │ ├── 
huggingface_hub_experiment.py │ │ ├── lancedb_experiment.py │ │ ├── langchain_experiment.py │ │ ├── llama_cpp_experiment.py │ │ ├── mindsdb_experiment.py │ │ ├── mistral_experiment.py │ │ ├── musicgen_experiment.py │ │ ├── openai_chat_experiment.py │ │ ├── openai_completion_experiment.py │ │ ├── pinecone_experiment.py │ │ ├── qdrant_experiment.py │ │ ├── replicate_experiment.py │ │ ├── stablediffusion_experiment.py │ │ ├── style.mplstyle │ │ └── weaviate_experiment.py │ └── widgets │ │ ├── __init__.py │ │ ├── comparison.py │ │ ├── feedback.py │ │ └── utility.py ├── harness │ ├── __init__.py │ ├── chat_history_harness.py │ ├── chat_model_comparison_harness.py │ ├── chat_prompt_template_harness.py │ ├── document_retrieval_harness.py │ ├── function_call_harness.py │ ├── harness.py │ ├── model_comparison_harness.py │ ├── multi_experiment_harness.py │ ├── prompt_template_harness.py │ ├── rag_harness.py │ ├── system_prompt_harness.py │ └── utility.py ├── logger │ ├── __init__.py │ └── logger.py ├── mock │ ├── __init__.py │ ├── mock.py │ └── mock_data │ │ └── images │ │ ├── 19th_century_wombat_gentleman.png │ │ └── Just_a_fruit_basket.png ├── playground │ ├── README.md │ ├── __init__.py │ ├── constants.py │ ├── data_loader.py │ ├── packages.txt │ ├── playground.py │ └── requirements.txt ├── prompttest │ ├── __init__.py │ ├── error │ │ ├── __init__.py │ │ └── failure.py │ ├── prompttest.py │ ├── runner │ │ ├── __init__.py │ │ └── runner.py │ └── threshold_type.py ├── requests │ ├── __init__.py │ ├── request_queue.py │ └── retries.py ├── selector │ ├── __init__.py │ └── prompt_selector.py ├── sentry.py ├── utils │ ├── __init__.py │ ├── autoeval.py │ ├── autoeval_from_expected.py │ ├── autoeval_scoring.py │ ├── autoeval_with_docs.py │ ├── chunk_text.py │ ├── error.py │ ├── expected.py │ ├── moderation.py │ ├── ranking_correlation.py │ ├── similarity.py │ ├── validate_json.py │ └── validate_python.py └── version.py ├── pyproject.toml ├── requirements.txt ├── scripts └── create_comment.py ├── setup.py ├── test ├── app.py ├── requirements.txt ├── test_experiment.py ├── test_harness.py └── test_logger.py └── version.txt /.env.example: -------------------------------------------------------------------------------- 1 | # To access remote service, make a copy of this file and save it as `.env` in the same directory 2 | # Then, paste your Hegel API key below between the quotation marks. 3 | ENV="prod" 4 | HEGELAI_API_KEY="" 5 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: 🐛 Bug Report 2 | description: Create a report to help us reproduce and fix the bug 3 | 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the 9 | existing and past issues](https://github.com/hegelai/prompttools/issues?q=is%3Aissue+sort%3Acreated-desc+). 10 | - type: textarea 11 | attributes: 12 | label: 🐛 Describe the bug 13 | description: | 14 | Please provide a clear and concise description of what the bug is. 15 | 16 | If relevant, add a minimal example so that we can reproduce the error by running the code. 
For example 17 | 18 | ```python 19 | # Include all the necessary imports at the beginning 20 | from prompttools.harness import PromptTemplateExperimentationHarness 21 | 22 | # A succinct reproducible example trimmed down to the essential parts: 23 | prompt_templates = ["Answer the following question: {{input}}", "Respond the following query: {{input}}"] 24 | user_inputs = [{"input": "Who was the first president?"}, {"input": "Who was the first president of India?"}] 25 | harness = PromptTemplateExperimentationHarness("text-davinci-003", prompt_templates, user_inputs) 26 | 27 | harness.run() 28 | harness.visualize() # Note: the bug is here, the visualization "X" is different from expected "Y" 29 | ``` 30 | 31 | If the code is too long, feel free to put it in a public gist and link it here: https://gist.github.com. 32 | 33 | Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. 34 | placeholder: | 35 | A clear and concise description of what the bug is. 36 | 37 | ```python 38 | Sample code to reproduce the problem 39 | ``` 40 | 41 | ``` 42 | The error message you got, with the full traceback. 43 | ``` 44 | validations: 45 | required: true 46 | - type: markdown 47 | attributes: 48 | value: > 49 | Thanks for contributing 🎉! 50 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Contact Us 4 | url: team@hegel-ai.com 5 | about: If for any reason you do not wish to open a public issue, feel free to contact us with the linked email. We would love to hear about your experience and pain points, and we'll offer bespoke advice and solution whenever possible. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yml: -------------------------------------------------------------------------------- 1 | name: 📚 Documentation 2 | description: Report an issue related to our documentation 3 | 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: 📚 The doc issue 8 | description: > 9 | A clear and concise description of what content in our documentation is an issue. 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Suggest a potential fix 15 | description: > 16 | Tell us how we could improve the documentation in this regard. 17 | - type: markdown 18 | attributes: 19 | value: > 20 | Thanks for contributing 🎉! 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: 🚀 Feature request 2 | description: Submit a proposal or request for a new feature in prompttools! 3 | 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: 🚀 The feature 8 | description: > 9 | A clear and concise description of the feature proposal 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Motivation, pitch 15 | description: > 16 | Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., 17 | *"I'm working on X and would like Y to be possible"*. 
If this is related to another GitHub issue, please link 18 | here too. 19 | validations: 20 | required: true 21 | - type: textarea 22 | attributes: 23 | label: Alternatives 24 | description: > 25 | A description of any alternative solutions or features you've considered, if any. 26 | - type: textarea 27 | attributes: 28 | label: Additional context 29 | description: > 30 | Add any other context or screenshots about the feature request. 31 | - type: markdown 32 | attributes: 33 | value: > 34 | Thanks for contributing 🎉! 35 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | name: ⁉️ Question and Discussion 2 | description: Discuss or ask question about how to use prompttools and what the best practice may be. 3 | 4 | body: 5 | - type: textarea 6 | attributes: 7 | label: ⁉️ Discussion/Question 8 | description: | 9 | Provide context on how you are using prompttools, what you have tried, and what question you may have. 10 | validations: 11 | required: true 12 | - type: markdown 13 | attributes: 14 | value: > 15 | Thanks for contributing 🎉! -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Run prompttools tests 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: ["3.11"] 17 | 18 | steps: 19 | - name: Check out source repository 20 | uses: actions/checkout@v3 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v3 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install flake8 pytest 29 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 30 | if [ -f test/requirements.txt ]; then pip install -r test/requirements.txt; fi 31 | - name: Lint with flake8 32 | run: | 33 | # exit-zero treats all errors as warnings. The GitHub editor is 120 chars wide 34 | flake8 . --count --exit-zero --max-complexity=10 --max-line-length=120 --statistics 35 | - name: Build prompttools 36 | run: | 37 | pip3 install . 
38 | - name: Test with pytest 39 | run: | 40 | pytest -v test 41 | - name: Run examples 42 | run: | 43 | DEBUG=1 python examples/prompttests/test_openai_chat.py 44 | -------------------------------------------------------------------------------- /.github/workflows/comment.yml: -------------------------------------------------------------------------------- 1 | name: PromptTools 2 | 3 | on: 4 | # Trigger the workflow on push or pull request 5 | pull_request: 6 | branches: 7 | - steventkrawczyk-test 8 | 9 | jobs: 10 | comment: 11 | permissions: write-all 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: ["3.11"] 17 | steps: 18 | - name: Check out source repository 19 | uses: actions/checkout@v3 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v3 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | python -m pip install flake8 pytest 28 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 29 | - name: Build prompttools 30 | run: | 31 | pip3 install . 32 | - name: Create markdown 33 | run: | 34 | DEBUG=1 python scripts/create_comment.py 35 | - name: Write comment 36 | uses: actions/github-script@v6 37 | with: 38 | script: | 39 | const fs = require('fs') 40 | fs.readFile('./markdown.md', 'utf8', (err, data) => { 41 | github.rest.issues.createComment({ 42 | issue_number: context.issue.number, 43 | owner: context.repo.owner, 44 | repo: context.repo.repo, 45 | body: data 46 | }) 47 | }); 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | prompttools/version.py 7 | 8 | # C extensions 9 | *.so 10 | 11 | macOS 12 | */.DS_Store 13 | 14 | # PyCharm 15 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 16 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 17 | # and can be added to the global gitignore or merged into this file. For a more nuclear 18 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 19 | .idea/ 20 | 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | share/python-wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | *.py,cover 63 | .hypothesis/ 64 | .pytest_cache/ 65 | cover/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | .pybuilder/ 89 | target/ 90 | 91 | # Jupyter Notebook 92 | .ipynb_checkpoints 93 | 94 | # Chroma 95 | .chroma 96 | 97 | # IPython 98 | profile_default/ 99 | ipython_config.py 100 | 101 | # pyenv 102 | # For a library or package, you might want to ignore these files since the code is 103 | # intended to run in multiple environments; otherwise, check them in: 104 | # .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # poetry 114 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 115 | # This is especially recommended for binary packages to ensure reproducibility, and is more 116 | # commonly ignored for libraries. 117 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 118 | #poetry.lock 119 | 120 | # pdm 121 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 122 | #pdm.lock 123 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 124 | # in version control. 125 | # https://pdm.fming.dev/#use-with-ide 126 | .pdm.toml 127 | 128 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 129 | __pypackages__/ 130 | 131 | # Celery stuff 132 | celerybeat-schedule 133 | celerybeat.pid 134 | 135 | # SageMath parsed files 136 | *.sage.py 137 | 138 | # Environments 139 | .env 140 | .venv 141 | env/ 142 | venv/ 143 | ptenv/ 144 | ENV/ 145 | env.bak/ 146 | venv.bak/ 147 | 148 | # Spyder project settings 149 | .spyderproject 150 | .spyproject 151 | 152 | # Rope project settings 153 | .ropeproject 154 | 155 | # mkdocs documentation 156 | /site 157 | 158 | # mypy 159 | .mypy_cache/ 160 | .dmypy.json 161 | dmypy.json 162 | 163 | # Pyre type checker 164 | .pyre/ 165 | 166 | # pytype static type analyzer 167 | .pytype/ 168 | 169 | # Cython debug symbols 170 | cython_debug/ 171 | 172 | # PyCharm 173 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 174 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 175 | # and can be added to the global gitignore or merged into this file. For a more nuclear 176 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
177 | #.idea/ 178 | 179 | **/.chroma/** 180 | /ptvenv 181 | 182 | **/lancedb/** 183 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v2.3.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | - repo: https://github.com/psf/black 8 | rev: 22.10.0 9 | hooks: 10 | - id: black 11 | - repo: https://github.com/PyCQA/flake8 12 | rev: 6.0.0 13 | hooks: 14 | - id: flake8 15 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the "docs/" directory with Sphinx 15 | sphinx: 16 | configuration: docs/source/conf.py 17 | 18 | # Optionally build your docs in additional formats such as PDF and ePub 19 | # formats: 20 | # - pdf 21 | # - epub 22 | 23 | # Optional but recommended, declare the Python requirements required 24 | # to build your documentation 25 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 26 | python: 27 | install: 28 | - requirements: docs/requirements.txt 29 | 30 | 31 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 
15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [team@hegel-ai.com](team@hegel-ai.com). 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Attribution 71 | 72 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 73 | version 2.1, available at 74 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 75 | 76 | Community Impact Guidelines were inspired by 77 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 78 | 79 | For answers to common questions about this code of conduct, see the FAQ at 80 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 81 | [https://www.contributor-covenant.org/translations][translations]. 
82 | 83 | [homepage]: https://www.contributor-covenant.org 84 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 85 | [Mozilla CoC]: https://github.com/mozilla/diversity 86 | [FAQ]: https://www.contributor-covenant.org/faq 87 | [translations]: https://www.contributor-covenant.org/translations 88 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to `prompttools` 2 | 3 | ## TL;DR 4 | 5 | We appreciate all contributions to our project! If you are interested in contributing to `prompttools`, there are many ways to help out. 6 | Your contributions may fall into the following categories: 7 | 8 | It will greatly help our project if you: 9 | 10 | - Star ⭐ our project and share it with your network! 11 | 12 | - Report issues that you see, or upvote issues that others have reported and are relevant to you 13 | 14 | - Look through existing issues for new feature ideas 15 | (["Help Wanted" issues](https://github.com/hegelai/prompttools/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22)) and open PRs to implement them. 16 | 17 | - Answer questions on the issue tracker; investigating and fixing bugs are very valuable contributions to the project. 18 | 19 | - Improve the documentation. If you find a typo in the documentation, 20 | do not hesitate to submit a GitHub issue or pull request. 21 | 22 | - Feature a usage example in our documentation; that is welcomed as well. 23 | 24 | ## Issues 25 | 26 | We use GitHub issues to track bugs. Please follow the existing templates if possible and ensure that the 27 | description is clear and has sufficient instructions to reproduce the issue. 28 | 29 | You can also open an issue to seek advice or discuss best practices for using our tool or prompting in general. 30 | 31 | ## Development installation 32 | 33 | ### Install `prompttools` from source 34 | 35 | ```bash 36 | git clone https://github.com/hegelai/prompttools.git 37 | cd prompttools 38 | pip install -e . 39 | pip install flake8 40 | ``` 41 | 42 | ## Pull Requests 43 | 44 | We actively welcome your pull requests. 45 | 46 | 1. Fork the repo and create your branch from `main`. 47 | - Optionally, you can create a new branch locally and push the branch to `origin`. 48 | 2. If you've added code that should be tested, add tests. 49 | 3. If you've changed APIs, update the inline documentation and examples. 50 | 4. Ensure all unit tests pass. 51 | 5. If you haven't already, complete the Contributor License Agreement ("CLA"). More details below. 52 | 53 | ### Code style 54 | 55 | `prompttools` enforces a consistent code format through [`pre-commit`](https://pre-commit.com). You can install it with 56 | 57 | ```shell 58 | pip install pre-commit 59 | ``` 60 | 61 | To check and in most cases fix the code format, stage all your changes (`git add`) and run `pre-commit run`. 62 | 63 | We recommend running the checks automatically before every `git commit`; you can set that up by executing 64 | this in the directory: 65 | 66 | ```shell 67 | pre-commit install 68 | ``` 69 | 70 | 71 | ## Contributor License Agreement ("CLA") 72 | 73 | In order to accept your pull request, we need you to sign a CLA. You only need to do this once to work on our project.
74 | 75 | Please sign the CLA here: 76 | 77 | ## License 78 | 79 | By contributing to `prompttools`, you agree that your contributions will be licensed under the LICENSE file in the root 80 | directory of this source tree. 81 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ## Building the Documentation 2 | 3 | To build the documentation, you will need [Sphinx](http://www.sphinx-doc.org) and various dependencies. You can install them via: 4 | 5 | ```bash 6 | cd docs/ 7 | pip install -r requirements.txt 8 | ``` 9 | 10 | You can then build the documentation by running `make ` from the `docs/` folder. Run `make` to get a list of all 11 | available output formats. The most common case is `html`: 12 | 13 | ```bash 14 | make html 15 | ``` 16 | 17 | ## Improving the Documentation 18 | 19 | Feel free to open an issue or pull request regarding any inaccuracy or potential improvement for 20 | our documentation. Thank you! 21 | -------------------------------------------------------------------------------- /docs/img/feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/docs/img/feedback.png -------------------------------------------------------------------------------- /docs/img/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/docs/img/table.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # Defining the exact version will make sure things don't break 2 | sphinx==5.3.0 3 | furo 4 | readthedocs-sphinx-search==0.1.1 5 | prompttools 6 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | from sphinx.builders.html import StandaloneHTMLBuilder 10 | 11 | project = "prompttools" 12 | copyright = "2023, Hegel AI" 13 | author = "Hegel AI" 14 | release = "0.0.46" 15 | 16 | # -- General configuration --------------------------------------------------- 17 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 18 | 19 | extensions = [ 20 | "sphinx.ext.duration", 21 | "sphinx.ext.doctest", 22 | "sphinx.ext.autodoc", 23 | "sphinx.ext.autosummary", 24 | "sphinx.ext.intersphinx", 25 | "sphinx.ext.napoleon", 26 | ] 27 | 28 | intersphinx_mapping = { 29 | "python": ("https://docs.python.org/3/", None), 30 | "sphinx": ("https://www.sphinx-doc.org/en/master/", None), 31 | } 32 | intersphinx_disabled_domains = ["std"] 33 | 34 | templates_path = ["_templates"] 35 | exclude_patterns = [] 36 | 37 | # -- Options for HTML output ------------------------------------------------- 38 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 39 | 40 | html_theme = "furo" 41 | html_static_path = ["_static"] 42 | 43 | 44 | # We can enable these options individually if appropriate 45 | html_theme_options = { 46 | # 'analytics_id': 'G-XXXXXXXXXX', # Provided by Google in your dashboard 47 | # 'analytics_anonymize_ip': False, 48 | # 'logo_only': False, 49 | # 'display_version': True, 50 | # 'prev_next_buttons_location': 'bottom', 51 | # 'style_external_links': False, 52 | # 'vcs_pageview_mode': '', 53 | # 'style_nav_header_background': 'white', 54 | # Toc options 55 | # 'collapse_navigation': False, 56 | # 'sticky_navigation': True, 57 | # 'navigation_depth': 4, 58 | # 'includehidden': True, 59 | # 'titles_only': False 60 | } 61 | 62 | autodoc_typehints = "description" 63 | 64 | # -- Options for EPUB output 65 | epub_show_urls = "footnote" 66 | 67 | StandaloneHTMLBuilder.supported_image_types = ["image/svg+xml", "image/gif", "image/png", "image/jpeg"] 68 | -------------------------------------------------------------------------------- /docs/source/experiment.rst: -------------------------------------------------------------------------------- 1 | Experiment 2 | ========================== 3 | 4 | .. currentmodule:: prompttools.experiment 5 | 6 | There are two main abstractions used in the ``prompttools`` library: Experiments and Harnesses. 7 | Occasionally, you may want to use a harness, because it abstracts away more details. 8 | 9 | An experiment is a low level abstraction that takes the Cartesian product of possible inputs to 10 | an LLM API. For example, the ``OpenAIChatExperiment`` accepts lists of inputs for each parameter 11 | of the OpenAI Chat Completion API. Then, it constructs and asynchronously executes requests 12 | using those potential inputs. An example of using experiment is `here `_. 13 | 14 | There are two ways to initialize an experiment: 15 | 16 | 1. Wrap your parameters in ``list``\ s and pass them into the ``__init__`` method. See each class's 17 | method signature in the "Integrated Experiment APIs" section for details. 18 | 2. Define which parameters should be tested and which ones should be frozen in two dictionaries. Pass the 19 | dictionaries to the ``initialize`` method. 
See the ``classmethod initialize`` below for details. 20 | 21 | The ``Experiment`` superclass's shared API is below. 22 | 23 | .. autoclass:: Experiment 24 | :members: 25 | 26 | Integrated Experiment APIs 27 | ----------------------------- 28 | 29 | LLMs 30 | +++++++++++++++++++++++++++++++++++++++++ 31 | 32 | .. autoclass:: OpenAIChatExperiment 33 | 34 | .. autoclass:: OpenAICompletionExperiment 35 | 36 | .. autoclass:: AnthropicCompletionExperiment 37 | 38 | .. autoclass:: HuggingFaceHubExperiment 39 | 40 | .. autoclass:: GoogleGeminiChatCompletionExperiment 41 | 42 | .. autoclass:: GooglePaLMCompletionExperiment 43 | 44 | .. autoclass:: GoogleVertexChatCompletionExperiment 45 | 46 | .. autoclass:: MistralChatCompletionExperiment 47 | 48 | .. autoclass:: LlamaCppExperiment 49 | 50 | .. autoclass:: ReplicateExperiment 51 | 52 | Frameworks 53 | +++++++++++++++++++++++++++++++++++++++++ 54 | 55 | .. autoclass:: SequentialChainExperiment 56 | 57 | .. autoclass:: RouterChainExperiment 58 | 59 | .. autoclass:: MindsDBExperiment 60 | 61 | Vector DBs 62 | +++++++++++++++++++++++++++++++++++++++++ 63 | 64 | .. autoclass:: ChromaDBExperiment 65 | 66 | .. autoclass:: WeaviateExperiment 67 | 68 | .. autoclass:: LanceDBExperiment 69 | 70 | .. autoclass:: QdrantExperiment 71 | 72 | .. autoclass:: PineconeExperiment 73 | 74 | Computer Vision 75 | +++++++++++++++++++++++++++++++++++++++++ 76 | 77 | .. autoclass:: StableDiffusionExperiment 78 | 79 | .. autoclass:: ReplicateExperiment 80 | -------------------------------------------------------------------------------- /docs/source/harness.rst: -------------------------------------------------------------------------------- 1 | Harness 2 | =========== 3 | 4 | .. currentmodule:: prompttools.harness 5 | 6 | There are two main abstractions used in the ``prompttools`` library: Experiments and Harnesses. 7 | Occasionally, you may want to use a harness, because it abstracts away more details. 8 | 9 | A harness is built on top of an experiment, and manages abstractions over inputs. 10 | For example, the ``PromptTemplateExperimentationHarness`` freezes one set of model arguments 11 | and varies the prompt input based on prompt templates and user inputs. It then constructs 12 | a corresponding experiment, and keeps track of the templates and inputs used for each prompt. 13 | 14 | .. autoclass:: ExperimentationHarness 15 | :members: 16 | 17 | .. autoclass:: ChatHistoryExperimentationHarness 18 | 19 | .. autoclass:: ChatModelComparisonHarness 20 | 21 | .. autoclass:: ChatPromptTemplateExperimentationHarness 22 | 23 | .. autoclass:: ModelComparisonHarness 24 | 25 | .. autoclass:: MultiExperimentHarness 26 | 27 | .. autoclass:: PromptTemplateExperimentationHarness 28 | 29 | .. autoclass:: RetrievalAugmentedGenerationExperimentationHarness 30 | 31 | .. autoclass:: SystemPromptExperimentationHarness 32 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. prompttools documentation master file, created by 2 | sphinx-quickstart on Sun Jul 16 15:34:13 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | PromptTools 7 | =========== 8 | 9 | Welcome to ``prompttools`` created by `Hegel 10 | AI `__! This 11 | `repository `_ 12 | offers a set of free, open-source tools for testing and experimenting with prompts. 
13 | The core idea is to enable developers to evaluate prompts using familiar 14 | interfaces like *code* and *notebooks*. 15 | 16 | .. image:: ../../img/demo.gif 17 | :alt: The prompttools notebook demo. 18 | :align: center 19 | 20 | There are primarily two ways you can use ``prompttools`` in your LLM workflow: 21 | 22 | 1. Run experiments in `notebooks `_ and evaluate the outputs. 23 | 2. Turn evaluations into 24 | `unit tests `_ and 25 | integrate them into your CI/CD workflow 26 | `via Github Actions `_. 27 | 28 | Please don't hesitate to star our repo, reach out, and provide feedback on GitHub! 29 | 30 | To stay in touch with us about issues and future updates, join the 31 | `Discord `__. 32 | 33 | Installation 34 | ------------ 35 | 36 | To install ``prompttools`` using pip: 37 | 38 | .. code:: bash 39 | 40 | pip install prompttools 41 | 42 | To install from source, first clone this GitHub repo to your local 43 | machine, then, from the repo, run: 44 | 45 | .. code:: bash 46 | 47 | pip install . 48 | 49 | You can then proceed to run `our examples `__. 50 | 51 | Frequently Asked Questions (FAQs) 52 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 53 | 54 | 1. Will this library forward my LLM calls to a server before sending it 55 | to OpenAI/Anthropic/etc? 56 | 57 | - No, the source code will be executed on your machine. Any call to 58 | LLM APIs will be directly executed from your machine without any 59 | forwarding. 60 | 61 | Contributing 62 | ------------ 63 | 64 | We welcome PRs and suggestions! Don’t hesitate to open a PR/issue or to 65 | reach out to us `via email `__. Please have a 66 | look at our `contribution guide `__ and `“Help Wanted” 67 | issues `__ 68 | to get started! 69 | 70 | Usage and Feedback 71 | ------------------ 72 | 73 | We will be delighted to work with early adopters to shape our designs. 74 | Please reach out to us `via email `__ if 75 | you’re interested in using this tooling for your project or have any 76 | feedback. 77 | 78 | License 79 | ------- 80 | 81 | We will be gradually releasing more components to the open-source 82 | community. The current license can be found in the `LICENSE `__ 83 | file. If there is any concern, please `contact 84 | us `__ and we will be happy to work with you. 85 | 86 | Module Index 87 | ------- 88 | 89 | * :ref:`modindex` 90 | 91 | .. Hidden TOCs 92 | 93 | .. toctree:: 94 | :caption: Getting Started 95 | :maxdepth: 2 96 | :hidden: 97 | 98 | quickstart 99 | usage 100 | playground 101 | 102 | .. toctree:: 103 | :caption: Concepts 104 | :maxdepth: 2 105 | :hidden: 106 | 107 | experiment 108 | harness 109 | utils 110 | testing 111 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | prompttools 2 | =========== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | prompttools 8 | -------------------------------------------------------------------------------- /docs/source/playground.rst: -------------------------------------------------------------------------------- 1 | Playground 2 | =========== 3 | 4 | If you want to interact with `prompttools` using our playground interface, you can launch it with the following commands. 5 | 6 | .. image:: ../../img/playground.gif 7 | :alt: The prompttools playground. 8 | :align: center 9 | 10 | First, install prompttools: 11 | 12 | .. code:: bash 13 | 14 | pip install prompttools 15 | 16 | Then, clone the git repo and launch the streamlit app: 17 | 18 | .. 
code:: bash 19 | 20 | git clone https://github.com/hegelai/prompttools.git 21 | cd prompttools && streamlit run prompttools/playground/playground.py 22 | -------------------------------------------------------------------------------- /docs/source/prompttools.experiment.experiments.rst: -------------------------------------------------------------------------------- 1 | prompttools.experiment.experiments package 2 | ========================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.experiment.experiments.anthropic\_claude\_experiment module 8 | ----------------------------------------------------------------------- 9 | 10 | .. automodule:: prompttools.experiment.experiments.anthropic_claude_experiment 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | prompttools.experiment.experiments.chromadb\_experiment module 16 | -------------------------------------------------------------- 17 | 18 | .. automodule:: prompttools.experiment.experiments.chromadb_experiment 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | prompttools.experiment.experiments.error module 24 | ----------------------------------------------- 25 | 26 | .. automodule:: prompttools.experiment.experiments.error 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | prompttools.experiment.experiments.experiment module 32 | ---------------------------------------------------- 33 | 34 | .. automodule:: prompttools.experiment.experiments.experiment 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | prompttools.experiment.experiments.google\_palm\_experiment module 40 | ------------------------------------------------------------------ 41 | 42 | .. automodule:: prompttools.experiment.experiments.google_palm_experiment 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | prompttools.experiment.experiments.huggingface\_endpoint\_experiment module 48 | --------------------------------------------------------------------------- 49 | 50 | .. automodule:: prompttools.experiment.experiments.huggingface_endpoint_experiment 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | prompttools.experiment.experiments.huggingface\_hub\_experiment module 56 | ---------------------------------------------------------------------- 57 | 58 | .. automodule:: prompttools.experiment.experiments.huggingface_hub_experiment 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | prompttools.experiment.experiments.llama\_cpp\_experiment module 64 | ---------------------------------------------------------------- 65 | 66 | .. automodule:: prompttools.experiment.experiments.llama_cpp_experiment 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | prompttools.experiment.experiments.openai\_chat\_experiment module 72 | ------------------------------------------------------------------ 73 | 74 | .. automodule:: prompttools.experiment.experiments.openai_chat_experiment 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | prompttools.experiment.experiments.openai\_completion\_experiment module 80 | ------------------------------------------------------------------------ 81 | 82 | .. automodule:: prompttools.experiment.experiments.openai_completion_experiment 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | prompttools.experiment.experiments.openai\_function\_experiment module 88 | ---------------------------------------------------------------------- 89 | 90 | .. 
automodule:: prompttools.experiment.experiments.openai_function_experiment 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | prompttools.experiment.experiments.vector\_database\_experiment module 96 | ---------------------------------------------------------------------- 97 | 98 | .. automodule:: prompttools.experiment.experiments.vector_database_experiment 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | Module contents 104 | --------------- 105 | 106 | .. automodule:: prompttools.experiment.experiments 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | -------------------------------------------------------------------------------- /docs/source/prompttools.experiment.rst: -------------------------------------------------------------------------------- 1 | prompttools.experiment package 2 | ============================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | prompttools.experiment.experiments 11 | prompttools.experiment.widgets 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: prompttools.experiment 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/source/prompttools.experiment.widgets.rst: -------------------------------------------------------------------------------- 1 | prompttools.experiment.widgets package 2 | ====================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.experiment.widgets.comparison module 8 | ------------------------------------------------ 9 | 10 | .. automodule:: prompttools.experiment.widgets.comparison 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | prompttools.experiment.widgets.feedback module 16 | ---------------------------------------------- 17 | 18 | .. automodule:: prompttools.experiment.widgets.feedback 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | prompttools.experiment.widgets.utility module 24 | --------------------------------------------- 25 | 26 | .. automodule:: prompttools.experiment.widgets.utility 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: prompttools.experiment.widgets 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/source/prompttools.harness.rst: -------------------------------------------------------------------------------- 1 | prompttools.harness package 2 | =========================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.harness.chat\_history\_harness module 8 | ------------------------------------------------- 9 | 10 | .. automodule:: prompttools.harness.chat_history_harness 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | prompttools.harness.chat\_model\_comparison\_harness module 16 | ----------------------------------------------------------- 17 | 18 | .. automodule:: prompttools.harness.chat_model_comparison_harness 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | prompttools.harness.document\_retrieval\_harness module 24 | ------------------------------------------------------- 25 | 26 | .. 
automodule:: prompttools.harness.document_retrieval_harness 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | prompttools.harness.function\_call\_harness module 32 | -------------------------------------------------- 33 | 34 | .. automodule:: prompttools.harness.function_call_harness 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | prompttools.harness.harness module 40 | ---------------------------------- 41 | 42 | .. automodule:: prompttools.harness.harness 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | prompttools.harness.multi\_experiment\_harness module 48 | ----------------------------------------------------- 49 | 50 | .. automodule:: prompttools.harness.multi_experiment_harness 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | prompttools.harness.prompt\_template\_harness module 56 | ---------------------------------------------------- 57 | 58 | .. automodule:: prompttools.harness.prompt_template_harness 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | prompttools.harness.system\_prompt\_harness module 64 | -------------------------------------------------- 65 | 66 | .. automodule:: prompttools.harness.system_prompt_harness 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | Module contents 72 | --------------- 73 | 74 | .. automodule:: prompttools.harness 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | -------------------------------------------------------------------------------- /docs/source/prompttools.mock.rst: -------------------------------------------------------------------------------- 1 | prompttools.mock package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.mock.mock module 8 | ---------------------------- 9 | 10 | .. automodule:: prompttools.mock.mock 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: prompttools.mock 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/source/prompttools.prompttest.error.rst: -------------------------------------------------------------------------------- 1 | prompttools.prompttest.error package 2 | ==================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.prompttest.error.failure module 8 | ------------------------------------------- 9 | 10 | .. automodule:: prompttools.prompttest.error.failure 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: prompttools.prompttest.error 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/source/prompttools.prompttest.rst: -------------------------------------------------------------------------------- 1 | prompttools.prompttest package 2 | ============================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | prompttools.prompttest.error 11 | prompttools.prompttest.runner 12 | 13 | Submodules 14 | ---------- 15 | 16 | prompttools.prompttest.prompttest module 17 | ---------------------------------------- 18 | 19 | .. 
automodule:: prompttools.prompttest.prompttest 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | prompttools.prompttest.threshold\_type module 25 | --------------------------------------------- 26 | 27 | .. automodule:: prompttools.prompttest.threshold_type 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | 32 | Module contents 33 | --------------- 34 | 35 | .. automodule:: prompttools.prompttest 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/prompttools.prompttest.runner.rst: -------------------------------------------------------------------------------- 1 | prompttools.prompttest.runner package 2 | ===================================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.prompttest.runner.prompt\_template\_runner module 8 | ------------------------------------------------------------- 9 | 10 | .. automodule:: prompttools.prompttest.runner.prompt_template_runner 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | prompttools.prompttest.runner.runner module 16 | ------------------------------------------- 17 | 18 | .. automodule:: prompttools.prompttest.runner.runner 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | prompttools.prompttest.runner.system\_prompt\_runner module 24 | ----------------------------------------------------------- 25 | 26 | .. automodule:: prompttools.prompttest.runner.system_prompt_runner 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: prompttools.prompttest.runner 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/source/prompttools.requests.rst: -------------------------------------------------------------------------------- 1 | prompttools.requests package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.requests.request\_queue module 8 | ------------------------------------------ 9 | 10 | .. automodule:: prompttools.requests.request_queue 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | prompttools.requests.retries module 16 | ----------------------------------- 17 | 18 | .. automodule:: prompttools.requests.retries 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: prompttools.requests 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/source/prompttools.rst: -------------------------------------------------------------------------------- 1 | prompttools package 2 | =================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | prompttools.experiment 11 | prompttools.harness 12 | prompttools.mock 13 | prompttools.prompttest 14 | prompttools.requests 15 | prompttools.utils 16 | 17 | Submodules 18 | ---------- 19 | 20 | prompttools.version module 21 | -------------------------- 22 | 23 | .. automodule:: prompttools.version 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | Module contents 29 | --------------- 30 | 31 | .. 
automodule:: prompttools 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | -------------------------------------------------------------------------------- /docs/source/prompttools.utils.rst: -------------------------------------------------------------------------------- 1 | prompttools.utils package 2 | ========================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | prompttools.utils.autoeval module 8 | --------------------------------- 9 | 10 | .. automodule:: prompttools.utils.autoeval 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | prompttools.utils.error module 16 | ------------------------------ 17 | 18 | .. automodule:: prompttools.utils.error 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | prompttools.utils.expected module 24 | --------------------------------- 25 | 26 | .. automodule:: prompttools.utils.expected 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | prompttools.utils.json module 32 | ----------------------------- 33 | 34 | .. automodule:: prompttools.utils.json 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | prompttools.utils.python module 40 | ------------------------------- 41 | 42 | .. automodule:: prompttools.utils.python 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | prompttools.utils.similarity module 48 | ----------------------------------- 49 | 50 | .. automodule:: prompttools.utils.similarity 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | Module contents 56 | --------------- 57 | 58 | .. automodule:: prompttools.utils 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | -------------------------------------------------------------------------------- /docs/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | =========== 3 | 4 | To install ``prompttools``, you can use ``pip``: 5 | 6 | :: 7 | 8 | pip install prompttools 9 | 10 | You can run a simple ``prompttools`` example locally with the following 11 | 12 | :: 13 | 14 | git clone https://github.com/hegelai/prompttools.git 15 | cd prompttools && jupyter notebook examples/notebooks/OpenAIChatExperiment.ipynb 16 | 17 | There are many `notebook examples `_ that 18 | you can explore. The README on that page gives you a brief description of the examples. 19 | 20 | If you prefer to have a UI instead of using Jupyter notebooks, have a look at 21 | our `Playground <./playground.html>`_. 22 | -------------------------------------------------------------------------------- /docs/source/setup.rst: -------------------------------------------------------------------------------- 1 | setup module 2 | ============ 3 | 4 | .. automodule:: setup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/testing.rst: -------------------------------------------------------------------------------- 1 | Testing and CI/CD 2 | ================= 3 | 4 | .. currentmodule:: prompttools.prompttest 5 | 6 | After identifying the right evaluation/validation function for the outputs, you 7 | can easily create unit tests and add them to your CI/CD workflow. 8 | 9 | Unit tests in ``prompttools`` are called ``prompttests``. They use the ``@prompttest`` annotation to transform a 10 | completion function into an efficient unit test.
The ``prompttest`` framework executes and evaluates experiments 11 | so you can test prompts over time. For example: 12 | 13 | .. code-block:: python 14 | 15 | import prompttools.prompttest as prompttest 16 | 17 | @prompttest.prompttest( 18 | metric_name="is_valid_json", 19 | eval_fn=validate_json.evaluate, 20 | prompts=[create_json_prompt()], 21 | ) 22 | def json_completion_fn(prompt: str): 23 | response = None 24 | if os.getenv("DEBUG", default=False): 25 | response = mock_openai_completion_fn(**{"prompt": prompt}) 26 | else: 27 | response = openai.completions.create(model="babbage-002", prompt=prompt) 28 | return response.choices[0].text 29 | 30 | 31 | In the file, be sure to call the ``main()`` method of ``prompttest`` like you would for ``unittest``. 32 | 33 | .. code-block:: python 34 | 35 | if __name__ == "__main__": 36 | prompttest.main() 37 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Using the library 2 | ================= 3 | 4 | There are primarily two ways you can use ``prompttools`` in your LLM 5 | workflow: 6 | 7 | 1. Run experiments in `notebooks `__. 8 | 2. Write `unit tests `__ and 9 | integrate them into your CI/CD workflow `via GitHub 10 | Actions `__. 11 | 12 | Notebooks 13 | ------------ 14 | 15 | There are a few different ways to run an experiment in a notebook. 16 | 17 | The simplest way is to define an experimentation harness and an 18 | evaluation function: 19 | 20 | .. code:: python 21 | 22 | from prompttools.harness import PromptTemplateExperimentationHarness 23 | 24 | 25 | def eval_fn(prompt: str, results: Dict, metadata: Dict) -> float: 26 | # Your logic here, or use a built-in one such as `prompttools.utils.similarity`. 27 | pass 28 | 29 | prompt_templates = [ 30 | "Answer the following question: {{input}}", 31 | "Respond to the following query: {{input}}" 32 | ] 33 | 34 | user_inputs = [ 35 | {"input": "Who was the first president?"}, 36 | {"input": "Who was the first president of India?"} 37 | ] 38 | 39 | harness = PromptTemplateExperimentationHarness("text-davinci-003", 40 | prompt_templates, 41 | user_inputs) 42 | 43 | 44 | harness.run() 45 | harness.evaluate("metric_name", eval_fn) 46 | harness.visualize() # The results will be displayed as a table in your notebook 47 | 48 | .. figure:: ../img/table.png 49 | :alt: The visualized table in your notebook. 50 | 51 | If you are interested in comparing different models, the `ModelComparison 52 | example `__ may be of 53 | interest. 54 | 55 | For an example of a built-in evaluation function, please see this example 56 | of `semantic similarity 57 | comparison `__ for 58 | details. 59 | 60 | You can also manually enter feedback to evaluate prompts; see 61 | `HumanFeedback.ipynb `__. 62 | 63 | .. figure:: ../img/feedback.png 64 | :alt: You can annotate feedback directly within the notebook. 65 | 66 | .. 67 | 68 | Note: Above we used an ``ExperimentationHarness``. Under the hood, 69 | that harness uses an ``Experiment`` to construct and make API calls 70 | to LLMs. The harness is responsible for managing higher level 71 | abstractions, like prompt templates or system prompts. To see how 72 | experiments work at a low level, `see this 73 | example `__. 74 | 75 | Unit Tests 76 | ------------ 77 | 78 | Unit tests in ``prompttools`` are called ``prompttests``. They use the ``@prompttest`` annotation to transform a 79 | completion function into an efficient unit test.
The ``prompttest`` framework executes and evaluates experiments 80 | so you can test prompts over time. For example: 81 | 82 | .. code-block:: python 83 | 84 | import prompttools.prompttest as prompttest 85 | 86 | @prompttest.prompttest( 87 | metric_name="is_valid_json", 88 | eval_fn=validate_json.evaluate, 89 | prompts=[create_json_prompt()], 90 | ) 91 | def json_completion_fn(prompt: str): 92 | response = None 93 | if os.getenv("DEBUG", default=False): 94 | response = mock_openai_completion_fn(**{"prompt": prompt}) 95 | else: 96 | response = openai.completions.create(prompt) 97 | return response.choices[0].text 98 | 99 | 100 | The evaluation functions should accept one of the following as it's parameters: 101 | 102 | * ``input_pair: Tuple[str, Dict[str, str]], results: Dict, metadata: Dict`` 103 | * ``prompt: str, results: Dict, metadata: Dict`` 104 | * ``messages: List[Dict[str,str], results: Dict, metadata: Dict`` 105 | 106 | 107 | You can see an example 108 | test `here `__ and an example 109 | of that test being used as a Github Action 110 | `here `__. 111 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | Evaluation and Validation 2 | =========== 3 | 4 | .. currentmodule:: prompttools.utils 5 | 6 | These built-in functions help you to evaluate the outputs of your experiments. 7 | They can also be used with ``prompttest`` for be part of your CI/CD system. 8 | 9 | .. autofunction:: prompttools.utils.autoeval_binary_scoring 10 | 11 | .. autofunction:: prompttools.utils.autoeval_scoring 12 | 13 | .. autofunction:: prompttools.utils.autoeval_with_documents 14 | 15 | .. autofunction:: prompttools.utils.chunk_text 16 | 17 | .. autofunction:: prompttools.utils.compute_similarity_against_model 18 | 19 | .. autofunction:: prompttools.utils.apply_moderation 20 | 21 | .. autofunction:: prompttools.utils.ranking_correlation 22 | 23 | .. autofunction:: prompttools.utils.validate_json_response 24 | 25 | .. autofunction:: prompttools.utils.validate_json.validate_keys 26 | 27 | .. autofunction:: prompttools.utils.validate_python_response 28 | 29 | .. autofunction:: prompttools.utils.semantic_similarity 30 | 31 | .. autofunction:: prompttools.utils.structural_similarity 32 | 33 | .. autofunction:: prompttools.utils.similarity.compute 34 | -------------------------------------------------------------------------------- /examples/notebooks/GoogleGeminiChatExperiment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Google Gemini Chat Experiment Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Installations" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# !pip install --quiet --force-reinstall prompttools" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Setup imports and API keys" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "In order for the Google GenAI API to work, you must set up your Google AI Studio credentials (one example in the following cell) or execute this experiment on Google Colab.\n", 38 | "\n", 39 | "Executing on Google Colab may require the least amount of set-up." 
40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 5, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import google.generativeai as genai\n", 49 | "\n", 50 | "from google.colab import userdata\n", 51 | "\n", 52 | "GOOGLE_API_KEY = \"\" # You can manually set your key\n", 53 | "# GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY') # Or, you can read it from your account\n", 54 | "\n", 55 | "genai.configure(api_key=GOOGLE_API_KEY)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "Once you succeed in setting up your credential, you should be able to execute the following cell without error and see the list of models you have access to." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "for m in genai.list_models():\n", 72 | " if 'generateContent' in m.supported_generation_methods:\n", 73 | " print(m.name)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "Then we'll import the relevant `prompttools` modules to setup our experiment." 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "ExecuteTime": { 87 | "end_time": "2023-07-28T21:15:15.360723Z", 88 | "start_time": "2023-07-28T21:15:15.230441Z" 89 | }, 90 | "collapsed": true, 91 | "jupyter": { 92 | "outputs_hidden": true 93 | } 94 | }, 95 | "source": [ 96 | "## Run an experiment" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "Next, we create our test inputs. We can iterate over models (`\"gemini-pro\"` in this case, you can also use the ultra model if you have access to it), contents (equivalent of prompt). You can also experiment with configurations like temperature using `generation_config` or `safety_settings`.\n" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 31, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "from prompttools.experiment import GoogleVertexChatCompletionExperiment\n", 113 | "\n", 114 | "model = ['gemini-pro']\n", 115 | "contents = [\"What is the meaning of life?\", \"Who was the first president?\"]\n", 116 | "\n", 117 | "experiment = GoogleVertexChatCompletionExperiment(model=model, contents=contents)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "experiment.run()\n", 127 | "experiment.visualize()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Evaluate the model response" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Please reference other notebooks (such as Google PaLM 2, Anthropic) for detailed evaluation of the model's response." 
142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [] 150 | } 151 | ], 152 | "metadata": { 153 | "kernelspec": { 154 | "display_name": "Python 3 (ipykernel)", 155 | "language": "python", 156 | "name": "python3" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.11.7" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 4 173 | } 174 | -------------------------------------------------------------------------------- /examples/notebooks/GoogleVertexChatExperiment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Google Vertex Chat Experiment Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Installations" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# !pip install --quiet --force-reinstall prompttools" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Setup imports and API keys" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "In order for the Google Vertex AI API to work, you must set up your Google aiplatform credentials (one example in the following cell) or execute this experiment on https://console.cloud.google.com/vertex-ai/." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 5, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# from google.cloud import aiplatform\n", 47 | "\n", 48 | "# aiplatform.init(\n", 49 | "# project=project,\n", 50 | "# location=location,\n", 51 | "# experiment=experiment,\n", 52 | "# staging_bucket=staging_bucket,\n", 53 | "# credentials=credentials,\n", 54 | "# encryption_spec_key_name=encryption_spec_key_name,\n", 55 | "# service_account=service_account,\n", 56 | "# )" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "Once you succeed in setting up your credential, you should be able to execute the following cell without error." 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "from vertexai.preview.language_models import ChatModel, InputOutputTextPair\n", 73 | "\n", 74 | "chat_model = ChatModel.from_pretrained(\"chat-bison\")" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "Then we'll import the relevant `prompttools` modules to setup our experiment." 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": { 87 | "ExecuteTime": { 88 | "end_time": "2023-07-28T21:15:15.360723Z", 89 | "start_time": "2023-07-28T21:15:15.230441Z" 90 | }, 91 | "collapsed": true 92 | }, 93 | "source": [ 94 | "## Run an experiment" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Next, we create our test inputs. We can iterate over models (`\"chat-bison\"` in this case), context (equivalent of system message), message (equivalent of prompt). 
You can also experiment with configurations like temperature.\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 31, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "from prompttools.experiment import GoogleVertexChatCompletionExperiment\n", 111 | "\n", 112 | "\n", 113 | "model = [\"chat-bison\"]\n", 114 | "\n", 115 | "context = [\"You are a helpful assistant.\",\n", 116 | " \"Answer the following question only if you know the answer or can make a well-informed guess; otherwise tell me you don't know it. In addition, explain your reasoning of your final answer.\"]\n", 117 | "\n", 118 | "message = [\n", 119 | " \"Is 97 a prime number?\",\n", 120 | " \"Is 17077 a prime number?\",\n", 121 | "]\n", 122 | "\n", 123 | "experiment = GoogleVertexChatCompletionExperiment(model=model, message=message, context=context)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "experiment.run()\n", 133 | "experiment.visualize()" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## Evaluate the model response" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Please reference other notebooks (such as Google PaLM 2, Anthropic) for detailed evaluation of the model's response." 148 | ] 149 | } 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "Python 3 (ipykernel)", 154 | "language": "python", 155 | "name": "python3" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | "pygments_lexer": "ipython3", 167 | "version": "3.11.4" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 1 172 | } 173 | -------------------------------------------------------------------------------- /examples/notebooks/README.md: -------------------------------------------------------------------------------- 1 | ## Notebook Examples 2 | 3 | In this folder, you will find various examples of how you can use `prompttools` for 4 | various experimentation and testing. Often, you can simply change a few parameters 5 | and put in your own test data to make `prompttools` suitable for your use case. 6 | 7 | If you have additional use case in mind or spot an issue, please open an issue 8 | and we will be happy to discuss. 9 | 10 | We also welcome community contribution of usage examples! Please open a PR if you 11 | have something to share. 12 | 13 | ### LLM 14 | 15 | #### Single Model Examples 16 | - [OpenAI Chat Experiment](OpenAIChatExperiment.ipynb) shows how you can experiment with OpenAI with different models and parameters. 17 | - [OpenAI Chat Function Experiment](OpenAIChatFunctionExperiment.ipynb) shows how you can experiment with OpenAI's function calling API. 18 | - [Anthropic Experiment](AnthropicExperiment.ipynb) shows how you can experiment with Anthropic Claude with different models and parameters. 19 | - [Google PaLM 2 Text Completion](PaLM2Experiment.ipynb) 20 | and [Google Vertex AI Chat Completion](GoogleVertexChatExperiment.ipynb) utilizes Google's LLM models. 21 | - [LLaMA Cpp Experiment](LlamaCppExperiment.ipynb) executes LLaMA locally with various parameters and see how it does. 
23 | - [HuggingFace Hub](HuggingFaceHub.ipynb) compares different OSS models hosted on HuggingFace. 24 | - [GPT-4 Regression](GPT4RegressionTesting.ipynb) examines how the current GPT-4 model compares with older, frozen versions. 25 | 26 | #### Head To Head Model Comparison 27 | 28 | - [Model Comparison](ModelComparison.ipynb) shows how you can compare two OpenAI models. 29 | - [GPT4 vs LLaMA2](GPT4vsLlama2.ipynb) allows you to understand whether LLaMA might be enough for your use case. 30 | - [LLaMA Head To Head](LlamaHeadToHead.ipynb) presents a match-up between LLaMA 1 and LLaMA 2! 31 | 32 | #### Evaluation 33 | - [Auto Evaluation](AutoEval.ipynb) presents an example of how you can use another LLM to evaluate responses. 34 | - [Structured Output](StructuredOutput.ipynb) validates that the model outputs adhere to your desired structured format. 35 | - [Semantic Similarity](SemanticSimilarity.ipynb) evaluates your model outputs compared to ideal outputs. 36 | - [Human Feedback](HumanFeedback.ipynb) allows you to provide human feedback to your outputs. 37 | 38 | 39 | ### Vector Databases 40 | 41 | - [Retrieval Augmented Generation](vectordb_experiments/RetrievalAugmentedGeneration.ipynb) combines a vector database 42 | experiment with an LLM to evaluate the whole RAG process. 43 | - [ChromaDB Experiment](vectordb_experiments/ChromaDBExperiment.ipynb) demonstrates how to experiment with different 44 | embedding functions and query parameters of `Chroma`. The example evaluates the results by computing the 45 | ranking correlation against an expected output. 46 | - [Weaviate Experiment](vectordb_experiments/WeaviateExperiment.ipynb) shows how you can easily try different vectorizers, configurations, 47 | and query functions, and compare the final results. 48 | - [LanceDB Experiment](vectordb_experiments/LanceDBExperiment.ipynb) allows you to try different embedding functions and query methods. 49 | - [Qdrant Experiment](vectordb_experiments/QdrantExperiment.ipynb) explores different ways to query Qdrant, including with vectors. 50 | - [Pinecone Experiment](vectordb_experiments/PineconeExperiment.ipynb) looks into different ways to add data into and query from Pinecone.
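The notebooks listed above share the same basic flow: construct an experiment with *lists* of argument values, call `run()` to execute every combination of those arguments, optionally call `evaluate()` with a metric, and then `visualize()` the results. A minimal sketch of that flow is below; the model name, message, and temperature values are illustrative placeholders rather than settings from any particular notebook.

```python
from prompttools.experiment import OpenAIChatExperiment

# Every argument is a list; the experiment runs the cartesian product of all combinations.
messages = [[{"role": "user", "content": "Who was the first president?"}]]

experiment = OpenAIChatExperiment(
    model=["gpt-3.5-turbo"],   # one or more model names to compare
    messages=messages,         # one or more chat message lists
    temperature=[0.0, 1.0],    # illustrative values to compare
)

experiment.run()        # execute each argument combination
experiment.visualize()  # display the results table in the notebook
```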
51 | 52 | ### Frameworks 53 | 54 | - [LangChain Sequential Chain Experiment](frameworks/LangChainSequentialChainExperiment.ipynb) 55 | - [LangChain Router Chain Experiment](frameworks/LangChainRouterChainExperiment.ipynb) 56 | - [MindsDB Experiment](frameworks/MindsDBExperiment.ipynb) 57 | 58 | ### Computer Vision 59 | - [Stable Diffusion](image_experiments/StableDiffusion.ipynb) 60 | - [Replicate's hosted Stable Diffusion](image_experiments/ReplicateStableDiffusion.ipynb) 61 | -------------------------------------------------------------------------------- /examples/notebooks/audio_experiments/sample_audio_files/80s_billy_joel.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/examples/notebooks/audio_experiments/sample_audio_files/80s_billy_joel.wav -------------------------------------------------------------------------------- /examples/notebooks/frameworks/LangChainRouterChainExperiment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Lang Chain Router Chain Experiment Example" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "ename": "ImportError", 17 | "evalue": "cannot import name 'RouterChainExperiment' from 'prompttools.experiment' (/home/hashem/.local/lib/python3.10/site-packages/prompttools/experiment/__init__.py)", 18 | "output_type": "error", 19 | "traceback": [ 20 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 21 | "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", 22 | "Input \u001b[0;32mIn [1]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mprompttools\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexperiment\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RouterChainExperiment\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllms\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mprompttools\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m semantic_similarity\n", 23 | "\u001b[0;31mImportError\u001b[0m: cannot import name 'RouterChainExperiment' from 'prompttools.experiment' (/home/hashem/.local/lib/python3.10/site-packages/prompttools/experiment/__init__.py)" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "from prompttools.experiment import RouterChainExperiment\n", 29 | "from langchain.llms import OpenAI\n", 30 | "\n", 31 | "from prompttools.utils import semantic_similarity\n", 32 | "\n", 33 | "\n", 34 | "cooking_template = \"\"\"You are a well versed chef. \\\n", 35 | "You enjoy baking bread, \\\n", 36 | "explain how to make good sushi.\n", 37 | "\n", 38 | "Here is a question:\n", 39 | "{input}\"\"\"\n", 40 | "\n", 41 | "\n", 42 | "restaurant_template = \"\"\"You are a fantastic restaurateur. 
\\\n", 43 | "You specialize in restaurant operations in New York City.\n", 44 | "\n", 45 | "Here is a question:\n", 46 | "{input}\"\"\"\n", 47 | "\n", 48 | "prompt_infos = [\n", 49 | " [\n", 50 | " {\n", 51 | " \"name\": \"cooking\",\n", 52 | " \"description\": \"Good for answering questions about cooking\",\n", 53 | " \"prompt_template\": cooking_template,\n", 54 | " },\n", 55 | " {\n", 56 | " \"name\": \"restaurant\",\n", 57 | " \"description\": \"Good for building a restaurant\",\n", 58 | " \"prompt_template\": restaurant_template,\n", 59 | " },\n", 60 | " ],\n", 61 | "]\n", 62 | "\n", 63 | "\n", 64 | "experiment = RouterChainExperiment(\n", 65 | " llm=[OpenAI],\n", 66 | " prompt=[\"How can I make a delicious smoothie?\"],\n", 67 | " prompt_infos=prompt_infos,\n", 68 | " **{\n", 69 | " \"temperature\": [0.1, 0.9],\n", 70 | " },\n", 71 | ")\n", 72 | "\n", 73 | "expected = \"\"\"You will need fruit of your choice, a blender, and some ice. Place about one cup of fruit in the blender.\n", 74 | "Place about a quarter cup of ice in the blender. Blend to your desired consistency.\"\"\"\n", 75 | "\n", 76 | "experiment.run()\n", 77 | "\n", 78 | "experiment.evaluate(\"similar_to_expected\", semantic_similarity, expected=[expected] * 2)\n", 79 | "\n", 80 | "experiment.visualize()" 81 | ] 82 | } 83 | ], 84 | "metadata": { 85 | "kernelspec": { 86 | "display_name": "Python 3 (ipykernel)", 87 | "language": "python", 88 | "name": "python3" 89 | }, 90 | "language_info": { 91 | "codemirror_mode": { 92 | "name": "ipython", 93 | "version": 3 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python", 99 | "pygments_lexer": "ipython3", 100 | "version": "3.10.12" 101 | }, 102 | "vscode": { 103 | "interpreter": { 104 | "hash": "eec05f12730ef3ef66f433616fcd3cfdacd3dcf1f1c49c706eaa0465be8f325b" 105 | } 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 2 110 | } 111 | -------------------------------------------------------------------------------- /examples/notebooks/image_experiments/compare_images_folder/A panda writing code in the Swiss Alps3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/examples/notebooks/image_experiments/compare_images_folder/A panda writing code in the Swiss Alps3.png -------------------------------------------------------------------------------- /examples/notebooks/image_experiments/compare_images_folder/A_fruit_basket_on_the_moon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/examples/notebooks/image_experiments/compare_images_folder/A_fruit_basket_on_the_moon.png -------------------------------------------------------------------------------- /examples/notebooks/image_experiments/stablediffusion_images/An_apple_orchard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/examples/notebooks/image_experiments/stablediffusion_images/An_apple_orchard.png -------------------------------------------------------------------------------- /examples/notebooks/image_experiments/stablediffusion_images/Just_a_fruit_basket.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/examples/notebooks/image_experiments/stablediffusion_images/Just_a_fruit_basket.png -------------------------------------------------------------------------------- /examples/prompttests/test_chromadb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | from typing import Dict, Tuple 9 | import prompttools.prompttest as prompttest 10 | from prompttools.utils import similarity 11 | from prompttools.experiment import ChromaDBExperiment 12 | 13 | EXPECTED = {"Who was the first president of the USA?": "George Washington"} 14 | 15 | if not (("CHROMADB_API_TOKEN" in os.environ) or ("DEBUG" in os.environ)): # placeholder api naming 16 | print("Error: This example requires you to set either your CHROMADB_API_TOKEN or DEBUG=1") 17 | exit(1) 18 | 19 | 20 | def extract_chromadb_dists(output: Dict[str, object]) -> list[str]: 21 | return output 22 | 23 | 24 | def measure_fn(): # TODO: Do we want to build a separate framework from prompttest that handles vectors? 25 | pass 26 | -------------------------------------------------------------------------------- /examples/prompttests/test_huggingface_hub.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import jinja2 9 | import prompttools.prompttest as prompttest 10 | from prompttools.utils import similarity 11 | from prompttools.prompttest.threshold_type import ThresholdType 12 | from prompttools.mock.mock import mock_hf_completion_fn 13 | from huggingface_hub.inference_api import InferenceApi 14 | 15 | if not (("HUGGINGFACEHUB_API_TOKEN" in os.environ) or ("DEBUG" in os.environ)): 16 | print("Error: This example requires you to set either your HUGGINGFACEHUB_API_TOKEN or DEBUG=1") 17 | exit(1) 18 | 19 | 20 | client = InferenceApi( 21 | repo_id="google/flan-t5-xxl", 22 | token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"), 23 | task="text2text-generation", 24 | ) 25 | 26 | 27 | def create_prompt(): 28 | prompt_template = "Answer the following question: {{ input }}" 29 | user_input = {"input": "Who was the first president of the USA?"} 30 | environment = jinja2.Environment() 31 | template = environment.from_string(prompt_template) 32 | return template.render(**user_input) 33 | 34 | 35 | @prompttest.prompttest( 36 | metric_name="similar_to_expected", 37 | eval_fn=similarity.evaluate, 38 | prompts=[create_prompt()], 39 | expected=["George Washington"], 40 | threshold=1.0, 41 | threshold_type=ThresholdType.MAXIMUM, 42 | ) 43 | def completion_fn(prompt: str): 44 | response = None 45 | if os.getenv("DEBUG", default=False): 46 | response = mock_hf_completion_fn(**{"inputs": prompt}) 47 | else: 48 | response = client(inputs=prompt) 49 | return response[0]["generated_text"] 50 | 51 | 52 | if __name__ == "__main__": 53 | prompttest.main() 54 | -------------------------------------------------------------------------------- /examples/prompttests/test_openai_chat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 
3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | import openai 9 | import jinja2 10 | from prompttools import prompttest 11 | from prompttools.prompttest.threshold_type import ThresholdType 12 | from prompttools.utils import similarity 13 | from prompttools.utils import validate_json 14 | from prompttools.mock.mock import mock_openai_completion_fn 15 | 16 | 17 | if not (("OPENAI_API_KEY" in os.environ) or ("DEBUG" in os.environ)): 18 | print("Error: This example requires you to set either your OPENAI_API_KEY or DEBUG=1") 19 | exit(1) 20 | 21 | 22 | def create_json_prompt(): 23 | prompt_template = "Answer the following question using a valid JSON format: {{ input }}" 24 | user_input = {"input": "Who was the first president?"} 25 | environment = jinja2.Environment() 26 | template = environment.from_string(prompt_template) 27 | return template.render(**user_input) 28 | 29 | 30 | def create_prompt(): 31 | prompt_template = "Answer the following question: {{ input }}" 32 | user_input = {"input": "Who was the first president of the USA?"} 33 | environment = jinja2.Environment() 34 | template = environment.from_string(prompt_template) 35 | return template.render(**user_input) 36 | 37 | 38 | @prompttest.prompttest( 39 | metric_name="is_valid_json", 40 | eval_fn=validate_json.evaluate, 41 | prompts=[create_json_prompt()], 42 | ) 43 | def json_completion_fn(prompt: str): 44 | if os.getenv("DEBUG", default=False): 45 | response = mock_openai_completion_fn(**{"prompt": prompt}) 46 | else: 47 | response = openai.completions.create(model="babbage-002", prompt=prompt) 48 | return response.choices[0].text 49 | 50 | 51 | @prompttest.prompttest( 52 | metric_name="similar_to_expected", 53 | eval_fn=similarity.evaluate, 54 | prompts=[create_prompt()], 55 | expected=["George Washington"], 56 | threshold=1.0, 57 | threshold_type=ThresholdType.MAXIMUM, 58 | ) 59 | def completion_fn(prompt: str): 60 | if os.getenv("DEBUG", default=False): 61 | response = mock_openai_completion_fn(**{"prompt": prompt}) 62 | else: 63 | response = openai.completions.create(model="babbage-002", prompt=prompt) 64 | return response.choices[0].text 65 | 66 | 67 | if __name__ == "__main__": 68 | prompttest.main() 69 | -------------------------------------------------------------------------------- /examples/prompttests/test_qdrant.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | from typing import List 3 | 4 | from qdrant_client import QdrantClient 5 | 6 | from prompttools.experiment import QdrantExperiment 7 | 8 | 9 | def embedding_function(text: str) -> List[float]: 10 | r""" 11 | Create vector embedding from text. This is a dummy function for testing purposes 12 | and returns a vector of 16 floats. 
13 | 14 | Args: 15 | text (str): Text to be vectorized 16 | Returns: 17 | List[float]: Vector embedding of the text 18 | """ 19 | import numpy as np 20 | import struct 21 | 22 | vectorized_text = np.abs( 23 | np.array(struct.unpack(">ffffffffffffffff", hashlib.sha512(text.encode("utf-8")).digest())) 24 | ) 25 | normalized_vector = vectorized_text / np.linalg.norm(vectorized_text) 26 | return normalized_vector.tolist() 27 | 28 | 29 | test_parameters = { 30 | "collection_params": { 31 | "vectors_config__distance": ["Cosine", "Euclid", "Dot"], 32 | "hnsw_config__m": [16, 32, 64, 128], 33 | }, 34 | "query_params": { 35 | "search_params__hnsw_ef": [1, 16, 32, 64, 128], 36 | "search_params__exact": [True, False], 37 | }, 38 | } 39 | frozen_parameters = { 40 | # Run Qdrant server locally with: 41 | # docker run -p "6333:6333" -p "6334:6334" qdrant/qdrant:v1.4.0 42 | "client": QdrantClient("http://localhost:6333"), 43 | "collection_name": "test_collection", 44 | "embedding_fn": embedding_function, 45 | "vector_size": 16, 46 | "documents": ["test document 1", "test document 2"], 47 | "queries": ["test query 1", "test query 2"], 48 | } 49 | experiment = QdrantExperiment.initialize(test_parameters=test_parameters, frozen_parameters=frozen_parameters) 50 | experiment.run() 51 | 52 | print(experiment.get_table(True)) 53 | -------------------------------------------------------------------------------- /img/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/img/demo.gif -------------------------------------------------------------------------------- /img/feedback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/img/feedback.png -------------------------------------------------------------------------------- /img/hegel_ai_logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /img/hegel_ai_logo_dark.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /img/playground.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/img/playground.gif -------------------------------------------------------------------------------- /img/prompttest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/img/prompttest.png -------------------------------------------------------------------------------- /img/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/img/table.png -------------------------------------------------------------------------------- /prompttools/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 
2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .prompttest import prompttest 8 | from .sentry import init_sentry 9 | 10 | 11 | init_sentry() 12 | 13 | __all__ = ["prompttest"] 14 | -------------------------------------------------------------------------------- /prompttools/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .benchmark import Benchmark 8 | 9 | 10 | __all__ = [ 11 | "Benchmark", 12 | ] 13 | -------------------------------------------------------------------------------- /prompttools/benchmarks/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Any, Callable, List, Optional 8 | import pandas as pd 9 | import warnings 10 | 11 | 12 | class Benchmark: 13 | r""" 14 | Benchmark models using defined data sets. 15 | Find example under benchmarks/examples/benchmarking.ipynb. 16 | 17 | Args: 18 | ---- 19 | experiment (experiment type): experiment to use 20 | eval_methods (Callable): list of evaluation methods to measure response similarity 21 | prompts (list(str)): list of queries, questions, prompts for LLMs to respond to 22 | response_options (list(str)): possible responses to measure against 23 | correct_response_indices (list(int)): list of index of correct response in response_options 24 | """ 25 | 26 | def __init__( 27 | self, 28 | experiment: Any, 29 | eval_method: Callable, 30 | prompts: List[str], 31 | response_options: List[Any], 32 | correct_response_indices: Optional[List[int]] = None, 33 | ): 34 | self.experiment = experiment 35 | self.eval_method = eval_method 36 | self.prompts = prompts 37 | self.response_options = response_options 38 | self.correct_response_indices = correct_response_indices 39 | 40 | def _get_precision( 41 | self, 42 | dataframe: pd.DataFrame, 43 | pred_col: str, 44 | label_col: str, 45 | ) -> float: 46 | r""" 47 | Calculate precision. 48 | """ 49 | # TODO: coming soon 50 | pass 51 | 52 | def multiple_choice_accuracy( 53 | self, 54 | dataframe: pd.DataFrame, 55 | col1: str, 56 | col2: str, 57 | ) -> float: 58 | r""" 59 | Benchmark LLM accuracy on multiple choice 60 | prompt endings. 61 | """ 62 | correct = 0 63 | for _, row in dataframe.iterrows(): 64 | if row[col1] == row[col2]: 65 | correct += 1 66 | return correct / len(dataframe) 67 | 68 | def multiple_choice_benchmark( 69 | self, 70 | ) -> Any: 71 | r""" 72 | Run model experiments to measure response quality. 73 | """ 74 | self.experiment.run() 75 | 76 | if "prompt" not in self.experiment.full_df.columns: 77 | # Assume messages column is in place of prompt 78 | self.experiment.full_df["prompt"] = self.experiment.full_df["messages"].map(lambda x: str(x)) 79 | warnings.warn("Column 'prompt' does not exist. 
Using column 'messages' instead.", UserWarning, stacklevel=2) 80 | # Get option with highest similarity to LLM response 81 | benchmark_df = self.experiment.full_df[["prompt", "response"]] 82 | benchmark_df["response_options"] = self.response_options 83 | benchmark_df = benchmark_df.explode(column="response_options").reset_index() 84 | scores = [] 85 | for _, row in benchmark_df.iterrows(): 86 | scores.append(self.eval_method(row=row, expected=row["response_options"])) 87 | benchmark_df["scores"] = scores 88 | benchmark_df["max_value"] = benchmark_df.groupby("prompt")["scores"].transform("max") 89 | benchmark_df = benchmark_df[benchmark_df["scores"] == benchmark_df["max_value"]] 90 | benchmark_df = benchmark_df.sort_index() 91 | # Colect model choices 92 | model_choice = [] 93 | for i, choice in enumerate(benchmark_df["response_options"].values): 94 | model_choice.append(self.response_options[i].index(choice)) 95 | benchmark_df["model_choice"] = model_choice 96 | benchmark_df["labels"] = self.correct_response_indices 97 | return self.multiple_choice_accuracy(benchmark_df, "model_choice", "labels") 98 | -------------------------------------------------------------------------------- /prompttools/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | from os.path import join, dirname 10 | 11 | try: 12 | from dotenv import load_dotenv 13 | except ImportError: 14 | load_dotenv = None 15 | 16 | if load_dotenv is not None: 17 | dotenv_path = join(dirname(dirname(__file__)), ".env") 18 | load_dotenv(dotenv_path) 19 | 20 | 21 | ENV = os.environ.get("ENV", "prod") 22 | if ENV == "development": 23 | HEGEL_BACKEND_URL = """http://127.0.0.1:5000""" 24 | else: 25 | HEGEL_BACKEND_URL = """https://api.hegel-ai.com""" 26 | -------------------------------------------------------------------------------- /prompttools/experiment/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 
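#
# Most LLM experiment classes exported from this package accept *lists* for each
# argument, run the cartesian product of those arguments, and expose the results via
# ``run()`` / ``evaluate()`` / ``visualize()``. A minimal sketch, mirroring the
# GoogleVertexChatExperiment example notebook (adjust the values for your own setup):
#
#   from prompttools.experiment import GoogleVertexChatCompletionExperiment
#
#   experiment = GoogleVertexChatCompletionExperiment(
#       model=["chat-bison"],
#       message=["Is 97 a prime number?"],
#       context=["You are a helpful assistant."],
#   )
#   experiment.run()
#   experiment.visualize()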
6 | 7 | 8 | from .experiments.experiment import Experiment 9 | from .experiments.openai_chat_experiment import OpenAIChatExperiment 10 | from .experiments.openai_completion_experiment import OpenAICompletionExperiment 11 | from .experiments.anthropic_completion_experiment import AnthropicCompletionExperiment 12 | from .experiments.huggingface_hub_experiment import HuggingFaceHubExperiment 13 | from .experiments.google_gemini_chat_experiment import GoogleGeminiChatCompletionExperiment 14 | from .experiments.google_palm_experiment import GooglePaLMCompletionExperiment 15 | from .experiments.google_vertex_chat_experiment import GoogleVertexChatCompletionExperiment 16 | from .experiments.llama_cpp_experiment import LlamaCppExperiment 17 | from .experiments.chromadb_experiment import ChromaDBExperiment 18 | from .experiments.weaviate_experiment import WeaviateExperiment 19 | from .experiments.lancedb_experiment import LanceDBExperiment 20 | from .experiments.mistral_experiment import MistralChatCompletionExperiment 21 | from .experiments.mindsdb_experiment import MindsDBExperiment 22 | from .experiments.langchain_experiment import SequentialChainExperiment, RouterChainExperiment 23 | from .experiments.stablediffusion_experiment import StableDiffusionExperiment 24 | from .experiments.replicate_experiment import ReplicateExperiment 25 | from .experiments.qdrant_experiment import QdrantExperiment 26 | from .experiments.pinecone_experiment import PineconeExperiment 27 | from .experiments.musicgen_experiment import MusicGenExperiment 28 | 29 | __all__ = [ 30 | "AnthropicCompletionExperiment", 31 | "ChromaDBExperiment", 32 | "Experiment", 33 | "GoogleGeminiChatCompletionExperiment", 34 | "GooglePaLMCompletionExperiment", 35 | "GoogleVertexChatCompletionExperiment", 36 | "LanceDBExperiment", 37 | "LlamaCppExperiment", 38 | "HuggingFaceHubExperiment", 39 | "MistralChatCompletionExperiment", 40 | "MindsDBExperiment", 41 | "MusicGenExperiment", 42 | "OpenAIChatExperiment", 43 | "OpenAICompletionExperiment", 44 | "PineconeExperiment", 45 | "QdrantExperiment", 46 | "ReplicateExperiment", 47 | "RouterChainExperiment", 48 | "SequentialChainExperiment", 49 | "StableDiffusionExperiment", 50 | "WeaviateExperiment", 51 | ] 52 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/experiment/experiments/__init__.py -------------------------------------------------------------------------------- /prompttools/experiment/experiments/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import pandas as pd 8 | 9 | 10 | def _check_column_uniqueness(column: "pd.core.series.Series") -> bool: 11 | r""" 12 | Check if all elements are equal in the column. 13 | 14 | Arg: 15 | column (pandas.core.series.Series): Column to check 16 | """ 17 | first_ele = column[0] 18 | for ele in column: 19 | if first_ele != ele: 20 | return True 21 | return False 22 | 23 | 24 | def _get_dynamic_columns(df: pd.DataFrame) -> pd.DataFrame: 25 | r""" 26 | Given a ``pd.DataFrame``, return a DataFrame where columns have more than 1 unique value. 
27 | 28 | Args: 29 | df (pd.DataFrame): DataFrame to examine 30 | """ 31 | hashable_columns = [] 32 | unhashable_columns = [] 33 | for col in df.columns: 34 | try: 35 | hash(df[col][0]) 36 | hashable_columns.append(col) 37 | except TypeError: 38 | # If a column is not hashable, check if there exists value differ from the 39 | if _check_column_uniqueness(df[col]): 40 | unhashable_columns.append(col) 41 | 42 | unique_counts = df[hashable_columns].nunique() 43 | columns_with_multiple_unique_values = unique_counts[unique_counts > 1].index 44 | dfs_to_concat = [df[columns_with_multiple_unique_values], df[unhashable_columns]] 45 | if ( 46 | "prompt" in df 47 | and "prompt" not in df[columns_with_multiple_unique_values] 48 | and "prompt" not in df[unhashable_columns] 49 | ): 50 | dfs_to_concat.append(df["prompt"]) 51 | elif ( 52 | "messages" in df 53 | and "messages" not in df[columns_with_multiple_unique_values] 54 | and "messages" not in df[unhashable_columns] 55 | ): 56 | dfs_to_concat.append(df["messages"]) 57 | return pd.concat(dfs_to_concat, axis=1) 58 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/error.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | class PromptExperimentException(Exception): 9 | r""" 10 | An exception to throw when something goes wrong with the prompt test setup 11 | """ 12 | 13 | pass 14 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/google_gemini_chat_experiment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | try: 8 | import google.generativeai as genai 9 | from google.generativeai.types import content_types 10 | from google.generativeai.types import generation_types 11 | from google.generativeai.types import safety_types 12 | except ImportError: 13 | genai = None 14 | content_types, generation_types, safety_types = None, None, None 15 | 16 | 17 | from .experiment import Experiment 18 | from typing import Optional 19 | import copy 20 | 21 | 22 | class GoogleGeminiChatCompletionExperiment(Experiment): 23 | r""" 24 | This class defines an experiment for Google GenAI's chat API. It accepts lists for each argument 25 | passed into Vertex AI's API, then creates a cartesian product of those arguments, and gets results for each. 26 | 27 | Note: 28 | - All arguments here should be a ``list``, even if you want to keep the argument frozen 29 | (i.e. ``temperature=[1.0]``), because the experiment will try all possible combination 30 | of the input arguments. 31 | - You need to set up your Google Vertex AI credentials properly before executing this experiment. One option 32 | is to execute on Google Cloud's Colab. 33 | 34 | Args: 35 | model (list[str]): Which model to call, as a string or a ``types.Model`` (e.g. ``'models/text-bison-001'``). 36 | 37 | contents (list[content_types]): Message for the chat model to respond. 38 | 39 | generation_config (list[generation_types]): Configurations for the generation of the model. 
40 | 41 | safety_settings (list[safety_types]): Configurations for the safety features of the model. 42 | """ 43 | 44 | def __init__( 45 | self, 46 | model: list[str], 47 | contents: list["content_types.ContentsType"], 48 | generation_config: list[Optional["generation_types.GenerationConfigType"]] = [None], 49 | safety_settings: list[Optional["safety_types.SafetySettingOptions"]] = [None], 50 | ): 51 | if genai is None: 52 | raise ModuleNotFoundError( 53 | "Package `google-generativeai` is required to be installed to use Google GenAI API in this experiment." 54 | "Please use `pip install google-generativeai` to install the package or run this in Google Colab." 55 | ) 56 | 57 | self.completion_fn = self.google_text_completion_fn 58 | 59 | self.all_args = dict( 60 | model=model, 61 | contents=contents, 62 | generation_config=generation_config, 63 | safety_settings=safety_settings, 64 | ) 65 | super().__init__() 66 | 67 | def google_text_completion_fn(self, **input_args): 68 | params = copy.deepcopy(input_args) 69 | model = genai.GenerativeModel(input_args["model"]) 70 | del params["model"] 71 | response = model.generate_content(**params) 72 | return response 73 | 74 | @staticmethod 75 | def _extract_responses(response) -> list[str]: 76 | # `response.text` will return the top response 77 | return response.text 78 | 79 | def _get_model_names(self): 80 | return [combo["model"] for combo in self.argument_combos] 81 | 82 | def _get_prompts(self): 83 | return [combo["message"] for combo in self.argument_combos] 84 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/google_palm_experiment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | try: 8 | import google.generativeai as palm 9 | except ImportError: 10 | palm = None 11 | 12 | from prompttools.selector.prompt_selector import PromptSelector 13 | from prompttools.mock.mock import mock_palm_completion_fn 14 | from .experiment import Experiment 15 | from typing import Optional, Union, Iterable 16 | import os 17 | 18 | 19 | class GooglePaLMCompletionExperiment(Experiment): 20 | r""" 21 | This class defines an experiment for Google PaLM's generate text API. It accepts lists for each argument 22 | passed into PaLM's API, then creates a cartesian product of those arguments, and gets results for each. 23 | 24 | Note: 25 | - All arguments here should be a ``list``, even if you want to keep the argument frozen 26 | (i.e. ``temperature=[1.0]``), because the experiment will try all possible combination 27 | of the input arguments. 28 | - You should set ``os.environ["GOOGLE_PALM_API_KEY"] = YOUR_KEY`` in order to connect with PaLM's API. 29 | 30 | Args: 31 | model (list[str]): Which model to call, as a string or a ``types.Model`` (e.g. ``'models/text-bison-001'``). 32 | 33 | prompt (list[str]): Free-form input text given to the model. Given a prompt, the model will 34 | generate text that completes the input text. 35 | 36 | temperature (list[float]): Controls the randomness of the output. Must be positive. 37 | Typical values are in the range: ``[0.0, 1.0]``. Higher values produce a 38 | more random and varied response. A temperature of zero will be deterministic. 39 | 40 | candidate_count (list[int]): The **maximum** number of generated response messages to return. 
41 | This value must be between ``[1, 8]``, inclusive. If unset, this will default to ``1``. 42 | 43 | max_output_tokens (list[int]): Maximum number of tokens to include in a candidate. Must be greater 44 | than zero. If unset, will default to ``64``. 45 | 46 | top_k (list[float]): The API uses combined nucleus and top-k sampling. 47 | ``top_k`` sets the maximum number of tokens to sample from on each step. 48 | 49 | top_p (list[float]): The API uses combined nucleus and top-k sampling. ``top_p`` configures the nucleus 50 | sampling. It sets the maximum cumulative probability of tokens to sample from. 51 | 52 | safety_settings (list[Iterable[palm.types.SafetySettingDict]]): A list of unique ``types.SafetySetting`` 53 | instances for blocking unsafe content. 54 | 55 | stop_sequences (list[Union[str, Iterable[str]]]): A set of up to 5 character sequences that will stop output 56 | generation. If specified, the API will stop at the first appearance of a stop sequence. 57 | """ 58 | 59 | def __init__( 60 | self, 61 | model: list[str], 62 | prompt: list[str], 63 | temperature: list[Optional[float]] = [None], 64 | candidate_count: list[Optional[int]] = [None], 65 | max_output_tokens: list[Optional[int]] = [None], 66 | top_p: list[Optional[float]] = [None], 67 | top_k: list[Optional[float]] = [None], 68 | safety_settings: list[Optional[Iterable["palm.types.SafetySettingDict"]]] = [None], 69 | stop_sequences: list[Union[str, Iterable[str]]] = [None], 70 | ): 71 | if palm is None: 72 | raise ModuleNotFoundError( 73 | "Package `google.generativeai` is required to be installed to use PaLM API in this experiment." 74 | "Please use `pip install google.generativeai` to install the package" 75 | ) 76 | if os.getenv("DEBUG", default=False): 77 | self.completion_fn = mock_palm_completion_fn() 78 | else: 79 | self.completion_fn = self.palm_completion_fn 80 | palm.configure(api_key=os.environ["GOOGLE_PALM_API_KEY"]) 81 | 82 | # If we are using a prompt selector, we need to 83 | # render the prompts from the selector 84 | if isinstance(prompt[0], PromptSelector): 85 | prompt = [selector.for_palm() for selector in prompt] 86 | 87 | self.all_args = dict( 88 | model=model, 89 | prompt=prompt, 90 | temperature=temperature, 91 | candidate_count=candidate_count, 92 | max_output_tokens=max_output_tokens, 93 | top_p=top_p, 94 | top_k=top_k, 95 | safety_settings=safety_settings, 96 | stop_sequences=stop_sequences, 97 | ) 98 | super().__init__() 99 | 100 | def palm_completion_fn(self, **input_args): 101 | return palm.generate_text(**input_args) 102 | 103 | @staticmethod 104 | def _extract_responses(completion_response: "palm.text.text_types.Completion") -> list[str]: 105 | # `# completion_response.result` will return the top response 106 | return [candidate["output"] for candidate in completion_response.candidates][0] 107 | 108 | def _get_model_names(self): 109 | return [combo["model"] for combo in self.argument_combos] 110 | 111 | def _get_prompts(self): 112 | return [combo["prompt"] for combo in self.argument_combos] 113 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/google_vertex_chat_experiment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | try: 8 | from vertexai.preview.language_models import ChatModel, InputOutputTextPair 9 | except ImportError: 10 | ChatModel = None 11 | InputOutputTextPair = None 12 | 13 | from .experiment import Experiment 14 | from typing import Optional 15 | import copy 16 | 17 | 18 | class GoogleVertexChatCompletionExperiment(Experiment): 19 | r""" 20 | This class defines an experiment for Google Vertex AI's chat API. It accepts lists for each argument 21 | passed into Vertex AI's API, then creates a cartesian product of those arguments, and gets results for each. 22 | 23 | Note: 24 | - All arguments here should be a ``list``, even if you want to keep the argument frozen 25 | (i.e. ``temperature=[1.0]``), because the experiment will try all possible combination 26 | of the input arguments. 27 | - You need to set up your Google Vertex AI credentials properly before executing this experiment. One option 28 | is to execute on Google Cloud's Colab. 29 | 30 | Args: 31 | model (list[str]): Which model to call, as a string or a ``types.Model`` (e.g. ``'models/text-bison-001'``). 32 | 33 | message (list[str]): Message for the chat model to respond. 34 | 35 | context (list[str]): Context shapes how the model responds throughout the conversation. For example, 36 | you can use context to specify words the model can or cannot use, 37 | topics to focus on or avoid, or the response format or style. 38 | 39 | examples (list[list['InputOutputTextPair']]): Examples for the model to learn how to 40 | respond to the conversation. 41 | 42 | temperature (list[float]): Controls the randomness of the output. Must be positive. 43 | Typical values are in the range: ``[0.0, 1.0]``. Higher values produce a 44 | more random and varied response. A temperature of zero will be deterministic. 45 | 46 | max_output_tokens (list[int]): Maximum number of tokens to include in a candidate. Must be greater 47 | than zero. If unset, will default to ``64``. 48 | 49 | top_k (list[float]): The API uses combined nucleus and top-k sampling. 50 | ``top_k`` sets the maximum number of tokens to sample from on each step. 51 | 52 | top_p (list[float]): The API uses combined nucleus and top-k sampling. ``top_p`` configures the nucleus 53 | sampling. It sets the maximum cumulative probability of tokens to sample from. 54 | 55 | stop_sequences (list[Union[str, Iterable[str]]]): A set of up to 5 character sequences that will stop output 56 | generation. If specified, the API will stop at the first appearance of a stop sequence. 57 | """ 58 | 59 | def __init__( 60 | self, 61 | model: list[str], 62 | message: list[str], 63 | context: list[Optional[str]] = [None], 64 | examples: list[Optional[list[InputOutputTextPair]]] = [None], 65 | temperature: list[Optional[float]] = [None], 66 | max_output_tokens: list[Optional[int]] = [None], 67 | top_p: list[Optional[float]] = [None], 68 | top_k: list[Optional[int]] = [None], 69 | stop_sequences: list[list[str]] = [None], 70 | ): 71 | if ChatModel is None: 72 | raise ModuleNotFoundError( 73 | "Package `vertexai` is required to be installed to use Google Vertex API in this experiment." 
74 | "Please use `pip install google-cloud-aiplatform` to install the package" 75 | ) 76 | 77 | self.completion_fn = self.vertex_chat_completion_fn 78 | 79 | self.all_args = dict( 80 | model=model, 81 | message=message, 82 | context=context, 83 | examples=examples, 84 | temperature=temperature, 85 | max_output_tokens=max_output_tokens, 86 | top_p=top_p, 87 | top_k=top_k, 88 | stop_sequences=stop_sequences, 89 | ) 90 | super().__init__() 91 | 92 | def vertex_chat_completion_fn(self, **input_args): 93 | chat_model = ChatModel.from_pretrained(model_name=input_args["model"]) 94 | message = input_args["message"] 95 | params = copy.deepcopy(input_args) 96 | del params["model"], params["message"] 97 | chat = chat_model.start_chat(**params) 98 | return chat.send_message(message) 99 | 100 | @staticmethod 101 | def _extract_responses(response) -> list[str]: 102 | # `response.text` will return the top response 103 | return response.text 104 | 105 | def _get_model_names(self): 106 | return [combo["model"] for combo in self.argument_combos] 107 | 108 | def _get_prompts(self): 109 | return [combo["message"] for combo in self.argument_combos] 110 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/huggingface_endpoint_experiment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # TODO: Coming soon 8 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/mindsdb_experiment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | from typing import Any, Dict, List, Tuple 9 | import itertools 10 | from time import perf_counter 11 | import logging 12 | 13 | try: 14 | from mysql.connector.connection_cext import CMySQLConnection 15 | except ImportError: 16 | CMySQLConnection = None 17 | 18 | from prompttools.mock.mock import mock_mindsdb_completion_fn 19 | 20 | from .experiment import Experiment 21 | from .error import PromptExperimentException 22 | 23 | 24 | class MindsDBExperiment(Experiment): 25 | r""" 26 | An experiment class for MindsDB. 27 | This accepts combinations of MindsDB inputs to form SQL queries, returning a list of responses. 28 | 29 | Args: 30 | db_connector (CMySQLConnection): Connector MindsDB 31 | kwargs (dict): keyword arguments for the model 32 | """ 33 | 34 | def __init__( 35 | self, 36 | db_connector: "CMySQLConnection", 37 | **kwargs: Dict[str, object], 38 | ): 39 | self.cursor = db_connector.cursor() 40 | self.completion_fn = self.mindsdb_completion_fn 41 | if os.getenv("DEBUG", default=False): 42 | self.completion_fn = mock_mindsdb_completion_fn 43 | 44 | self.call_params = dict(prompt=kwargs["prompt"]) 45 | self.model_params = dict({k: kwargs[k] for k in kwargs if k != "prompt"}) 46 | 47 | self.all_args = self.model_params | self.call_params 48 | super().__init__() 49 | 50 | def prepare(self) -> None: 51 | r""" 52 | Creates argument combinations by taking the cartesian product of all inputs. 
53 | """ 54 | self.model_argument_combos = [ 55 | dict(zip(self.model_params, val)) for val in itertools.product(*self.model_params.values()) 56 | ] 57 | self.call_argument_combos = [ 58 | dict(zip(self.call_params, val)) for val in itertools.product(*self.call_params.values()) 59 | ] 60 | 61 | def mindsdb_completion_fn( 62 | self, 63 | **params: Dict[str, Any], 64 | ) -> List[Any]: 65 | r""" 66 | MindsDB helper function to make request. 67 | """ 68 | prompt = params["prompt"] 69 | 70 | self.cursor.execute(prompt) 71 | return [x for x in self.cursor] 72 | 73 | def run( 74 | self, 75 | runs: int = 1, 76 | ) -> None: 77 | r""" 78 | Create tuples of input and output for every possible combination of arguments. 79 | For each combination, it will execute `runs` times, default to 1. 80 | # TODO This can be done with an async queue 81 | """ 82 | if not self.argument_combos: 83 | logging.info("Preparing first...") 84 | self.prepare() 85 | results = [] 86 | latencies = [] 87 | for model_combo in self.model_argument_combos: 88 | for call_combo in self.call_argument_combos: 89 | call_combo["prompt"] = call_combo["prompt"].format( 90 | table=model_combo["table"], 91 | author_username=model_combo["author_username"], 92 | text=model_combo["text"], 93 | ) 94 | for _ in range(runs): 95 | call_combo["client"] = self.cursor 96 | start = perf_counter() 97 | res = self.completion_fn(**call_combo) 98 | latencies.append(perf_counter() - start) 99 | results.append(res) 100 | self.argument_combos.append(model_combo | call_combo) 101 | if len(results) == 0: 102 | logging.error("No results. Something went wrong.") 103 | raise PromptExperimentException 104 | self._construct_result_dfs(self.argument_combos, results, latencies, extract_response_equal_full_result=True) 105 | 106 | @staticmethod 107 | def _extract_responses(output: List[Dict[str, object]]) -> Tuple[str]: 108 | return output[0] 109 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/mistral_experiment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | 9 | from typing import Optional 10 | 11 | 12 | from .experiment import Experiment 13 | 14 | 15 | try: 16 | import mistralai 17 | from mistralai.client import MistralClient 18 | from mistralai.models.chat_completion import ChatMessage 19 | except ImportError: 20 | mistralai = None 21 | MistralClient = None 22 | ChatMessage = None 23 | 24 | 25 | class MistralChatCompletionExperiment(Experiment): 26 | r""" 27 | This class defines an experiment for Mistral's chatcompletion API. It accepts lists for each argument 28 | passed into the API, then creates a cartesian product of those arguments, and gets results for each. 29 | 30 | Note: 31 | - All arguments here should be a ``list``, even if you want to keep the argument frozen 32 | (i.e. ``temperature=[1.0]``), because the experiment will try all possible combination 33 | of the input arguments. 34 | - You should set ``os.environ["MISTRAL_API_KEY"] = YOUR_KEY`` in order to connect with Mistral's API. 35 | 36 | Args: 37 | model (list[str]): 38 | the model(s) that will complete your prompt (e.g. "mistral-tiny") 39 | 40 | messages (list[ChatMessage]): 41 | Input prompts (using Mistral's Python library). The first prompt role should be `user` or `system`. 
42 | 43 | temperature (list[float], optional): 44 | The amount of randomness injected into the response 45 | 46 | top_p (list[float], optional): 47 | use nucleus sampling. 48 | 49 | max_tokens (list[int]): 50 | The maximum number of tokens to generate in the completion. 51 | 52 | safe_prompt (list[bool]): 53 | Whether to inject a safety prompt before all conversations. 54 | 55 | random_seed (list[int], optional): 56 | The seed to use for random sampling. If set, different calls will generate deterministic results. 57 | """ 58 | 59 | url = "https://api.mistral.ai/v1/chat/completions" 60 | 61 | def __init__( 62 | self, 63 | model: list[str], 64 | messages: list[str], 65 | temperature: list[float] = [None], 66 | top_p: list[float] = [None], 67 | max_tokens: list[Optional[int]] = [None], 68 | safe_prompt: list[bool] = [False], 69 | random_seed: list[Optional[int]] = [None], 70 | ): 71 | if mistralai is None: 72 | raise ModuleNotFoundError( 73 | "Package `mistralai` is required to be installed to use this experiment." 74 | "Please use `pip install mistralai` to install the package" 75 | ) 76 | self.client = MistralClient(api_key=os.environ["MISTRAL_API_KEY"]) 77 | self.completion_fn = self.mistral_completion_fn 78 | 79 | self.all_args = dict( 80 | model=model, 81 | messages=messages, 82 | temperature=temperature, 83 | top_p=top_p, 84 | max_tokens=max_tokens, 85 | safe_prompt=safe_prompt, 86 | random_seed=random_seed, 87 | ) 88 | super().__init__() 89 | 90 | def mistral_completion_fn(self, **input_args): 91 | response = self.client.chat(**input_args) 92 | return response 93 | 94 | @staticmethod 95 | def _extract_responses(response) -> list[str]: 96 | return response.choices[0].message.content 97 | 98 | def _get_model_names(self): 99 | return [combo["model"] for combo in self.argument_combos] 100 | 101 | def _get_prompts(self): 102 | return [combo["messages"] for combo in self.argument_combos] 103 | -------------------------------------------------------------------------------- /prompttools/experiment/experiments/style.mplstyle: -------------------------------------------------------------------------------- 1 | 2 | figure.figsize: 12,8 3 | figure.dpi : 100 4 | 5 | lines.linewidth : 3.0 6 | axes.linewidth: 1.8 7 | font.size : 22 8 | axes.labelsize : 22 9 | xtick.direction : in 10 | ytick.direction : in 11 | xtick.top : True 12 | ytick.right : True 13 | 14 | xtick.major.size: 10 # major tick size in points 15 | xtick.minor.size: 5 # minor tick size in points 16 | xtick.major.width: 1.8 # major tick width in points 17 | xtick.minor.width: 1.2 # minor tick width in points 18 | 19 | ytick.major.size: 10 # major tick size in points 20 | ytick.minor.size: 5 # minor tick size in points 21 | ytick.major.width: 1.8 # major tick width in points 22 | ytick.minor.width: 1.2 # minor tick width in points 23 | 24 | xtick.major.pad : 6 25 | xtick.minor.pad : 6 26 | ytick.major.pad : 6 27 | ytick.minor.pad : 6 28 | 29 | axes.labelpad: 6 30 | 31 | xtick.labelsize : 16 32 | ytick.labelsize : 16 33 | legend.fontsize : 16 34 | legend.frameon : False 35 | 36 | #axes.edgecolor : "333333" # Color of the figure axis. 37 | #axes.edgecolor : "red" # Color of the figure axis. 
38 | 39 | 40 | savefig.bbox : tight 41 | savefig.dpi : 100 42 | 43 | # Hegel AI color cycle 44 | axes.prop_cycle: cycler('color', ["black", "771541", "EB8F4C","594F3B","A8B7AB","9C92A3"]) 45 | 46 | #font.family : serif 47 | #text.usetex : True 48 | #font.serif : Palatino 49 | -------------------------------------------------------------------------------- /prompttools/experiment/widgets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/experiment/widgets/__init__.py -------------------------------------------------------------------------------- /prompttools/experiment/widgets/comparison.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Callable, List 8 | import pandas as pd 9 | from IPython import display 10 | import ipywidgets as widgets 11 | 12 | 13 | class ComparisonWidgetProvider: 14 | r""" 15 | Provides functionality for widgets to compare models. This includes 16 | displaying widgets, and recording evaluations in the experiment. 17 | """ 18 | 19 | def __init__(self, completion_fn, agg_fn, eval_listener_fn): 20 | self.completion_fn = completion_fn 21 | self.agg_fn = agg_fn 22 | self.eval_listener_fn = eval_listener_fn 23 | 24 | def _get_comparison_submission_listener(self, table: pd.DataFrame, models: List[str]) -> Callable: 25 | def on_click(b): 26 | sorted_scores = self.agg_fn(table, 0) 27 | data = { 28 | models[0]: sorted_scores.keys(), 29 | "feedback": sorted_scores.values(), 30 | } 31 | df = pd.DataFrame(data) 32 | display.display(df) 33 | 34 | return on_click 35 | 36 | def set_models(self, models: List[str]) -> None: 37 | self.models = models 38 | self.row_len = 2 + len(self.models) 39 | 40 | def get_header_widgets(self) -> List[object]: 41 | return [widgets.Label("Input")] + [widgets.Label(model) for model in self.models] + [widgets.Label("Feedback")] 42 | 43 | def get_row_widgets(self, index, row): 44 | items = [widgets.HTML(value="
" + row.name + "
")] 45 | items += [ 46 | widgets.HTML(value="
" + row[model] + "
") 47 | for model in self.models 48 | ] 49 | feedback_dropdown = widgets.Dropdown( 50 | options=[("\U0001F44D", 1), ("\U0001F44E", 0)], 51 | value=1, 52 | layout={"width": "50px"}, 53 | ) 54 | feedback_dropdown.observe(self.eval_listener_fn(index), names="value") 55 | items += [feedback_dropdown] 56 | return items 57 | 58 | def get_footer_widgets(self, table): 59 | submit_button = widgets.Button( 60 | description="Submit", 61 | disabled=False, 62 | button_style="success", 63 | tooltip="Submit", 64 | ) 65 | submit_button.on_click(self._get_comparison_submission_listener(table, self.models)) 66 | return [widgets.Label("")] * (self.row_len - 1) + [submit_button] 67 | 68 | def display(self, items): 69 | row_len = 2 + len(self.models) 70 | grid = widgets.GridBox( 71 | items, 72 | layout=widgets.Layout(grid_template_columns="repeat(" + str(row_len) + ", 230px)"), 73 | ) 74 | display.display(grid) 75 | -------------------------------------------------------------------------------- /prompttools/experiment/widgets/feedback.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Callable, List 8 | import pandas as pd 9 | from IPython import display 10 | import ipywidgets as widgets 11 | 12 | 13 | class FeedbackWidgetProvider: 14 | r""" 15 | Provides functionality for widgets to evaluate models. This includes 16 | displaying widgets, and recording evaluations in the experiment. 17 | """ 18 | 19 | def __init__(self, completion_fn, agg_fn, eval_listener_fn): 20 | self.completion_fn = completion_fn 21 | self.agg_fn = agg_fn 22 | self.eval_listener_fn = eval_listener_fn 23 | 24 | def _get_feedback_submission_listener(self, table: pd.DataFrame, pivot_columns: List[str]) -> Callable: 25 | def on_click(b): 26 | sorted_scores = self.agg_fn(table, "feedback", pivot_columns[0]) 27 | data = { 28 | pivot_columns[0]: sorted_scores.keys(), 29 | "feedback": sorted_scores.values(), 30 | } 31 | df = pd.DataFrame(data) 32 | display.display(df) 33 | 34 | return on_click 35 | 36 | def set_pivot_columns(self, pivot_columns: List[str]) -> None: 37 | self.pivot_columns = pivot_columns 38 | 39 | def get_header_widgets(self) -> List[object]: 40 | return [ 41 | widgets.Label(self.pivot_columns[0]), 42 | widgets.Label(self.pivot_columns[1]), 43 | widgets.Label("response(s)"), 44 | widgets.Label("Feedback"), 45 | ] 46 | 47 | def get_row_widgets(self, index, row): 48 | items = [ 49 | widgets.HTML(value="
" + row[self.pivot_columns[0]] + "
"), 50 | widgets.HTML(value="
" + row[self.pivot_columns[1]] + "
"), 51 | widgets.HTML(value="
" + row["response(s)"] + "
"), 52 | ] 53 | feedback_dropdown = widgets.Dropdown( 54 | options=[("\U0001F44D", 1), ("\U0001F44E", 0)], 55 | value=1, 56 | layout={"width": "50px"}, 57 | ) 58 | feedback_dropdown.observe(self.eval_listener_fn(index), names="value") 59 | items += [feedback_dropdown] 60 | return items 61 | 62 | def get_footer_widgets(self, table): 63 | submit_button = widgets.Button( 64 | description="Submit", 65 | disabled=False, 66 | button_style="success", 67 | tooltip="Submit", 68 | ) 69 | submit_button.on_click(self._get_feedback_submission_listener(table, self.pivot_columns)) 70 | return [ 71 | widgets.Label(""), 72 | widgets.Label(""), 73 | widgets.Label(""), 74 | submit_button, 75 | ] 76 | 77 | def display(self, items): 78 | grid = widgets.GridBox( 79 | items, 80 | layout=widgets.Layout(grid_template_columns="repeat(4, 230px)"), 81 | ) 82 | display.display(grid) 83 | -------------------------------------------------------------------------------- /prompttools/experiment/widgets/utility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | def is_interactive() -> bool: 9 | r""" 10 | Used to determine if we are in a jupyter notebook, which 11 | determines how we present the visualizations. 12 | """ 13 | import __main__ as main 14 | 15 | return not hasattr(main, "__file__") 16 | -------------------------------------------------------------------------------- /prompttools/harness/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from .harness import ExperimentationHarness 9 | from .chat_history_harness import ChatHistoryExperimentationHarness 10 | from .chat_model_comparison_harness import ChatModelComparisonHarness 11 | from .chat_prompt_template_harness import ChatPromptTemplateExperimentationHarness 12 | from .model_comparison_harness import ModelComparisonHarness 13 | from .multi_experiment_harness import MultiExperimentHarness 14 | from .prompt_template_harness import PromptTemplateExperimentationHarness 15 | from .rag_harness import RetrievalAugmentedGenerationExperimentationHarness 16 | from .system_prompt_harness import SystemPromptExperimentationHarness 17 | 18 | 19 | __all__ = [ 20 | "ChatHistoryExperimentationHarness", 21 | "ChatModelComparisonHarness", 22 | "ChatPromptTemplateExperimentationHarness", 23 | "ExperimentationHarness", 24 | "ModelComparisonHarness", 25 | "MultiExperimentHarness", 26 | "PromptTemplateExperimentationHarness", 27 | "RetrievalAugmentedGenerationExperimentationHarness", 28 | "SystemPromptExperimentationHarness", 29 | ] 30 | -------------------------------------------------------------------------------- /prompttools/harness/chat_history_harness.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from typing import Dict, List, Optional 8 | from .harness import ExperimentationHarness 9 | from prompttools.experiment import OpenAIChatExperiment 10 | 11 | 12 | class ChatHistoryExperimentationHarness(ExperimentationHarness): 13 | r""" 14 | An experimentation harness used for compare multiple chat histories. 15 | 16 | Args: 17 | model_name (str): The name of the model. 18 | chat_histories (List[List[Dict[str, str]]]): A list of chat histories that will be fed into the model. 19 | model_arguments (Optional[Dict[str, object]], optional): Additional arguments for the model. 20 | Defaults to ``None``. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | model_name: str, 26 | chat_histories: List[List[Dict[str, str]]], 27 | model_arguments: Optional[Dict[str, object]] = None, 28 | ): 29 | self.experiment_cls_constructor = OpenAIChatExperiment 30 | self.model_name = model_name 31 | self.chat_histories = chat_histories 32 | self.model_arguments = {} if model_arguments is None else model_arguments 33 | super().__init__() 34 | 35 | def prepare(self) -> None: 36 | r""" 37 | Initializes and prepares the experiment. 38 | """ 39 | self.experiment = self.experiment_cls_constructor( 40 | [self.model_name], 41 | self.chat_histories, 42 | **self._prepare_arguments(self.model_arguments), 43 | ) 44 | super().prepare() 45 | 46 | def run(self): 47 | if not self.experiment: 48 | self.prepare() 49 | super().run() 50 | -------------------------------------------------------------------------------- /prompttools/harness/chat_model_comparison_harness.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Dict, List, Optional 8 | from .harness import ExperimentationHarness 9 | from prompttools.experiment import OpenAIChatExperiment 10 | 11 | 12 | class ChatModelComparisonHarness(ExperimentationHarness): 13 | r""" 14 | An experimentation harness used for comparing chat models. 15 | Multi-model version of ``ChatHistoryExperimentationHarness``. 16 | 17 | Args: 18 | model_names (List[str]): The names of the models that you would like to compare 19 | chat_histories (List[List[Dict[str, str]]]): A list of chat histories that will be fed into the models. 20 | runs (int): Number of runs to execute. Defaults to ``1``. 21 | model_arguments (Optional[Dict[str, object]], optional): Additional arguments for the model. 22 | Defaults to ``None``. 23 | """ 24 | 25 | PIVOT_COLUMNS = ["model", "messages"] 26 | 27 | def __init__( 28 | self, 29 | model_names: List[str], 30 | chat_histories: List[List[Dict[str, str]]], 31 | runs: int = 1, 32 | model_arguments: Optional[Dict[str, object]] = None, 33 | ): 34 | self.experiment_cls_constructor = OpenAIChatExperiment 35 | self.model_names = model_names 36 | self.chat_histories = chat_histories 37 | self.runs = runs 38 | self.model_arguments = {} if model_arguments is None else model_arguments 39 | super().__init__() 40 | 41 | def prepare(self) -> None: 42 | """ 43 | Initializes and prepares the experiment. 
44 | """ 45 | self.experiment = self.experiment_cls_constructor( 46 | self.model_names, 47 | self.chat_histories, 48 | **self._prepare_arguments(self.model_arguments), 49 | ) 50 | super().prepare() 51 | 52 | def run(self): 53 | if not self.experiment: 54 | self.prepare() 55 | super().run() 56 | 57 | def compare(self): 58 | self.experiment.compare(self.model_names[0], self.PIVOT_COLUMNS) 59 | -------------------------------------------------------------------------------- /prompttools/harness/document_retrieval_harness.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # TODO: Coming soon 8 | -------------------------------------------------------------------------------- /prompttools/harness/function_call_harness.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | # TODO: Coming soon. 8 | -------------------------------------------------------------------------------- /prompttools/harness/multi_experiment_harness.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from typing import Callable, Dict, List 9 | from collections import defaultdict 10 | from prompttools.experiment import Experiment 11 | import pandas as pd 12 | 13 | 14 | class MultiExperimentHarness: 15 | r""" 16 | This is designed to run experiments across multiple model providers. The underlying APIs for different models 17 | (e.g. LlamaCpp and OpenAI) are different, this provides a way to manage that complexity. 18 | This will run experiments for different providers, and combine the results into a single table. 19 | 20 | The notebook "examples/notebooks/GPT4vsLlama2.ipynb" provides a good example how this can used 21 | to test prompts across different models. 22 | 23 | Args: 24 | experiments (list[Experiment]): The list of experiments that you would like to execute (e.g. 
25 | ``prompttools.experiment.OpenAICompletionExperiment``) 26 | """ 27 | 28 | def __init__(self, experiments: List[Experiment]): 29 | self.experiments = experiments 30 | 31 | def prepare(self): 32 | for experiment in self.experiments: 33 | experiment.prepare() 34 | 35 | def run(self): 36 | for experiment in self.experiments: 37 | experiment.run() 38 | 39 | def evaluate(self, metric_name: str, eval_fn: Callable) -> None: 40 | for experiment in self.experiments: 41 | experiment.evaluate(metric_name, eval_fn) 42 | 43 | def gather_feedback(self) -> None: 44 | pass 45 | 46 | def _get_argument_combos(self): 47 | tmp = [combo for experiment in self.experiments for combo in experiment.argument_combos] 48 | return tmp 49 | 50 | def _get_prompts(self): 51 | tmp = [combo for experiment in self.experiments for combo in experiment._get_prompts()] 52 | return tmp 53 | 54 | def _get_results(self): 55 | tmp = [ 56 | experiment._extract_responses(result) for experiment in self.experiments for result in experiment.results 57 | ] 58 | return tmp 59 | 60 | def _get_scores(self): 61 | scores = defaultdict(list) 62 | for experiment in self.experiments: 63 | for name, score in experiment.scores.items(): 64 | scores[name].extend(score) 65 | return scores 66 | 67 | def _get_experiment_names(self): 68 | tmp = [name for experiment in self.experiments for name in experiment._get_model_names()] 69 | return tmp 70 | 71 | def visualize(self, colname: str = None) -> None: 72 | scores = self._get_scores() 73 | data = { 74 | "prompt": self._get_prompts(), 75 | "response(s)": self._get_results(), 76 | "latency": scores["latency"], 77 | "model": self._get_experiment_names(), 78 | } 79 | # Add scores for each eval fn, including feedback 80 | for metric_name, evals in scores.items(): 81 | if metric_name != "comparison": 82 | data[metric_name] = evals 83 | df = pd.DataFrame(data) 84 | if colname: 85 | df = pd.pivot_table( 86 | df, 87 | values=colname, 88 | index=["prompt"], 89 | columns=["model"], 90 | aggfunc=lambda x: x.iloc[0], 91 | ) 92 | return df 93 | 94 | def rank(self, metric_name: str, is_average: bool = False) -> Dict[str, float]: 95 | pass 96 | -------------------------------------------------------------------------------- /prompttools/harness/prompt_template_harness.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Dict, List, Optional, Type 8 | import jinja2 9 | from .harness import ExperimentationHarness, Experiment 10 | import logging 11 | 12 | 13 | class PromptTemplateExperimentationHarness(ExperimentationHarness): 14 | r""" 15 | An experimentation harness used to test various prompt templates. 16 | We use `jinja` templates, e.g. "Answer the following question: {{input}}". 17 | 18 | Args: 19 | experiment (Type[Experiment]): The experiment constructor that you would like to execute within the harness 20 | (e.g. ``prompttools.experiment.OpenAICompletionExperiment``) 21 | model_name (str): The name of the model. 22 | prompt_templates (List[str]): A list of prompt ``jinja``-styled templates. 23 | user_inputs (List[Dict[str, str]]): A list of dictionaries representing user inputs. 24 | model_arguments (Optional[Dict[str, object]], optional): Additional arguments for the model. 25 | Defaults to ``None``. 
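    Example:
        A minimal sketch, assuming ``OpenAICompletionExperiment`` is used as the underlying
        experiment and an OpenAI API key is configured; the template and user input values
        are illustrative.

        from prompttools.experiment import OpenAICompletionExperiment

        harness = PromptTemplateExperimentationHarness(
            OpenAICompletionExperiment,
            "text-davinci-003",
            prompt_templates=["Answer the following question: {{input}}"],
            user_inputs=[{"input": "Who was the first president?"}],
        )
        harness.run()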
26 | """ 27 | 28 | PIVOT_COLUMNS = ["prompt_template", "user_input"] 29 | 30 | def __init__( 31 | self, 32 | experiment: Type[Experiment], 33 | model_name: str, 34 | prompt_templates: List[str], 35 | user_inputs: List[Dict[str, str]], 36 | model_arguments: Optional[Dict[str, object]] = None, 37 | ): 38 | self.environment = jinja2.Environment() 39 | self.experiment_cls_constructor = experiment 40 | self.model_name = model_name 41 | self.prompt_templates = prompt_templates 42 | self.user_inputs = user_inputs 43 | self.model_arguments = {} if model_arguments is None else model_arguments 44 | super().__init__() 45 | 46 | def prepare(self) -> None: 47 | r""" 48 | Creates prompts from templates to use for the experiment, and then initializes and prepares the experiment. 49 | """ 50 | self.input_pairs_dict = {} 51 | rendered_inputs = [] 52 | for pt in self.prompt_templates: 53 | for user_input in self.user_inputs: 54 | template = self.environment.from_string(pt) 55 | prompt = template.render(**user_input) 56 | rendered_inputs.append(prompt) 57 | self.input_pairs_dict[prompt] = (pt, user_input) 58 | self.experiment = self.experiment_cls_constructor( 59 | [self.model_name], 60 | rendered_inputs, 61 | **self._prepare_arguments(self.model_arguments), 62 | ) 63 | super().prepare() 64 | 65 | def run(self): 66 | if not self.experiment: 67 | self.prepare() 68 | super().run() 69 | -------------------------------------------------------------------------------- /prompttools/harness/rag_harness.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Optional, Type, Callable, Union 8 | import jinja2 9 | from .harness import ExperimentationHarness, Experiment 10 | import copy 11 | 12 | 13 | DOC_PROMPT_TEMPLATE = r"""Given these documents:{{documents}} 14 | 15 | {{prompt}} 16 | """ 17 | 18 | 19 | def _doc_list_to_str(documents: list[str]) -> str: 20 | res = "" 21 | for d in documents: 22 | res += "\n" 23 | res += d 24 | return res 25 | 26 | 27 | def _generate_doc_prompt(documents: list[str], prompt_or_msg: Union[str, list[dict[str, str]]], is_chat: bool): 28 | if not is_chat: 29 | prompt = prompt_or_msg 30 | else: # You have a chat message object 31 | prompt = prompt_or_msg[-1]["content"] 32 | environment = jinja2.Environment() 33 | template = environment.from_string(DOC_PROMPT_TEMPLATE) 34 | doc_str = _doc_list_to_str(documents) 35 | 36 | doc_prompt = template.render( 37 | { 38 | "documents": doc_str, 39 | "prompt": prompt, 40 | } 41 | ) 42 | if not is_chat: 43 | return doc_prompt 44 | else: 45 | new_msg = copy.deepcopy(prompt_or_msg) 46 | new_msg[-1]["content"] = doc_prompt 47 | return new_msg 48 | 49 | 50 | class RetrievalAugmentedGenerationExperimentationHarness(ExperimentationHarness): 51 | r""" 52 | An experimentation harness used to test the Retrieval-Augmented Generation process, which 53 | involves a vector DB and a LLM at the same time. 54 | 55 | Args: 56 | vector_db_experiment (Experiment): An initialized vector DB experiment. 57 | llm_experiment_cls (Type[Experiment]): The experiment constructor that you would like to execute 58 | within the harness (e.g. ``prompttools.experiment.OpenAICompletionExperiment``) 59 | llm_arguments (dict[str, list]): Dictionary of arguments for the LLM. 
60 | extract_document_fn (Callable): A function, when given a row of results from the vector DB experiment, 61 | extract the relevant documents (``list[str]``) that will be inserted into the template. 62 | extract_query_metadata_fn (Callable): A function, when given a row of results from the vector DB experiment, 63 | extract the relevant metadata and return a ``str`` that will be shown for visualization in the final 64 | result table 65 | prompt_template (str): A ``jinja``-styled templates, where documents and prompt will be inserted. 66 | """ 67 | 68 | def __init__( 69 | self, 70 | vector_db_experiment: Experiment, 71 | llm_experiment_cls: Type[Experiment], 72 | llm_arguments: dict, 73 | extract_document_fn: Callable, 74 | extract_query_metadata_fn: Callable, 75 | prompt_template: str = DOC_PROMPT_TEMPLATE, 76 | ): 77 | self.vector_db_experiment = vector_db_experiment 78 | self.llm_experiment_cls: Type[Experiment] = llm_experiment_cls 79 | self.experiment: Optional[Experiment] = None 80 | self.llm_arguments = copy.copy(llm_arguments) 81 | self.extract_document_fn = extract_document_fn 82 | self.extract_query_metadata_fn = extract_query_metadata_fn 83 | self.prompt_templates = prompt_template 84 | 85 | def run(self) -> None: 86 | self.vector_db_experiment.run() 87 | document_lists: list[list[str]] = [] 88 | # latencies = [] # TODO: Include latency results 89 | # Extract documents from the result of 90 | for i, row in self.vector_db_experiment.full_df.iterrows(): 91 | document_lists.append(self.extract_document_fn(row)) 92 | # latencies.append(row["latencies"]) 93 | 94 | # Put documents into prompt template 95 | augmented_prompts = [] 96 | is_chat = self.llm_experiment_cls._is_chat() 97 | input_arg_name = "messages" if is_chat else "prompt" 98 | for doc in document_lists: 99 | for prompt_or_msg in self.llm_arguments[input_arg_name]: 100 | augmented_prompts.append(_generate_doc_prompt(doc, prompt_or_msg, is_chat)) 101 | 102 | # Pass documents into LLM 103 | self.llm_arguments[input_arg_name]: list[str] = augmented_prompts 104 | self.experiment = self.llm_experiment_cls(**self.llm_arguments) 105 | 106 | # Run the LLM experiment 107 | self.experiment.run() 108 | 109 | # Add "query text" (i.e. the prompt used to retrieve documents from the vector DB) 110 | # to the final results table here 111 | retrieval_n_rows = len(self.vector_db_experiment.full_df) 112 | query_metadata = [ 113 | self.extract_query_metadata_fn(row) for _, row in self.vector_db_experiment.full_df.iterrows() 114 | ] 115 | final_n_row = len(self.full_df) 116 | 117 | self.partial_df["retrieval_metadata"] = [query_metadata[i % retrieval_n_rows] for i in range(final_n_row)] 118 | self.full_df["retrieval_metadata"] = self.partial_df["retrieval_metadata"] 119 | 120 | def visualize(self) -> None: 121 | if self.experiment is None: 122 | self.run() 123 | self.experiment.visualize() 124 | -------------------------------------------------------------------------------- /prompttools/harness/utility.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | def is_interactive() -> bool: 9 | r""" 10 | Used to determine if we are in a jupyter notebook, which 11 | determines how we present the visualizations. 
12 | """ 13 | import __main__ as main 14 | 15 | return not hasattr(main, "__file__") 16 | -------------------------------------------------------------------------------- /prompttools/logger/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from .logger import Logger, add_feedback 9 | 10 | 11 | __all__ = [ 12 | "Logger", 13 | "add_feedback", 14 | ] 15 | -------------------------------------------------------------------------------- /prompttools/mock/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/mock/__init__.py -------------------------------------------------------------------------------- /prompttools/mock/mock_data/images/19th_century_wombat_gentleman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/mock/mock_data/images/19th_century_wombat_gentleman.png -------------------------------------------------------------------------------- /prompttools/mock/mock_data/images/Just_a_fruit_basket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/mock/mock_data/images/Just_a_fruit_basket.png -------------------------------------------------------------------------------- /prompttools/playground/README.md: -------------------------------------------------------------------------------- 1 | ## `prompttools` Playground 2 | 3 | If you are interested to have experiment with a UI rather than a notebook, the playground allows you to do that! 4 | You can: 5 | - Evaluate different instructions (system prompts) 6 | - Try different prompt templates 7 | - Compare across models (e.g. GPT-4 vs. local LLaMA 2) 8 | 9 |
10 | 11 |
12 | 13 | To launch the playground locally, clone the git repo and run the following script with streamlit: 14 | 15 | ``` 16 | git clone https://github.com/hegelai/prompttools.git 17 | cd prompttools && pip install -r prompttools/playground/requirements.txt 18 | streamlit run prompttools/playground/playground.py 19 | ``` 20 | 21 | Similar to the notebook examples, all the executions and calls to LLM services happen within your local machines, 22 | `prompttools` do not forward your requests or log your information. 23 | -------------------------------------------------------------------------------- /prompttools/playground/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /prompttools/playground/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from prompttools.experiment import LlamaCppExperiment 9 | from prompttools.experiment import OpenAIChatExperiment 10 | from prompttools.experiment import OpenAICompletionExperiment 11 | from prompttools.experiment import AnthropicCompletionExperiment 12 | from prompttools.experiment import GooglePaLMCompletionExperiment 13 | from prompttools.experiment import HuggingFaceHubExperiment 14 | from prompttools.experiment import ReplicateExperiment 15 | 16 | ENVIRONMENT_VARIABLE = { 17 | "Replicate": "REPLICATE_API_TOKEN", 18 | "OpenAI Chat": "OPENAI_API_KEY", 19 | "OpenAI Completion": "OPENAI_API_KEY", 20 | "Anthropic": "ANTHROPIC_API_KEY", 21 | "Google PaLM": "GOOGLE_PALM_API_KEY", 22 | "HuggingFace Hub": "HUGGINGFACEHUB_API_TOKEN", 23 | } 24 | 25 | EXPERIMENTS = { 26 | "LlamaCpp Chat": LlamaCppExperiment, 27 | "OpenAI Chat": OpenAIChatExperiment, 28 | "OpenAI Completion": OpenAICompletionExperiment, 29 | "Anthropic": AnthropicCompletionExperiment, 30 | "Google PaLM": GooglePaLMCompletionExperiment, 31 | "HuggingFace Hub": HuggingFaceHubExperiment, 32 | "Replicate": ReplicateExperiment, 33 | } 34 | 35 | MODES = ("Instruction", "Prompt Template", "Model Comparison") 36 | 37 | MODEL_TYPES = ( 38 | "OpenAI Chat", 39 | "OpenAI Completion", 40 | "Anthropic", 41 | "Google PaLM", 42 | "LlamaCpp Chat", 43 | "LlamaCpp Completion", 44 | "HuggingFace Hub", 45 | "Replicate", 46 | ) 47 | 48 | OPENAI_CHAT_MODELS = ( 49 | "gpt-3.5-turbo", 50 | "gpt-3.5-turbo-16k", 51 | "gpt-3.5-turbo-0613", 52 | "gpt-3.5-turbo-16k-0613", 53 | "gpt-3.5-turbo-0301", 54 | "gpt-4", 55 | "gpt-4-0613", 56 | "gpt-4-32k", 57 | "gpt-4-32k-0613", 58 | "gpt-4-0314", 59 | "gpt-4-32k-0314", 60 | ) 61 | 62 | OPENAI_COMPLETION_MODELS = ("text-davinci-003", "text-davinci-002", "code-davinci-002") 63 | -------------------------------------------------------------------------------- /prompttools/playground/data_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 
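# Quick illustration of the template rendering performed by ``render_prompts`` below.
# The template and variables are made-up values for demonstration only.
#
#   render_prompts(
#       ["Tell me a {{adjective}} fact about {{topic}}."],
#       [{"adjective": "surprising", "topic": "wombats"}],
#   )
#   # -> ["Tell me a surprising fact about wombats."]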
6 | 7 | 8 | import os 9 | import jinja2 10 | import streamlit as st 11 | 12 | from prompttools.selector.prompt_selector import PromptSelector 13 | from prompttools.playground.constants import ENVIRONMENT_VARIABLE, EXPERIMENTS 14 | 15 | 16 | def render_prompts(templates, vars): 17 | prompts = [] 18 | for template in templates: 19 | for var_set in vars: 20 | environment = jinja2.Environment() 21 | jinja_template = environment.from_string(template) 22 | prompts.append(jinja_template.render(**var_set)) 23 | return prompts 24 | 25 | 26 | @st.cache_data 27 | def load_data( 28 | model_type, 29 | model, 30 | instructions, 31 | user_inputs, 32 | temperature=0.0, 33 | top_p=1, 34 | max_tokens=None, 35 | frequency_penalty=0.0, 36 | presence_penalty=0.0, 37 | api_key=None, 38 | ): 39 | if api_key: 40 | os.environ[ENVIRONMENT_VARIABLE[model_type]] = api_key 41 | selectors = [PromptSelector(instruction, user_input) for instruction in instructions for user_input in user_inputs] 42 | 43 | experiment = None 44 | if model_type == "LlamaCpp Chat": 45 | call_params = dict(temperature=[temperature]) 46 | experiment = EXPERIMENTS[model_type]([model], selectors, call_params=call_params) 47 | elif model_type in {"OpenAI Chat", "OpenAI Completion"}: 48 | experiment = EXPERIMENTS[model_type]( 49 | [model], 50 | selectors, 51 | temperature=[temperature], 52 | top_p=[top_p], 53 | max_tokens=[max_tokens], 54 | frequency_penalty=[frequency_penalty], 55 | presence_penalty=[presence_penalty], 56 | ) 57 | elif model_type == "HuggingFace Hub": 58 | experiment = EXPERIMENTS[model_type]([model], selectors, temperature=[temperature]) 59 | elif model_type == "Anthropic": 60 | experiment = EXPERIMENTS[model_type]([model], selectors, temperature=[temperature]) 61 | elif model_type == "Google PaLM": 62 | experiment = EXPERIMENTS[model_type]([model], selectors, temperature=[temperature]) 63 | elif model_type == "Replicate": 64 | input_kwargs = {"prompt": selectors, 65 | "temperature": [temperature]} 66 | model_specific_kwargs = {model: {}} 67 | experiment = EXPERIMENTS[model_type]([model], input_kwargs, model_specific_kwargs) 68 | 69 | return experiment.to_pandas_df(True, True) 70 | 71 | 72 | @st.cache_data 73 | def run_multiple( 74 | model_types, 75 | models, 76 | instructions, 77 | prompts, 78 | openai_api_key=None, 79 | anthropic_api_key=None, 80 | google_api_key=None, 81 | hf_api_key=None, 82 | replicate_api_key=None, 83 | ): 84 | import os 85 | 86 | if openai_api_key: 87 | os.environ["OPENAI_API_KEY"] = openai_api_key 88 | if anthropic_api_key: 89 | os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key 90 | if google_api_key: 91 | os.environ["GOOGLE_PALM_API_KEY"] = google_api_key 92 | if hf_api_key: 93 | os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_api_key 94 | if replicate_api_key: 95 | os.environ["REPLICATE_API_TOKEN"] = replicate_api_key 96 | dfs = [] 97 | for i in range(len(models)): 98 | # TODO Support temperature and other parameters 99 | selectors = [] 100 | if i + 1 in instructions: 101 | selectors = [PromptSelector(instructions[i + 1], prompt) for prompt in prompts] 102 | if model_types[i] == "Replicate": 103 | input_kwargs = {"prompt": selectors} 104 | model_specific_kwargs = {models[i]: {}} 105 | experiment = EXPERIMENTS[model_types[i]]([models[i]], input_kwargs, model_specific_kwargs) 106 | else: 107 | experiment = EXPERIMENTS[model_types[i]]([models[i]], selectors) 108 | else: 109 | if model_types[i] == "Replicate": 110 | input_kwargs = {"prompt": prompts} 111 | model_specific_kwargs = {models[i]: {}} 112 | 
experiment = EXPERIMENTS[model_types[i]]([models[i]], input_kwargs, model_specific_kwargs) 113 | else: 114 | experiment = EXPERIMENTS[model_types[i]]([models[i]], prompts) 115 | dfs.append(experiment.to_pandas_df(True, True)) 116 | return dfs 117 | -------------------------------------------------------------------------------- /prompttools/playground/packages.txt: -------------------------------------------------------------------------------- 1 | pkg-config -------------------------------------------------------------------------------- /prompttools/playground/requirements.txt: -------------------------------------------------------------------------------- 1 | prompttools 2 | jinja2 3 | huggingface_hub 4 | llama-cpp-python 5 | anthropic 6 | pyperclip 7 | google-generativeai 8 | replicate -------------------------------------------------------------------------------- /prompttools/prompttest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/prompttest/__init__.py -------------------------------------------------------------------------------- /prompttools/prompttest/error/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/prompttest/error/__init__.py -------------------------------------------------------------------------------- /prompttools/prompttest/error/failure.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from prompttools.prompttest.threshold_type import ThresholdType 8 | 9 | 10 | class PromptTestSetupException(Exception): 11 | r""" 12 | An exception to throw when something goes wrong with the prompt test setup 13 | """ 14 | 15 | pass 16 | 17 | 18 | def log_failure(metric_name, threshold, actual, threshold_type): 19 | r""" 20 | Prints the test results to the console. 21 | """ 22 | print( 23 | "Test failed: " 24 | + metric_name 25 | + "\nThreshold: " 26 | + (" " * (len("Test failed") - len("Threshold") + 1)) 27 | + str(threshold) 28 | + "\nActual: " 29 | + (" " * (len("Test failed") - len("Actual") + 1)) 30 | + str(actual) 31 | + "\nType: " 32 | + (" " * (len("Test failed") - len("Type") + 1)) 33 | + str("Minimum" if threshold_type is ThresholdType.MINIMUM else "Maximum") 34 | ) 35 | print("-" * (len("Test failed: " + metric_name) + 2)) 36 | -------------------------------------------------------------------------------- /prompttools/prompttest/prompttest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 
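# Usage sketch for the ``prompttest`` decorator defined below. The evaluation function,
# prompt, and completion function are placeholders for illustration, not part of this module.
#
#   def contains_expected(prompt, response, metadata, expected=None):
#       return 1.0 if expected and expected.lower() in response.lower() else 0.0
#
#   @prompttest(
#       metric_name="contains_expected",
#       eval_fn=contains_expected,
#       prompts=["Who was the first U.S. president?"],
#       expected=["George Washington"],
#   )
#   def call_model(prompt: str) -> str:
#       ...  # call an LLM of your choice and return its text response
#
#   if __name__ == "__main__":
#       main()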
6 | 7 | from typing import Callable, List, Optional 8 | from functools import wraps 9 | import logging 10 | 11 | from .threshold_type import ThresholdType 12 | from .error.failure import PromptTestSetupException 13 | from .runner.runner import run_prompttest 14 | 15 | TESTS_TO_RUN = [] 16 | 17 | 18 | def prompttest( 19 | metric_name: str, 20 | eval_fn: Callable, 21 | prompts: List[str], 22 | threshold: float = 1.0, 23 | threshold_type: ThresholdType = ThresholdType.MINIMUM, 24 | expected: Optional[List[str]] = None, 25 | ): 26 | r""" 27 | Creates a decorator for prompt tests, which can annotate evaluation functions. 28 | This enables developers to create a prompt test suite from their evaluations. 29 | """ 30 | 31 | def prompttest_decorator(completion_fn: Callable): 32 | @wraps(completion_fn) 33 | def runs_test(): 34 | results = [completion_fn(prompt) for prompt in prompts] 35 | return run_prompttest( 36 | metric_name, 37 | eval_fn, 38 | threshold, 39 | threshold_type, 40 | prompts, 41 | results, 42 | expected=expected, 43 | ) 44 | 45 | TESTS_TO_RUN.append(runs_test) 46 | return runs_test 47 | 48 | return prompttest_decorator 49 | 50 | 51 | def main(): 52 | logging.getLogger().setLevel(logging.WARNING) 53 | print("Running " + str(len(TESTS_TO_RUN)) + " test(s)") 54 | failures = int(sum([test() for test in TESTS_TO_RUN])) 55 | if failures == 0: 56 | print("All " + str(len(TESTS_TO_RUN)) + " test(s) passed!") 57 | exit(0) 58 | else: 59 | print("Tests failed: " + str(failures)) 60 | exit(1) 61 | -------------------------------------------------------------------------------- /prompttools/prompttest/runner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/prompttest/runner/__init__.py -------------------------------------------------------------------------------- /prompttools/prompttest/runner/runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from collections import defaultdict 8 | from typing import Callable, Dict, List, Optional, Type 9 | import logging 10 | 11 | from prompttools.prompttest.threshold_type import ThresholdType 12 | from prompttools.prompttest.error.failure import log_failure 13 | from prompttools.experiment import Experiment 14 | from prompttools.prompttest.error.failure import PromptTestSetupException 15 | 16 | 17 | class PromptTestRunner: 18 | r""" 19 | Base class for prompt test runners. Please use the subclass instead.s 20 | """ 21 | 22 | def __init__(self): 23 | self.ran = defaultdict(bool) 24 | self.experiments = dict() 25 | 26 | def run(self, *args, **kwargs) -> str: 27 | r""" 28 | Runs the test if it has not already been run. 29 | """ 30 | key = str(args) 31 | if self.ran[key]: 32 | return key 33 | self.experiments[key] = self._get_experiment(*args, **kwargs) 34 | self.experiments[key].run() 35 | self.ran[key] = True 36 | return key 37 | 38 | def evaluate( 39 | self, 40 | key: str, 41 | metric_name: str, 42 | eval_fn: Callable, 43 | expected: Optional[str] = None, 44 | ) -> None: 45 | r""" 46 | Evaluates the test results using the given ``eval_fn``. 
47 | """ 48 | self.experiments[key].evaluate(metric_name, eval_fn, expected=expected) 49 | 50 | def visualize(self, key: str) -> None: 51 | r""" 52 | Evaluates the test results using the given ``eval_fn``. 53 | """ 54 | self.experiments[key].visualize() 55 | 56 | def scores(self, key): 57 | r""" 58 | Returns the scores for the underlying experiment at the 59 | given key. 60 | """ 61 | return self.experiments[key].scores 62 | 63 | @staticmethod 64 | def _get_experiment( 65 | experiment: Type[Experiment], 66 | model_name: str, 67 | prompts: List[str], 68 | model_args: Dict[str, object], 69 | ) -> Experiment: 70 | return experiment([model_name], prompts, **{k: [v] for k, v in model_args}) 71 | 72 | 73 | prompt_test_runner = PromptTestRunner() 74 | 75 | 76 | def run_prompttest( 77 | metric_name: str, 78 | eval_fn: Callable, 79 | threshold: float, 80 | threshold_type: ThresholdType, 81 | prompts: List[str], 82 | results: List[str], 83 | expected: Optional[List[str]], 84 | ) -> int: 85 | """ 86 | Runs the prompt test evaluation. 87 | """ 88 | scores = [] 89 | for i, result in enumerate(results): 90 | if expected: 91 | score = eval_fn(prompts[i], result, metadata={}, expected=expected[i]) 92 | else: 93 | score = eval_fn(prompts[i], result, metadata={}) 94 | scores.append(score) 95 | if not scores: 96 | logging.error("Something went wrong during testing. Make sure your API keys are set correctly.") 97 | raise PromptTestSetupException 98 | for score in scores: 99 | if not (score <= threshold if threshold_type == ThresholdType.MAXIMUM else score >= threshold): 100 | log_failure(metric_name, threshold, score, threshold_type) 101 | return 1 102 | return 0 103 | -------------------------------------------------------------------------------- /prompttools/prompttest/threshold_type.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from enum import Enum 8 | 9 | 10 | class ThresholdType(Enum): 11 | r""" 12 | Defines the types of thresholds a user can specify for their test case. 13 | """ 14 | 15 | MINIMUM = 1 16 | MAXIMUM = 2 17 | -------------------------------------------------------------------------------- /prompttools/requests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hegelai/prompttools/2446cc9e629fef0a82553ec338c20a203f0688b3/prompttools/requests/__init__.py -------------------------------------------------------------------------------- /prompttools/requests/request_queue.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import os 8 | from typing import Callable, Dict, List, Tuple 9 | from queue import Queue, Empty 10 | from time import perf_counter 11 | import threading 12 | import openai 13 | import logging 14 | 15 | from prompttools.requests.retries import retry_decorator 16 | 17 | 18 | class RequestQueue: 19 | r""" 20 | A generic queue for processing requests in the `prompttools` library. 21 | It can be used to handle and time requests to any LLM asynchronously. 
22 | """ 23 | 24 | def __init__(self): 25 | self.data_queue = Queue() 26 | self.is_running = True 27 | self.worker_thread = threading.Thread(target=self._process_queue, daemon=True) 28 | self.worker_thread.start() 29 | self.request_args: list[dict[str, object]] = [] 30 | self.request_results: list[dict[str, object]] = [] 31 | self.request_latencies: list[float] = [] 32 | 33 | def _process_queue(self) -> None: 34 | while self.is_running: 35 | try: 36 | fn, args = self.data_queue.get(timeout=0.2) 37 | self._do_task(fn, args) 38 | self.data_queue.task_done() 39 | except Empty: 40 | continue 41 | 42 | def _do_task(self, fn: Callable, args: Dict[str, object]) -> None: 43 | try: 44 | # TODO: For the streamlit app, we need to set the api key this way. 45 | # Ideally, OpenAI should be able to use the env var. 46 | if "OPENAI_API_KEY" in os.environ: 47 | openai.api_key = os.environ["OPENAI_API_KEY"] 48 | res = self._run(fn, args) 49 | self.request_args.append(args) 50 | self.request_results.append(res[0]) 51 | self.request_latencies.append(res[1]) 52 | # TODO: If we get an unexpected error here, the queue will hang 53 | except openai.AuthenticationError: 54 | logging.error("Authentication error. Skipping request.") 55 | 56 | @retry_decorator 57 | def _run(self, fn: Callable, args: Dict[str, object]) -> Tuple[Dict[str, object], float]: 58 | start = perf_counter() 59 | result = fn(**args) 60 | return result, perf_counter() - start 61 | 62 | def shutdown(self) -> None: 63 | r""" 64 | Stops the worker thread from executed and joins it. 65 | """ 66 | self.data_queue.join() 67 | self.is_running = False 68 | # TODO: If we are hanging and interrupt, this line will 69 | # have the following error: TypeError: 'NoneType' object is not callable 70 | self.worker_thread.join() 71 | 72 | def __del__(self) -> None: 73 | self.shutdown() 74 | 75 | def enqueue(self, callable: Callable, args: Dict[str, object]) -> None: 76 | r""" 77 | Adds another request to the queue. 78 | """ 79 | self.data_queue.put((callable, args)) 80 | 81 | def get_input_args(self) -> List[Dict[str, object]]: 82 | r""" 83 | Joins the queue and gets input args that lead to the result. 84 | """ 85 | self.data_queue.join() 86 | return self.request_args 87 | 88 | def get_results(self) -> List[Dict[str, object]]: 89 | r""" 90 | Joins the queue and gets results. 91 | """ 92 | self.data_queue.join() 93 | return self.request_results 94 | 95 | def get_latencies(self) -> List[float]: 96 | r""" 97 | Joins the queue and gets latencies. 98 | """ 99 | self.data_queue.join() 100 | return self.request_latencies 101 | -------------------------------------------------------------------------------- /prompttools/requests/retries.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from tenacity import ( 8 | before_sleep_log, 9 | retry, 10 | retry_if_exception_type, 11 | stop_after_attempt, 12 | wait_exponential, 13 | ) 14 | import openai 15 | import logging 16 | 17 | 18 | def generate_retry_decorator(wait_lower_bound: int = 3, wait_upper_bound: int = 12, max_retry_attempts: int = 5): 19 | r""" 20 | Creates a retry decorator that can be used for requests. It looks for specific exceptions and waits for 21 | certain about of time before retrying. This improves the reliability of the request queue. 
22 | 23 | Args: 24 | wait_lower_bound (int): lower bound to the wait time before retry, defaults to 3. 25 | wait_upper_bound (int): upper bound to the wait time before retry, defaults to 12. 26 | max_retry_attempts (int): maximum number of retries before stopping, defaults to 5. 27 | """ 28 | return retry( 29 | # For the `i`th attempt, wait 2^i seconds before retrying 30 | # with lower and upper bound of [3s, 12s]. 31 | wait=wait_exponential(multiplier=1, min=wait_lower_bound, max=wait_upper_bound), 32 | stop=stop_after_attempt(max_retry_attempts), 33 | reraise=True, 34 | retry=( # Retry for these specific exceptions 35 | retry_if_exception_type(openai.APIConnectionError) 36 | | retry_if_exception_type(openai.APIError) 37 | | retry_if_exception_type(openai.RateLimitError) 38 | | retry_if_exception_type(openai.APIStatusError) 39 | | retry_if_exception_type(openai.APIConnectionError) 40 | | retry_if_exception_type(openai.APIResponseValidationError) 41 | | retry_if_exception_type(openai.APITimeoutError) 42 | ), 43 | before_sleep=before_sleep_log(logging.getLogger(__name__), logging.WARNING), 44 | ) 45 | 46 | 47 | retry_decorator = generate_retry_decorator() 48 | -------------------------------------------------------------------------------- /prompttools/selector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | -------------------------------------------------------------------------------- /prompttools/selector/prompt_selector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | try: 8 | from anthropic import HUMAN_PROMPT, AI_PROMPT 9 | except ImportError: 10 | HUMAN_PROMPT, AI_PROMPT = None, None 11 | 12 | 13 | GENERIC_TEMPLATE = """INSTRUCTION: 14 | {instruction} 15 | PROMPT: 16 | {user_input} 17 | RESPONSE: 18 | """ 19 | 20 | PALM_TEMPLATE = """{instruction} 21 | 22 | {user_input} 23 | """ 24 | 25 | LLAMA_TEMPLATE = """[INST] <> 26 | {instruction} 27 | < 28 | {user_input} [/INST] 29 | """ 30 | 31 | ANTHROPIC_TEMPLATE = """{HUMAN_PROMPT}{instruction} 32 | {user_input} 33 | {AI_PROMPT}""" 34 | 35 | 36 | class PromptSelector: 37 | r""" 38 | An abstraction for rendering the same prompt 39 | for different models, e.g. 
OpenAI Chat models 40 | and Llama models 41 | """ 42 | 43 | def __init__(self, instruction: str, user_input: object): 44 | self.instruction = instruction 45 | self.user_input = user_input 46 | 47 | def for_openai_chat(self): 48 | return [ 49 | {"role": "system", "content": self.instruction}, 50 | {"role": "user", "content": self.user_input}, 51 | ] 52 | 53 | def for_openai_completion(self): 54 | return GENERIC_TEMPLATE.format(instruction=self.instruction, user_input=self.user_input) 55 | 56 | def for_huggingface_hub(self): 57 | return GENERIC_TEMPLATE.format(instruction=self.instruction, user_input=self.user_input) 58 | 59 | def for_llama(self): 60 | return LLAMA_TEMPLATE.format(instruction=self.instruction, user_input=self.user_input) 61 | 62 | def for_anthropic(self): 63 | return ANTHROPIC_TEMPLATE.format( 64 | HUMAN_PROMPT=HUMAN_PROMPT, instruction=self.instruction, user_input=self.user_input, AI_PROMPT=AI_PROMPT 65 | ) 66 | 67 | def for_palm(self): 68 | return PALM_TEMPLATE.format(instruction=self.instruction, user_input=self.user_input) 69 | 70 | def for_music_gen(self): 71 | return GENERIC_TEMPLATE.format(instruction=self.instruction, user_input=self.user_input) 72 | -------------------------------------------------------------------------------- /prompttools/sentry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | # Sentry collects crash reports and performance numbers 9 | # It is possible to turn off data collection using an environment variable named "SENTRY_OPT_OUT" 10 | import sentry_sdk 11 | 12 | import os 13 | import platform 14 | import uuid 15 | import hashlib 16 | from .version import __version__ 17 | 18 | 19 | SENTRY_DSN = "https://43fbb5a3a556ca0a879f5a08ce805d87@o4505656408211456.ingest.sentry.io/4505656412667904" 20 | 21 | # Get a random token based on the machine uuid 22 | token = hashlib.sha256(str(uuid.getnode()).encode()).hexdigest() 23 | 24 | 25 | def find_certifi_path(): 26 | try: 27 | import certifi 28 | 29 | return os.path.join(os.path.dirname(certifi.__file__), "cacert.pem") 30 | except Exception: 31 | pass 32 | return None 33 | 34 | 35 | def filter_info(event, _hint): 36 | # Remove personal info 37 | try: 38 | event["modules"] = None 39 | event["extra"] = None 40 | event["server_name"] = None 41 | except Exception: 42 | pass 43 | return event 44 | 45 | 46 | def init_sentry(): 47 | if "SENTRY_OPT_OUT" not in os.environ: 48 | if platform.system() == "Darwin": 49 | # Fix CA certificate issue on latest MAC models 50 | path = find_certifi_path() 51 | if path is not None: 52 | if "SSL_CERT_FILE" not in os.environ: 53 | os.environ["SSL_CERT_FILE"] = path 54 | if "REQUESTS_CA_BUNDLE" not in os.environ: 55 | os.environ["REQUESTS_CA_BUNDLE"] = path 56 | 57 | sentry_sdk.init( 58 | dsn=SENTRY_DSN, 59 | release=__version__, 60 | traces_sample_rate=0.01, 61 | include_local_variables=False, 62 | send_default_pii=False, 63 | attach_stacktrace=False, 64 | before_send=filter_info, 65 | include_source_context=False, 66 | # the rate at which transaction and performance data is sampled for profiling purposes 67 | profiles_sample_rate=0.0, 68 | ) 69 | try: 70 | filename = os.path.join(os.environ.get("HOME", "/tmp"), ".token") 71 | if platform.system() == "Windows": 72 | filename = os.path.join(os.environ.get("USERPROFILE", "c:\\"), ".token") 73 | with 
open(filename, "w") as f: 74 | f.write(token) 75 | except Exception: 76 | pass 77 | 78 | sentry_sdk.capture_message("Initializing prompttools", "info") 79 | -------------------------------------------------------------------------------- /prompttools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from . import autoeval, expected, validate_json, validate_python, similarity 9 | from .autoeval import autoeval_binary_scoring 10 | from .autoeval_from_expected import autoeval_from_expected_response 11 | from .autoeval_scoring import autoeval_scoring 12 | from .autoeval_with_docs import autoeval_with_documents 13 | from .chunk_text import chunk_text 14 | from .expected import compute_similarity_against_model 15 | from .moderation import apply_moderation 16 | from .ranking_correlation import ranking_correlation 17 | from .similarity import semantic_similarity, cos_similarity 18 | from .validate_json import validate_json_response 19 | from .validate_python import validate_python_response 20 | 21 | __all__ = [ 22 | "autoeval", 23 | "autoeval_binary_scoring", 24 | "autoeval_from_expected_response", 25 | "autoeval_scoring", 26 | "autoeval_with_documents", 27 | "chunk_text", 28 | "compute_similarity_against_model", 29 | "expected", 30 | "apply_moderation", 31 | "ranking_correlation", 32 | "semantic_similarity", 33 | "cos_similarity", 34 | "similarity", 35 | "validate_json", 36 | "validate_json_response", 37 | "validate_python", 38 | "validate_python_response", 39 | ] 40 | -------------------------------------------------------------------------------- /prompttools/utils/autoeval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | from typing import Dict 10 | import openai 11 | import pandas.core.series 12 | import jinja2 13 | from .error import PromptToolsUtilityError 14 | 15 | EVALUATION_SYSTEM_PROMPT = """ 16 | Determine whether or not the response is following directions. 17 | Your answer should either be "RIGHT" if the response follows directions, 18 | or "WRONG" if the model is not following directions. 19 | """ 20 | 21 | EVALUATION_USER_TEMPLATE = """ 22 | PROMPT: {{prompt}} 23 | RESPONSE: {{response}} 24 | ANSWER: 25 | """ 26 | 27 | 28 | def _get_messages(prompt: str, response: str): 29 | environment = jinja2.Environment() 30 | template = environment.from_string(EVALUATION_USER_TEMPLATE) 31 | user_message = template.render({"prompt": prompt, "response": response}) 32 | return [ 33 | {"role": "system", "content": EVALUATION_SYSTEM_PROMPT}, 34 | {"role": "user", "content": user_message}, 35 | ] 36 | 37 | 38 | def compute(prompt: str, response: str, model: str = "gpt-4") -> float: 39 | r""" 40 | Uses a high quality chat model, like GPT-4, to automatically evaluate a given 41 | prompt/response pair. Outputs can be 0 or 1. 42 | 43 | Args: 44 | prompt (str): The input prompt. 45 | response (str): The model response. 46 | model (str): The OpenAI chat model to use for generating an expected response. 47 | Defaults to GPT-4. 
48 | """ 49 | if not os.environ["OPENAI_API_KEY"]: 50 | raise PromptToolsUtilityError 51 | evaluation = openai.chat.completions.create(model=model, messages=_get_messages(prompt, response)) 52 | return 1.0 if "RIGHT" in evaluation.choices[0].message.content else 0.0 53 | 54 | 55 | def evaluate(prompt: str, response: str, _metadata: Dict) -> float: 56 | r""" 57 | Uses auto-evaluation to score the model response with "gpt-4" as the judge, returning 0.0 or 1.0. 58 | 59 | Args: 60 | prompt (str): The input prompt. 61 | response (str): The model response. 62 | metadata (str): Not used. 63 | """ 64 | return compute(prompt, response) 65 | 66 | 67 | def autoeval_binary_scoring( 68 | row: pandas.core.series.Series, 69 | prompt_column_name: str, 70 | response_column_name: str = "response", 71 | ) -> float: 72 | r""" 73 | Uses auto-evaluation to score the model response with "gpt-4" as the judge, returning 0.0 or 1.0. 74 | 75 | Args: 76 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 77 | metrics, etc). 78 | prompt_column_name (str): name of the column that contains the input prompt 79 | response_column_name (str): name of the column that contains the model's response, defaults to ``"response"`` 80 | """ 81 | return compute(row[prompt_column_name], row[response_column_name]) 82 | -------------------------------------------------------------------------------- /prompttools/utils/autoeval_from_expected.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | import openai 10 | import jinja2 11 | import pandas 12 | from .error import PromptToolsUtilityError 13 | 14 | EVALUATION_SYSTEM_PROMPT = """ 15 | You are a grader evaluating responses to math questions. 16 | Given the PROMPT and EXPECTED, evaluate the ACTUAL answer. 17 | The ACTUAL answer should be the same as the EXPECTED. 18 | You should grade the response as either RIGHT or WRONG. 19 | If the ACTUAL answer is the same as the EXPECTED, mark it RIGHT. 20 | Otherwise, mark it WRONG. 21 | """ 22 | 23 | EVALUATION_USER_TEMPLATE = """ 24 | PROMPT: {{prompt}} 25 | EXPECTED: {{expected}} 26 | ACTUAL: {{actual}} 27 | ANSWER: 28 | """ 29 | 30 | 31 | def _get_messages(prompt: str, expected: str, response: str): 32 | environment = jinja2.Environment() 33 | template = environment.from_string(EVALUATION_USER_TEMPLATE) 34 | user_message = template.render({"prompt": prompt, "expected": expected, "actual": response}) 35 | return [ 36 | {"role": "system", "content": EVALUATION_SYSTEM_PROMPT}, 37 | {"role": "user", "content": user_message}, 38 | ] 39 | 40 | 41 | def compute(prompt: str, expected: str, response: str, model: str = "gpt-4") -> float: 42 | r""" 43 | Uses a high quality chat model, like GPT-4, to automatically evaluate a given 44 | prompt/response pair. Outputs can be 0 or 1. 45 | 46 | Args: 47 | prompt (str): The input prompt. 48 | response (str): The model response. 49 | model (str): The OpenAI chat model to use for generating an expected response. 50 | Defaults to GPT-4. 
51 | """ 52 | if not os.environ["OPENAI_API_KEY"]: 53 | raise PromptToolsUtilityError("Missing API key for evaluation.") 54 | evaluation = openai.chat.completions.create(model=model, messages=_get_messages(prompt, expected, response)) 55 | return 1.0 if "RIGHT" in evaluation.choices[0].message.content else 0.0 56 | 57 | 58 | def evaluate(prompt: str, response: str, metadata: dict, expected: str) -> float: 59 | r""" 60 | Uses auto-evaluation to score the model response. 61 | """ 62 | return compute(prompt, expected, response) 63 | 64 | 65 | def autoeval_from_expected_response( 66 | row: pandas.core.series.Series, expected: str, prompt_column_name: str, response_column_name: str = "response" 67 | ): 68 | prompt = row[prompt_column_name] 69 | response = row[response_column_name] 70 | return compute(prompt, expected, response) 71 | -------------------------------------------------------------------------------- /prompttools/utils/autoeval_scoring.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | import pandas.core.series 10 | import jinja2 11 | 12 | try: 13 | import anthropic 14 | except ImportError: 15 | anthropic = None 16 | 17 | 18 | AUTO_EVAL_PROMPT_TEMPLATE = """ 19 | {{HUMAN_PROMPT}} Given the fact {{fact}} 20 | 21 | Evaluate the following Answer on a scale from 1 - 7. Please only respond with an integer from 1 - 7 with no other text. 22 | Lower score means the answer is factually wrong, higher score means the answer is correct. A medium score for 23 | uncertain but not wrong. 24 | 25 | Answer: {{model_answer}} 26 | 27 | {{AI_PROMPT}} 28 | """ 29 | 30 | 31 | def _generate_auto_eval_prompt(fact: str, model_answer: str): 32 | environment = jinja2.Environment() 33 | template = environment.from_string(AUTO_EVAL_PROMPT_TEMPLATE) 34 | auto_eval_prompt = template.render( 35 | { 36 | "HUMAN_PROMPT": anthropic.HUMAN_PROMPT, 37 | "AI_PROMPT": anthropic.AI_PROMPT, 38 | "fact": fact, 39 | "model_answer": model_answer, 40 | } 41 | ) 42 | return auto_eval_prompt 43 | 44 | 45 | def compute(fact: str, model_answer: str, model: str = "claude-2") -> float: 46 | r""" 47 | Uses a high quality chat model, like claude-2, to automatically score a given 48 | fact/response pair. Output should be an integer ranging from 1 - 7. 49 | 50 | Args: 51 | fact (str): The fact (truth). The auto-eval model will judge how close the ``response`` is 52 | from this fact (truth). 53 | model_answer (str): The model response. 54 | model (str): The model that will be judging how close is the response from the truth. 55 | Defaults to Claude 2. 56 | """ 57 | if not os.environ["ANTHROPIC_API_KEY"]: 58 | raise RuntimeError("Missing API key for evaluation.") 59 | client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]) 60 | completion_response = client.completions.create( 61 | max_tokens_to_sample=100, model=model, prompt=_generate_auto_eval_prompt(fact, model_answer) 62 | ) 63 | return int(completion_response.completion) 64 | 65 | 66 | def autoeval_scoring(row: pandas.core.series.Series, expected: str, response_column_name: str = "response") -> float: 67 | r""" 68 | Uses auto-evaluation to score the model response. 69 | 70 | Args: 71 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 72 | metrics, etc). 
73 | expected (str): the expected response 74 | response_column_name (str): name of the column that contains the model's response, defaults to ``"response"`` 75 | """ 76 | if anthropic is None: 77 | raise ModuleNotFoundError( 78 | "Package `anthropic` is required to be installed to use this evaluation method. " 79 | "Please use `pip install anthropic` to install the package" 80 | ) 81 | return compute(fact=expected, model_answer=row[response_column_name]) 82 | -------------------------------------------------------------------------------- /prompttools/utils/autoeval_with_docs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | import openai 10 | import pandas.core.series 11 | import jinja2 12 | from .error import PromptToolsUtilityError 13 | 14 | 15 | EVALUATION_SYSTEM_PROMPT = """ 16 | Using the provided documents, determine whether or not the response is accurate. 17 | Your answer should be an integer rating from 0 to 10, with 0 being extremely inaccurate 18 | and 10 being perfectly accurate. Only an integer should be returned in the response. 19 | """ 20 | 21 | EVALUATION_USER_TEMPLATE = """ 22 | DOCUMENTS: 23 | {{documents}} 24 | 25 | RESPONSE: {{response}} 26 | ANSWER: 27 | """ 28 | 29 | 30 | def _get_messages(documents: list[str], response: str): 31 | environment = jinja2.Environment() 32 | template = environment.from_string(EVALUATION_USER_TEMPLATE) 33 | user_message = template.render({"documents": "\n".join(documents), "response": response}) 34 | return [ 35 | {"role": "system", "content": EVALUATION_SYSTEM_PROMPT}, 36 | {"role": "user", "content": user_message}, 37 | ] 38 | 39 | 40 | def compute(documents: list[str], response: str, model: str = "gpt-4") -> float: 41 | r""" 42 | Uses a high quality chat model, like GPT-4, to judge how accurate the given response is 43 | against the provided documents. Outputs an integer score from 0 to 10. 44 | 45 | Args: 46 | documents (list[str]): documents to provide relevant context for the model to judge 47 | response (str): the model response to be judged 48 | model (str): The OpenAI chat model that acts as the judge. Defaults to GPT-4. 49 | """ 50 | if not os.environ["OPENAI_API_KEY"]: 51 | raise PromptToolsUtilityError 52 | evaluation = openai.chat.completions.create(model=model, messages=_get_messages(documents, response)) 53 | score_text = evaluation.choices[0].message.content 54 | return int(score_text) 55 | 56 | 57 | def autoeval_with_documents( 58 | row: pandas.core.series.Series, 59 | documents: list[str], 60 | response_column_name: str = "response", 61 | ) -> float: 62 | r""" 63 | Given a list of documents, score whether the model response is accurate with "gpt-4" as the judge, 64 | returning an integer score from 0 to 10. 65 | 66 | Args: 67 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 68 | metrics, etc).
69 | documents (list[str]): documents to provide relevant context for the model to judge 70 | response_column_name (str): name of the column that contains the model's response, defaults to ``"response"`` 71 | """ 72 | return compute(documents, row[response_column_name]) 73 | -------------------------------------------------------------------------------- /prompttools/utils/chunk_text.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | def chunk_text(text: str, max_chunk_length: int) -> list[str]: 9 | r""" 10 | Given a long string paragraph of text and a chunk max length, returns chunks of texts where each chunk's 11 | length is smaller than the max length, without breaking up individual words (separated by space). 12 | 13 | Args: 14 | text (str): source text to be chunked 15 | max_chunk_length (int): maximum length of a chunk 16 | """ 17 | 18 | words = text.split() 19 | chunks = [] 20 | current_chunk = "" 21 | 22 | for word in words: 23 | if len(current_chunk) + len(word) + 1 <= max_chunk_length: 24 | if current_chunk: 25 | current_chunk += " " 26 | current_chunk += word 27 | else: 28 | chunks.append(current_chunk) 29 | current_chunk = word 30 | 31 | if current_chunk: 32 | chunks.append(current_chunk) 33 | 34 | return chunks 35 | -------------------------------------------------------------------------------- /prompttools/utils/error.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | class PromptToolsUtilityError(Exception): 9 | r""" 10 | An exception to throw when something goes wrong with the prompttools utility. 11 | """ 12 | 13 | pass 14 | -------------------------------------------------------------------------------- /prompttools/utils/expected.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | import openai 10 | import pandas.core.series 11 | from .error import PromptToolsUtilityError 12 | from . import similarity 13 | 14 | 15 | def compute(prompt: str, model: str = "gpt-4") -> str: 16 | r""" 17 | Computes the expected result of a given prompt by using a high 18 | quality LLM, like GPT-4. 19 | 20 | Args: 21 | prompt (str): The input prompt. 22 | model (str): The OpenAI chat model to use for generating an expected response. 23 | Defaults to GPT-4. 24 | """ 25 | if not os.environ["OPENAI_API_KEY"]: 26 | raise PromptToolsUtilityError 27 | response = openai.chat.completions.create( 28 | model=model, 29 | messages=[ 30 | {"role": "user", "content": prompt}, 31 | ], 32 | ) 33 | return response.choices[0].message.content 34 | 35 | 36 | def evaluate(prompt: str, response: str, model: str = "gpt-4") -> str: 37 | r""" 38 | Computes the similarity of a given response to the expected result 39 | generated from a high quality LLM (by default GPT-4) using the same prompt. 40 | 41 | Args: 42 | prompt (str): The input prompt. 43 | response (str): The model response. 
44 | model (str): The OpenAI chat model to use for generating an expected response. 45 | Defaults to GPT-4. 46 | """ 47 | expected_response = compute(prompt, model) 48 | return similarity.compute(response, expected_response) 49 | 50 | 51 | def compute_similarity_against_model( 52 | row: pandas.core.series.Series, 53 | prompt_column_name: str, 54 | model: str = "gpt-4", 55 | response_column_name: str = "response", 56 | ) -> str: 57 | r""" 58 | Computes the similarity of a given response to the expected result 59 | generated from a high quality LLM (by default GPT-4) using the same prompt. 60 | 61 | Args: 62 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 63 | metrics, etc). 64 | prompt_column_name (str): name of the column that contains the input prompt 65 | model (str): name of the model that will serve as the judge 66 | response_column_name (str): name of the column that contains the model's response, defaults to ``"response"`` 67 | """ 68 | 69 | expected_response = compute(row[prompt_column_name], model) 70 | return similarity.compute(row[response_column_name], expected_response) 71 | -------------------------------------------------------------------------------- /prompttools/utils/moderation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import openai 9 | import pandas 10 | from typing import Optional, Union 11 | 12 | 13 | def apply_moderation( 14 | row: pandas.core.series.Series, 15 | text_col_name: str = "response", 16 | moderation_model: str = "text-moderation-latest", 17 | category_names: Optional[list[str]] = None, 18 | category_score_names: Optional[list[str]] = None, 19 | ) -> Union[bool, dict]: 20 | r""" 21 | Uses OpenAI's moderation API to determine whether the text complies with OpenAI's usage policies. 22 | 23 | Args: 24 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 25 | metrics, etc). 26 | text_col_name (str): column name of text to be moderated 27 | moderation_model (str): name of the OpenAI moderation model, defaults to ``"text-moderation-latest"`` 28 | category_names (Optional[list[str]]): specify the names of category flags to extract from the response and 29 | be added as column(s) in the row, optional. (e.g. ``["harassment", "violence"]``) 30 | category_score_names (Optional[list[str]]): specify the names of category scores to extract from the response 31 | and be added as column(s) in the row, optional. (e.g. ``["harassment", "violence"]``) 32 | 33 | Returns: 34 | A boolean flag (of whether the input violates policies), or a dict with various topic specific flags/scores. 
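Example (illustrative; mirrors the ``experiment.evaluate`` pattern documented elsewhere in this library): ``experiment.evaluate("moderation", apply_moderation, category_names=["harassment", "violence"])`` would add per-category flag columns plus a ``moderation_flag`` column to the results.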
35 | """ 36 | text = row[text_col_name] 37 | 38 | moderation_response = openai.moderations.create(model=moderation_model, input=text) 39 | flagged = moderation_response.results[0].flagged 40 | res = {} 41 | if category_names: 42 | category_flags = moderation_response.results[0].categories.model_dump() 43 | for c in category_names: 44 | res[c] = category_flags[c] 45 | if category_score_names: 46 | category_scores = moderation_response.results[0].category_scores.model_dump() 47 | for c in category_score_names: 48 | res[f"{c}_score"] = category_scores[c] 49 | if category_names or category_score_names: 50 | res["moderation_flag"] = flagged 51 | return res 52 | else: 53 | return flagged 54 | -------------------------------------------------------------------------------- /prompttools/utils/ranking_correlation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | try: 9 | import scipy.stats as stats 10 | except ImportError: 11 | stats = None 12 | import pandas 13 | 14 | 15 | def ranking_correlation( 16 | row: pandas.core.series.Series, expected_ranking: list, ranking_column_name: str = "top doc ids" 17 | ) -> float: 18 | r""" 19 | A simple test that compares the expected ranking for a given query with the actual ranking produced 20 | by the embedding function being tested. 21 | 22 | Args: 23 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 24 | metrics, etc). 25 | expected_ranking (list): the expected list of ranking to compare 26 | ranking_column_name (str): the column name of the actual ranking produced by the model, 27 | defaults to ``"top doc ids"`` 28 | 29 | Example: 30 | >>> EXPECTED_RANKING_LIST = [ 31 | >>> ["id1", "id3", "id2"], 32 | >>> ["id2", "id3", "id1"], 33 | >>> ["id1", "id3", "id2"], 34 | >>> ["id2", "id3", "id1"], 35 | >>> ] 36 | >>> experiment.evaluate("ranking_correlation", ranking_correlation, expected_ranking=EXPECTED_RANKING_LIST) 37 | """ 38 | if stats is None: 39 | raise ModuleNotFoundError( 40 | "Package `SciPy` is required to be installed to use this evaluation method." 41 | "Please use `pip install scipy` to install the package" 42 | ) 43 | actual_ranking = row[ranking_column_name] 44 | if len(expected_ranking) == 1 and len(actual_ranking) == 1: 45 | return 1.0 if expected_ranking == actual_ranking else -1.0 46 | correlation, _ = stats.spearmanr(actual_ranking, expected_ranking) 47 | return correlation 48 | -------------------------------------------------------------------------------- /prompttools/utils/validate_json.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | from typing import Callable, Dict, List, Optional 9 | import pandas.core.series 10 | import json 11 | import re 12 | 13 | KEY_EXTRACTION_REGEX = r'"([^"]+?)"\s*:' 14 | 15 | 16 | def strip_outer_brackets(text: str) -> str: 17 | r""" 18 | Removes all chars outside the first '{' and the last '}'. Intended to be a pre-processing 19 | step prior to parsing a string as JSON. 
20 | 21 | Args: 22 | text(str): the text to process 23 | """ 24 | first_brace = text.find("{") 25 | last_brace = text.rfind("}") 26 | return text[first_brace : last_brace + 1] 27 | 28 | 29 | def sample_pre_process_fn(text: str): 30 | r""" 31 | An example pre-processing that you may use before attempting to parse a string as JSON. 32 | This function removes all chars outside the first '{' and the last '}'. Then, 33 | it removes ``"\\n"``. 34 | 35 | This function should be modified depending on your LLM's output. 36 | 37 | Args: 38 | text(str): the text to process 39 | """ 40 | text = strip_outer_brackets(text) 41 | text = text.replace("\\n", "") 42 | return text 43 | 44 | 45 | def validate(text: str, pre_process_fn: Optional[Callable] = None): 46 | r""" 47 | Validates that the generated text is JSON. 48 | 49 | Args: 50 | text (str): The generated text, which should be valid JSON. 51 | pre_process_fn (Callable[str, str]): a function to pre-process the text response from the LLM before attempting 52 | to parse the string as JSON. Look at ``validate_json.sample_pre_process_fn`` as an example. 53 | """ 54 | if pre_process_fn: 55 | text = pre_process_fn(text) 56 | try: 57 | json.loads(text) 58 | except ValueError: 59 | return 0.0 60 | return 1.0 61 | 62 | 63 | def validate_keys(text: str, valid_keys: List[str]): 64 | r""" 65 | Guarantees that all keys in the generated JSON are valid. 66 | 67 | Args: 68 | text (str): The generated text, which should be valid JSON. 69 | valid_keys (List[str]): A list of valid keys which may appear in the JSON. 70 | """ 71 | keys = re.findall(KEY_EXTRACTION_REGEX, text) 72 | for key in keys: 73 | if key not in valid_keys: 74 | return 0.0 75 | return 1.0 76 | 77 | 78 | def validate_json_response(row: pandas.core.series.Series, response_column_name: str = "response") -> float: 79 | r""" 80 | Validate whether ``response`` string is in a valid JSON format. 81 | 82 | Args: 83 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 84 | metrics, etc). 85 | response_column_name (str): name of the column that contains the model's response, defaults to ``"response"`` 86 | """ 87 | return validate(row[response_column_name]) 88 | 89 | 90 | def evaluate(prompt: str, response: str, metadata: Dict) -> float: 91 | r""" 92 | Validate whether ``response`` string is in a valid JSON format. 93 | 94 | Args: 95 | prompt (str): Not used. 96 | response (str): the string that will be validated 97 | metadata (dict): Not used. 98 | """ 99 | return validate(response) 100 | -------------------------------------------------------------------------------- /prompttools/utils/validate_python.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | import os 9 | from typing import Dict 10 | import pandas.core.series 11 | from .error import PromptToolsUtilityError 12 | 13 | try: 14 | from pylint import epylint as lint 15 | except ImportError: 16 | lint = None 17 | 18 | PROMPTTOOLS_TMP = "prompttools_tmp.py" 19 | 20 | 21 | def validate(text: str): 22 | r""" 23 | Validates that the generated text is python. 24 | 25 | Args: 26 | text (str): The generated text, which should be valid python. 27 | """ 28 | if lint is None: 29 | raise RuntimeError( 30 | "Our built-in `validate_python` function requires pylint<3.0.
Please use a custom eval function." 31 | "Feel free to open a GitHub issue or PR." 32 | ) 33 | if os.path.isfile(PROMPTTOOLS_TMP): 34 | raise PromptToolsUtilityError 35 | with open(PROMPTTOOLS_TMP, "w") as f: 36 | f.write(text) 37 | pylint_stdout, _ = lint.py_run(PROMPTTOOLS_TMP, return_std=True) 38 | os.remove(PROMPTTOOLS_TMP) 39 | return 0.0 if "error" in pylint_stdout.getvalue() else 1.0 40 | 41 | 42 | def validate_python_response(row: pandas.core.series.Series, response_column_name: str = "response") -> float: 43 | r""" 44 | Validate whether ``response`` string follows Python's syntax. 45 | 46 | Args: 47 | row (pandas.core.series.Series): A row of data from the full DataFrame (including input, model response, other 48 | metrics, etc). 49 | response_column_name (str): name of the column that contains the model's response, defaults to ``"response"`` 50 | """ 51 | return validate(row[response_column_name]) 52 | 53 | 54 | def evaluate(prompt: str, response: str, metadata: Dict) -> float: 55 | r""" 56 | Validate whether ``response`` string follows Python's syntax. 57 | 58 | Args: 59 | prompt (str): Not used. 60 | response (str): the string that will be validated 61 | metadata (dict): Not used. 62 | """ 63 | return validate(response) 64 | -------------------------------------------------------------------------------- /prompttools/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.46a0+5a80732" 2 | git_version = "5a807328435d269d7ed17b53f86283e116e08244" 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "prompttools" 7 | version = "0.0.46" 8 | authors = [ 9 | { name="Hegel AI", email="team@hegel-ai.com" }, 10 | ] 11 | description = "Tools for LLM prompt testing and experimentation" 12 | readme = "README.md" 13 | requires-python = ">=3.10" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3", 16 | "License :: OSI Approved :: Apache Software License", 17 | "Operating System :: OS Independent", 18 | ] 19 | 20 | dynamic = ["dependencies", "license"] 21 | 22 | [project.urls] 23 | "Homepage" = "https://github.com/hegelai/prompttools" 24 | "Bug Tracker" = "https://github.com/hegelai/prompttools" 25 | 26 | [tool.setuptools.dynamic] 27 | dependencies = {file = ["requirements.txt"]} 28 | 29 | [tool.black] 30 | line-length = 120 31 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | tenacity 3 | tabulate 4 | pandas 5 | jinja2 6 | jupyterlab 7 | ipywidgets 8 | pylint 9 | sentry-sdk>=1.23.0 10 | -------------------------------------------------------------------------------- /scripts/create_comment.py: -------------------------------------------------------------------------------- 1 | from prompttools.experiment import OpenAIChatExperiment 2 | from prompttools.selector.prompt_selector import PromptSelector 3 | 4 | PROMPTTOOLS_MD_TMP = "markdown.md" 5 | 6 | selectors = [ 7 | PromptSelector("You are a helpful assistant.", "Is 17077 a prime number?"), 8 | PromptSelector("You are a math tutor.", "Is 17077 a prime number?"), 9 | ] 10 | models = ["gpt-3.5-turbo", "gpt-4"] 11 | temperatures = [0.0] 12 | openai_experiment = 
OpenAIChatExperiment(models, selectors, temperature=temperatures) 13 | openai_experiment.run() 14 | 15 | markdown = openai_experiment.to_markdown() 16 | with open(PROMPTTOOLS_MD_TMP, "w") as f: 17 | f.write(markdown) 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import distutils.command.clean 4 | import os 5 | import shutil 6 | import subprocess 7 | 8 | from pathlib import Path 9 | 10 | from setuptools import find_packages, setup 11 | 12 | ROOT_DIR = Path(__file__).parent.resolve() 13 | 14 | 15 | def _get_requirements(): 16 | """Get dependency requirements from `requirements.txt`.""" 17 | req_list = [] 18 | with Path("requirements.txt").open("r") as f: 19 | for line in f: 20 | req = line.strip() 21 | if len(req) == 0 or req.startswith("#"): 22 | continue 23 | req_list.append(req) 24 | return req_list 25 | 26 | 27 | def _get_version(): 28 | """Get package version.""" 29 | # with open(os.path.join(ROOT_DIR, "version.txt")) as f: 30 | # version = f.readline().strip() 31 | version = "0.0.46a0" 32 | 33 | sha = "Unknown" 34 | try: 35 | sha = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=str(ROOT_DIR)).decode("ascii").strip() 36 | except Exception: 37 | pass 38 | 39 | os_build_version = os.getenv("BUILD_VERSION") 40 | if os_build_version: 41 | version = os_build_version 42 | elif sha != "Unknown": 43 | version += "+" + sha[:7] 44 | 45 | return version, sha 46 | 47 | 48 | def _export_version(version, sha): 49 | version_path = ROOT_DIR / "prompttools" / "version.py" 50 | with open(version_path, "w") as f: 51 | f.write(f"__version__ = '{version}'\n") 52 | f.write(f"git_version = {repr(sha)}\n") 53 | 54 | 55 | requirements = _get_requirements() 56 | 57 | 58 | class Clean(distutils.command.clean.clean): 59 | def run(self): 60 | # Run default behavior first 61 | distutils.command.clean.clean.run(self) 62 | 63 | # Remove prompttools extension 64 | def remove_extension(pattern): 65 | for path in (ROOT_DIR / "prompttools").glob(pattern): 66 | print(f"removing extension '{path}'") 67 | path.unlink() 68 | 69 | for ext in ["so", "dylib", "pyd"]: 70 | remove_extension("**/*." 
+ ext) 71 | 72 | # Remove build directory 73 | build_dirs = [ 74 | ROOT_DIR / "build", # Remove build 75 | ROOT_DIR / "prompttools.egg-info", # Remove egg metadata 76 | ] 77 | for path in build_dirs: 78 | if path.exists(): 79 | print(f"removing '{path}' (and everything under it)") 80 | shutil.rmtree(str(path), ignore_errors=True) 81 | 82 | 83 | if __name__ == "__main__": 84 | VERSION, SHA = _get_version() 85 | # TODO: Exporting the version here breaks `python -m build` 86 | # _export_version(VERSION, SHA) 87 | 88 | print("-- Building version " + VERSION) 89 | 90 | setup( 91 | # Metadata 92 | name="prompttools", 93 | version=VERSION, 94 | description="Tools for prompts.", 95 | long_description=Path("README.md").read_text(encoding="utf-8"), 96 | long_description_content_type="text/markdown", 97 | url="https://github.com/hegelai/prompttools", 98 | author="Hegel AI", 99 | author_email="steve@hegel-ai.com, kevin@hegel-ai.com", 100 | license="Proprietary", 101 | install_requires=requirements, 102 | python_requires=">=3.10", 103 | classifiers=[ 104 | "Intended Audience :: Developers", 105 | "Intended Audience :: Science/Research", 106 | "Operating System :: MacOS :: MacOS X", 107 | "Operating System :: Microsoft :: Windows", 108 | "Programming Language :: Python :: 3.10", 109 | "Programming Language :: Python :: 3.11", 110 | "Programming Language :: Python :: Implementation :: CPython", 111 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 112 | ], 113 | # Package Info 114 | packages=find_packages(exclude=["test*", "examples*", "build*"]), 115 | zip_safe=False, 116 | cmdclass={ 117 | "clean": Clean, 118 | }, 119 | ) 120 | -------------------------------------------------------------------------------- /test/app.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | r""" 9 | App for local testing of logger 10 | """ 11 | 12 | from flask import Flask, request 13 | import time 14 | 15 | app = Flask(__name__) 16 | 17 | 18 | @app.route("/", methods=["POST"]) 19 | def process_request(): 20 | time.sleep(0.1) 21 | data = request.json 22 | print(f"Request received and processed {data}.") 23 | return "", 200 24 | 25 | 26 | if __name__ == "__main__": 27 | app.run(debug=True) 28 | -------------------------------------------------------------------------------- /test/requirements.txt: -------------------------------------------------------------------------------- 1 | sentence_transformers 2 | -------------------------------------------------------------------------------- /test/test_experiment.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from prompttools.experiment import ( 4 | LlamaCppExperiment, 5 | HuggingFaceHubExperiment, 6 | OpenAIChatExperiment, 7 | OpenAICompletionExperiment, 8 | ) 9 | 10 | 11 | class TestExperiment(TestCase): 12 | # TODO: Currently, it only ensures importing is correct. 13 | # Add unit tests to verify initialization. 
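# A minimal sketch of one such initialization test (illustrative only; the constructor
# arguments below are assumptions modeled on scripts/create_comment.py, and a real test
# should mock the underlying API client before being enabled):
#
#     def test_openai_chat_experiment_init(self):
#         messages = [[{"role": "user", "content": "Is 17077 a prime number?"}]]
#         experiment = OpenAIChatExperiment(["gpt-3.5-turbo"], messages, temperature=[0.0])
#         self.assertIsNotNone(experiment)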
14 | def test_llama_cpp_experiment(self): 15 | pass 16 | 17 | def test_hugging_face_experiment(self): 18 | pass 19 | 20 | def test_openai_chat_experiment(self): 21 | pass 22 | 23 | def test_openai_completion_experiment(self): 24 | pass 25 | -------------------------------------------------------------------------------- /test/test_harness.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | from prompttools.harness import ( 4 | ChatHistoryExperimentationHarness, 5 | ChatModelComparisonHarness, 6 | PromptTemplateExperimentationHarness, 7 | SystemPromptExperimentationHarness, 8 | ) 9 | 10 | 11 | class TestHarness(TestCase): 12 | # TODO: Currently, it only ensures importing is correct. 13 | # Add unit tests to verify initialization. 14 | def test_chat_history_exp_harness(self): 15 | pass 16 | 17 | def test_chat_model_exp_harness(self): 18 | pass 19 | 20 | def test_prompt_template_exp_harness(self): 21 | pass 22 | 23 | def test_system_prompt_exp_harness(self): 24 | pass 25 | -------------------------------------------------------------------------------- /test/test_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hegel AI, Inc. 2 | # All rights reserved. 3 | # 4 | # This source code's license can be found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | if False: # Skipping this in CI 9 | import openai 10 | import prompttools.logger # noqa: F401 Importing this line will monkey-patch `openai.chat.completions.create` 11 | 12 | 13 | r""" 14 | Example of using `prompttools.logger`. 15 | 16 | All you need to do is call `import prompttools.logger` to start logging. 17 | You can optionally add `hegel_model` to your call (as seen below). This will associate 18 | this call with a specific name in the logs. 19 | 20 | The OpenAI call is unchanged, it executes normally between your machine and OpenAI's server. 21 | 22 | Note: 23 | You should have "HEGELAI_API_KEY" and "OPENAI_API_KEY" loaded into `os.environ`. 24 | """ 25 | 26 | if __name__ == "__main__": 27 | if False: # Skipping this in CI 28 | for i in range(1): 29 | messages = [ 30 | {"role": "user", "content": f"What is 1 + {i}?"}, 31 | ] 32 | 33 | # `hegel_model` is an optional argument that allows you to tag your call with a specific name 34 | # Logging still works without this argument 35 | # The rest of the OpenAI call happens as normal between your machine and OpenAI's server 36 | openai_response = openai.chat.completions.create( 37 | model="gpt-3.5-turbo", messages=messages, hegel_model="Math Model" 38 | ) 39 | print(f"{openai_response = }") 40 | 41 | print("End") 42 | -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.0.46a0 2 | --------------------------------------------------------------------------------