├── .env.example ├── .flake8 ├── .github ├── CONTRIBUTING.md └── workflows │ ├── linkcheck.yml │ ├── lint.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── Makefile ├── README.md ├── docs ├── Makefile ├── _static │ ├── logo_small.png │ └── tradeoff.png ├── conf.py ├── evaluations │ ├── extending_evals.md │ └── openai_evals.md ├── extend │ ├── custom_metrics.md │ └── custom_optims.md ├── getting_started │ ├── cli.md │ ├── getting_started.md │ ├── installation.md │ └── metrics.md ├── index.rst ├── make.bat ├── reference.rst ├── requirements.txt └── theory │ └── cost_performance_tradeoff.md ├── evaluations ├── README.md ├── artifacts │ ├── % Tokens Reduced_graph.png │ ├── LogiQA Accuracy_graph.png │ ├── USD Saved Per $100_graph.png │ ├── kevin.gif │ ├── logo.png │ ├── logo_small.png │ ├── table.md │ └── tradeoff.png ├── compute_metric.py ├── eval.py ├── logs │ ├── Autocorrect_Optim.jsonl │ ├── Default.jsonl │ ├── Entropy_Optim_p_0.05.jsonl │ ├── Entropy_Optim_p_0.1.jsonl │ ├── Entropy_Optim_p_0.25.jsonl │ ├── Entropy_Optim_p_0.5.jsonl │ ├── Lemmatizer_Optim.jsonl │ ├── NameReplace_Optim.jsonl │ ├── Pulp_Optim_p_0.05.jsonl │ ├── Pulp_Optim_p_0.1.jsonl │ ├── Punctuation_Optim.jsonl │ ├── Stemmer_Optim.jsonl │ └── SynonymReplace_Optim_p_1.0.jsonl ├── make_artifacts.py ├── make_errors.py ├── results.csv ├── results │ ├── Autocorrect_Optim.jsonl │ ├── Default.jsonl │ ├── Entropy_Optim_p_0.05.jsonl │ ├── Entropy_Optim_p_0.1.jsonl │ ├── Entropy_Optim_p_0.25.jsonl │ ├── Entropy_Optim_p_0.5.jsonl │ ├── Lemmatizer_Optim.jsonl │ ├── NameReplace_Optim.jsonl │ ├── Pulp_Optim_p_0.05.jsonl │ ├── Pulp_Optim_p_0.1.jsonl │ ├── Punctuation_Optim.jsonl │ ├── Stemmer_Optim.jsonl │ └── SynonymReplace_Optim_p_1.0.jsonl ├── sample_logs │ └── generate_db.py ├── tradeoff.py └── utils.py ├── examples ├── bertscore_metric.py ├── cli │ ├── data │ │ └── example.jsonl │ ├── json_stopwordoptim.sh │ └── string_stopwordoptim.sh ├── entropy_optimizer.py ├── json_support.py ├── langchain_support.py ├── protect_tags.py └── sequential.py ├── poetry.lock ├── prompt_optimizer ├── __init__.py ├── cli │ ├── __init__.py │ └── main.py ├── metric │ ├── __init__.py │ ├── base.py │ ├── bertscore_metric.py │ └── token_metric.py ├── poptim │ ├── __init__.py │ ├── autocorrect_optim.py │ ├── base.py │ ├── entropy_optim.py │ ├── lemmatizer_optim.py │ ├── logger.py │ ├── name_replace_optim.py │ ├── pulp_optim.py │ ├── punctuation_optim.py │ ├── sequential.py │ ├── stemmer_optim.py │ ├── stop_word_optim.py │ ├── synonym_replace_optim.py │ └── utils.py ├── visualize │ ├── __init__.py │ └── stringdiffer.py └── wrapper │ ├── __init__.py │ ├── base.py │ ├── openai.py │ └── sql_db.py ├── pyproject.toml └── tests ├── __init__.py └── unit_tests ├── __init__.py ├── data ├── prompt1.txt └── prompt2.txt ├── test_autocorrect_optim.py ├── test_entropy_optim.py ├── test_lemmatizer_optim.py ├── test_name_replace_optim.py ├── test_openai_wrapper.py ├── test_protected_tags.py ├── test_punctuation_optim.py ├── test_sequential.py ├── test_sql_db.py ├── test_stop_word_optim.py ├── test_synonym_replace_optim.py └── utils.py /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | count = True 3 | statistics = True 4 | max-line-length = 88 5 | ignore = E731,W503,E203,E741 
-------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to PromptOptimizer 2 | 3 | Thank you for considering contributing to PromptOptimizer. 4 | To contribute to this project, please follow a ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow. 5 | 6 | ## Contributing Guidelines 7 | 8 | ### GitHub Issues Format 9 | TBD 10 | 11 | 12 | ## Quick Start 13 | 14 | This project uses [Poetry](https://python-poetry.org/) as a dependency manager. Check out Poetry's [documentation on how to install it](https://python-poetry.org/docs/#installation) on your system before proceeding. 15 | To install requirements: 16 | 17 | ```bash 18 | poetry install -E all 19 | ``` 20 | 21 | This will install all requirements for running the package, examples, linting, formatting, tests, and coverage. Note the `-E all` flag will install all optional dependencies necessary for integration testing. 22 | 23 | ❗Note: If you're running Poetry 1.4.1 and receive a `WheelFileValidationError` for `debugpy` during installation, you can try either downgrading to Poetry 1.4.0 or disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details. 24 | 25 | Now, you should be able to run the common tasks in the following section. 26 | 27 | ## Common Tasks 28 | 29 | Type `make` for a list of common tasks. 30 | 31 | ### Code Formatting 32 | 33 | Formatting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/) and [isort](https://pycqa.github.io/isort/). 34 | 35 | To run formatting for this project: 36 | 37 | ```bash 38 | make format 39 | ``` 40 | 41 | ### Linting 42 | 43 | Linting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/), [isort](https://pycqa.github.io/isort/), and ruff (see the `lint` target in the Makefile). 44 | 45 | To run linting for this project: 46 | 47 | ```bash 48 | make lint 49 | ``` 50 | 51 | ### Testing 52 | 53 | Unit tests cover modular logic that does not require calls to outside APIs. 54 | 55 | To run unit tests: 56 | 57 | ```bash 58 | make test 59 | ``` 60 | 61 | If you add new logic, please add a unit test. 62 | 63 | ## Documentation 64 | 65 | ### Contribute Documentation 66 | 67 | Docs are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code. 68 | 69 | For that reason, we ask that you add good documentation to all classes and methods. 70 | 71 | 72 | ### Build Documentation Locally 73 | 74 | Before building the documentation, it is always a good idea to clean the build directory: 75 | 76 | ```bash 77 | make docs_clean 78 | ``` 79 | 80 | Next, you can run the linkchecker to make sure all links are valid: 81 | 82 | ```bash 83 | make docs_linkcheck 84 | ``` 85 | 86 | Finally, you can build the documentation as outlined below: 87 | 88 | ```bash 89 | make docs_build 90 | ``` 91 | 92 | # Extension contributions 93 | Apart from improving, fixing, and optimizing the existing code, there are three possible directions for extension contributions: 94 | 95 | ## More Evaluations 96 | There is no one prompt optimizer that works for all tasks. Extending evaluations by introducing more tasks will help in choosing the right optimizer for the right task. 
97 | The [Evaluations](https://github.com/vaibkumr/prompt-optimizer/tree/master/evaluations) directory of our project can be used as a reference to design evaluations and run batch evaluation experiments for various optimizers. This is different from metrics, which are used to measure optimization quality on-the-go to decide if the optimized prompt should be used or not. Evaluations run over a set of LLM tasks with ideal responses to evaluate the quality of optimizations. In simple words, we have both the input and the label (ideal LLM response) for these evaluations, while for metrics we only have the input (before and after optimization). 98 | 99 | 100 | ## More Optimizers 101 | Certainly more and better optimizers are possible. Please see [custom optimizers](.) for a guide on how to create new custom optimizers using this library. 102 | 103 | 104 | ## More Metrics 105 | Better metrics to evaluate prompt optimization quality are possible. Please see [custom metrics](.) for a guide on how to create new custom metrics using this library. 106 | -------------------------------------------------------------------------------- /.github/workflows/linkcheck.yml: -------------------------------------------------------------------------------- 1 | name: linkcheck 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | 8 | env: 9 | POETRY_VERSION: "1.3.1" 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.11" 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Install poetry 21 | run: | 22 | pipx install poetry==$POETRY_VERSION 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | cache: poetry 28 | - name: Install dependencies 29 | run: | 30 | poetry install --with docs 31 | - name: Build the docs 32 | run: | 33 | make docs_build 34 | - name: Analyzing the docs with linkcheck 35 | run: | 36 | make docs_linkcheck -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | 8 | env: 9 | POETRY_VERSION: "1.3.1" 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.8" 18 | - "3.9" 19 | - "3.10" 20 | - "3.11" 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Install poetry 24 | run: | 25 | pipx install poetry==$POETRY_VERSION 26 | - name: Set up Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v4 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | cache: poetry 31 | - name: Install dependencies 32 | run: | 33 | poetry install 34 | - name: Analysing the code with our lint 35 | run: | 36 | make lint -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - closed 7 | branches: 8 | - master 9 | paths: 10 | - 'pyproject.toml' 11 | 12 | env: 13 | POETRY_VERSION: "1.4.2" 14 | 15 | jobs: 16 | pypi: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Install poetry 21 | run: pipx install poetry==$POETRY_VERSION 22 | - name: Set up Python 3.10 23 | uses: actions/setup-python@v4 24 | with: 25 | 
python-version: "3.10" 26 | cache: "poetry" 27 | - name: Build project for distribution 28 | run: poetry build 29 | - name: Check Version 30 | id: check-version 31 | run: | 32 | echo version=$(poetry version --short) >> $GITHUB_OUTPUT 33 | - name: Create Release 34 | uses: ncipollo/release-action@v1 35 | with: 36 | artifacts: "dist/*" 37 | token: ${{ secrets.GITHUB_TOKEN }} 38 | draft: false 39 | generateReleaseNotes: true 40 | tag: v${{ steps.check-version.outputs.version }} 41 | commit: master 42 | - name: Publish to PyPI 43 | env: 44 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_API_TOKEN }} 45 | run: | 46 | poetry publish -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | 8 | env: 9 | POETRY_VERSION: "1.3.1" 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: 17 | - "3.8" 18 | - "3.9" 19 | - "3.10" 20 | - "3.11" 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Install poetry 24 | run: pipx install poetry==$POETRY_VERSION 25 | - name: Set up Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v4 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | cache: "poetry" 30 | - name: Install dependencies 31 | run: poetry install 32 | - name: Run unit tests 33 | run: | 34 | make test -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | notes.md 2 | __pycache__ 3 | *.pyc 4 | dist 5 | docs/_build 6 | *_cache 7 | temp/ 8 | .env -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the version of Python and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | 13 | # Build documentation in the docs/ directory with Sphinx 14 | sphinx: 15 | configuration: docs/conf.py 16 | 17 | # If using Sphinx, optionally build your docs in additional formats such as PDF 18 | # formats: 19 | # - pdf 20 | 21 | # Optionally declare the Python requirements required to build your docs 22 | python: 23 | install: 24 | - requirements: docs/requirements.txt 25 | - method: pip 26 | path: . -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) Harrison Chase 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | docs_build: 2 | cd docs && poetry run make html 3 | 4 | docs_clean: 5 | cd docs && poetry run make clean 6 | 7 | 8 | docs_linkcheck: 9 | poetry run linkchecker docs/_build/html/index.html 10 | 11 | lint lint_diff: 12 | poetry run isort prompt_optimizer/ 13 | poetry run black prompt_optimizer/ 14 | poetry run ruff prompt_optimizer/ --fix 15 | 16 | test: 17 | poetry run pytest tests/unit_tests 18 | 19 | help: 20 | @echo '----' 21 | @echo 'docs_build - build the sphinx documentation' 22 | @echo 'docs_clean - clean the documentation build artifacts' 23 | @echo 'test - run unit tests' -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | ## PromptOptimizer 4 | 5 | kevin inspired logo 6 | 7 | Minimize LLM token complexity to save API costs and model computations. 8 | 9 |
10 |
11 | 12 | [![lint](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/lint.yml/badge.svg)](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/lint.yml) 13 | [![test](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/test.yml/badge.svg)](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/test.yml) 14 | [![linkcheck](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/linkcheck.yml/badge.svg)](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/linkcheck.yml) 15 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 16 | 17 | [Docs](https://promptoptimizer.readthedocs.io/en/latest/) 18 | 19 |
20 | 21 | 22 | # Features 23 | - **Plug and Play Optimizers:** Minimize token complexity using optimization methods without any access to weights, logits, or the decoding algorithm. Directly applicable to virtually all NLU systems. 24 | - **Protected Tags:** Special protected tags to mark important sections of the prompt that should not be removed/modified. 25 | - **Sequential Optimization:** Chain different optimizers together sequentially. 26 | - **Optimization Metrics:** Number of tokens reduced and semantic similarity before and after optimization. 27 | - **Langchain and JSON Support:** Supports LangChain-style prompt chains and OpenAI request JSON objects. 28 | 29 | # Why? 30 | - **Minimize Token Complexity:** Token complexity is the number of prompt tokens required to achieve a given task. Reducing token complexity corresponds to linearly reducing API costs and quadratically reducing the computational complexity of typical transformer models. 31 | - **Save Money:** For large businesses, saving 10% on token count can lead to saving 100k USD per 1M USD. 32 | - **Extend Limitations:** Some models have small context lengths; prompt optimizers can help them process documents larger than the context window. 33 | 34 | | Prompt | # Tokens | Correct Response? | 35 | | ------------------------------------------------------- | ---------- | ------------------- | 36 | | Who is the president of the United States of America? | 11 | ✅ | 37 | | Who president US | 3 (-72%) | ✅ | 38 | 39 | # Installation 40 | ### Quick Installation 41 | ```pip install prompt-optimizer``` 42 | 43 | ### Install from source 44 | ```bash 45 | git clone https://github.com/vaibkumr/prompt-optimizer.git; 46 | cd prompt-optimizer; 47 | pip install -e . 48 | ``` 49 | 50 | # Disclaimer 51 | There is a compression vs. performance tradeoff -- the increase in compression comes at the cost of a loss in model performance. The tradeoff can be greatly mitigated by choosing the right optimizer for a given task. There is no single optimizer for all cases. There is no Adam here. 52 | 53 | 54 | # Getting started 55 | 56 | ```python 57 | 58 | from prompt_optimizer.poptim import EntropyOptim 59 | 60 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne.""" 61 | p_optimizer = EntropyOptim(verbose=True, p=0.1) 62 | optimized_prompt = p_optimizer(prompt) 63 | print(optimized_prompt) 64 | 65 | ``` 66 | # Evaluations 67 | Following are the results for the [logiqa](https://github.com/openai/evals/blob/main/evals/registry/evals/logiqa.yaml) OpenAI evals task. It is only performed for a subset of the first 100 samples. Please note that optimizer performance on this task should not be generalized to other tasks; more thorough testing and domain knowledge are needed to choose the optimal optimizer. 
68 | 69 | | Name | % Tokens Reduced | LogiQA Accuracy | USD Saved Per $100 | 70 | | --- | --- | --- | --- | 71 | | Default | 0.0 | 0.32 | 0.0 | 72 | | Entropy_Optim_p_0.05 | 0.06 | 0.3 | 6.35 | 73 | | Entropy_Optim_p_0.1 | 0.11 | 0.28 | 11.19 | 74 | | Entropy_Optim_p_0.25 | 0.26 | 0.22 | 26.47 | 75 | | Entropy_Optim_p_0.5 | 0.5 | 0.08 | 49.65 | 76 | | SynonymReplace_Optim_p_1.0 | 0.01 | 0.33 | 1.06 | 77 | | Lemmatizer_Optim | 0.01 | 0.33 | 1.01 | 78 | | NameReplace_Optim | 0.01 | 0.34 | 1.13 | 79 | | Punctuation_Optim | 0.13 | 0.35 | 12.81 | 80 | | Autocorrect_Optim | 0.01 | 0.3 | 1.14 | 81 | | Pulp_Optim_p_0.05 | 0.05 | 0.31 | 5.49 | 82 | | Pulp_Optim_p_0.1 | 0.1 | 0.25 | 9.52 | 83 | 84 | # Cost-Performance Tradeoff 85 | The reduction in cost often comes with a loss in LLM performance. Almost every optimizer has hyperparameters that control this tradeoff. 86 | 87 | For example, in `EntropyOptim` the hyperparameter `p`, a floating-point number between 0 and 1, controls the ratio of tokens to remove. `p=1.0` corresponds to removing all tokens, while `p=0.0` corresponds to removing none. 88 | 89 | The following chart shows the trade-off for different values of `p` as evaluated on the OpenAI evals [logiqa](https://github.com/openai/evals/blob/main/evals/registry/evals/logiqa.yaml) task for a subset of the first 100 samples. 90 | 
92 | tradeoff 93 |
94 | 95 | # Contributing 96 | There are several directions to contribute to. Please see [CONTRIBUTING.md](.github/CONTRIBUTING.md) for contribution guidelines and possible future directions. 97 | 98 | # Social 99 | Contact us on twitter [Vaibhav Kumar](https://twitter.com/vaibhavk1o1) and [Vaibhav Kumar](https://twitter.com/vaibhavk97). 100 | 101 | # Inspiration 102 |
103 | Image 104 |
-------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/docs/_static/logo_small.png -------------------------------------------------------------------------------- /docs/_static/tradeoff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/docs/_static/tradeoff.png -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("../")) 17 | 18 | # import toml 19 | 20 | # with open("../pyproject.toml") as f: 21 | # data = toml.load(f) 22 | 23 | # -- Project information ----------------------------------------------------- 24 | 25 | project = "prompt-optimizer" 26 | copyright = "2023, Vaibhav Kumar, Vaibhav Kumar" 27 | author = "Vaibhav Kumar, Vaibhav Kumar" 28 | 29 | # version = data["tool"]["poetry"]["version"] 30 | # release = version 31 | 32 | 33 | # -- General configuration --------------------------------------------------- 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | "sphinx.ext.autodoc", 40 | "sphinx.ext.autodoc.typehints", 41 | "sphinx.ext.autosummary", 42 | "sphinx.ext.napoleon", 43 | "sphinx.ext.viewcode", 44 | "sphinxcontrib.autodoc_pydantic", 45 | "sphinx_copybutton", 46 | "myst_parser", 47 | ] 48 | source_suffix = [".ipynb", ".html", ".md", ".rst"] 49 | 50 | # Add any paths that contain templates here, relative to this directory. 
51 | templates_path = ["_templates"] 52 | 53 | # List of patterns, relative to source directory, that match files and 54 | # directories to ignore when looking for source files. 55 | # This pattern also affects html_static_path and html_extra_path. 56 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 57 | 58 | 59 | # -- Options for HTML output ------------------------------------------------- 60 | 61 | # The theme to use for HTML and HTML Help pages. See the documentation for 62 | # a list of builtin themes. 63 | # 64 | html_theme = "sphinx_rtd_theme" 65 | html_logo = "_static/logo_small.png" 66 | 67 | html_theme_options = { 68 | 'logo_only': False, 69 | 'display_version': True, 70 | 'prev_next_buttons_location': 'bottom', 71 | 'collapse_navigation': True, 72 | 'sticky_navigation': True, 73 | 'navigation_depth': 4, 74 | 'includehidden': True, 75 | 'titles_only': False 76 | } 77 | 78 | html_context = { 79 | "display_github": True, # Integrate GitHub 80 | "github_user": "vaibkumr", # Username 81 | "github_repo": "prompt-optimizer", # Repo name 82 | "github_version": "master", # Version 83 | "conf_py_path": "/docs/", # Path in the checkout to the docs root 84 | } 85 | 86 | # Add any paths that contain custom static files (such as style sheets) here, 87 | # relative to this directory. They are copied after the builtin static files, 88 | # so a file named "default.css" will overwrite the builtin "default.css". 89 | html_static_path = ["_static"] 90 | nb_execution_mode = "off" 91 | myst_enable_extensions = ["colon_fence"] 92 | -------------------------------------------------------------------------------- /docs/evaluations/extending_evals.md: -------------------------------------------------------------------------------- 1 | # Extending Evaluations 2 | There is no one prompt optimizer that works for all tasks. Extending evaluations by introducing more tasks will help in choosing the right optimizer for the right task. 3 | 4 | The [Evaluations](https://github.com/vaibkumr/prompt-optimizer/tree/master/evaluations) directory of our project can be used to run batch evaluation experiments for various optimizers. 5 | 6 | Please consider contributing more evaluations. -------------------------------------------------------------------------------- /docs/evaluations/openai_evals.md: -------------------------------------------------------------------------------- 1 | # Evaluation 2 | Similar to LLMs, creating optimizers is easy but evaluating them is not. Evaluating prompt optimizers is the same as evaluating LLMs, just before and after optimization for the same prompts and task. 3 | 4 | 5 | ## [OpenAI Evals](https://github.com/openai/evals) 6 | Evals is a framework for evaluating Large Language Models (LLMs). It offers a range of evaluation challenges that can be used to measure the quality of optimizations. 
7 | 8 | ### LogiQA 9 | [LogiQA](https://github.com/openai/evals/pull/470): A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning 10 | We use the first 100 samples of the LogiQA eval to generate the following results: 11 | 12 | 13 | | Name | % Tokens Reduced | LogiQA Accuracy | USD Saved Per $100 | 14 | | --- | --- | --- | --- | 15 | | Default | 0.0 | 0.32 | 0.0 | 16 | | Entropy_Optim_p_0.05 | 0.06 | 0.3 | 6.35 | 17 | | Entropy_Optim_p_0.1 | 0.11 | 0.28 | 11.19 | 18 | | Entropy_Optim_p_0.25 | 0.26 | 0.22 | 26.47 | 19 | | Entropy_Optim_p_0.5 | 0.5 | 0.08 | 49.65 | 20 | | SynonymReplace_Optim_p_1.0 | 0.01 | 0.33 | 1.06 | 21 | | Lemmatizer_Optim | 0.01 | 0.33 | 1.01 | 22 | | Stemmer_Optim | -0.06 | 0.09 | -5.91 | 23 | | NameReplace_Optim | 0.01 | 0.34 | 1.13 | 24 | | Punctuation_Optim | 0.13 | 0.35 | 12.81 | 25 | | Autocorrect_Optim | 0.01 | 0.3 | 1.14 | 26 | | Pulp_Optim_p_0.05 | 0.05 | 0.31 | 5.49 | 27 | | Pulp_Optim_p_0.1 | 0.1 | 0.25 | 9.52 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/extend/custom_metrics.md: -------------------------------------------------------------------------------- 1 | # Creating Custom Metrics 2 | All metrics are computed between the original and the optimized prompt. They must extend the `prompt_optimizer.metric.Metric` class. 3 | 4 | 5 | A custom `MyCustomMetric` metric will look as follows: 6 | 7 | ```python 8 | 9 | from prompt_optimizer.metric.base import Metric 10 | 11 | class MyCustomMetric(Metric): 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | 16 | def run(self, prompt_before: str, prompt_after: str) -> dict: 17 | return {'metric_name': 0.0} 18 | ``` 19 | 20 | To create a custom metric, just implement the `run` function, which takes two strings as input: `prompt_before`, the original prompt, and `prompt_after`, the prompt after optimization. The function must return a dictionary with key(s) and value(s) corresponding to the metric name(s) and value(s). 21 | 22 | If you implement some metrics, please consider contributing them to this project. -------------------------------------------------------------------------------- /docs/extend/custom_optims.md: -------------------------------------------------------------------------------- 1 | # Creating Custom PromptOptimizers 2 | All prompt optimizers must extend the `prompt_optimizer.poptim.PromptOptim` class. 3 | 4 | A custom `MyCustomOptim` optimizer will look as follows: 5 | 6 | ```python 7 | from prompt_optimizer.poptim.base import PromptOptim 8 | 9 | class MyCustomOptim(PromptOptim): 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | 13 | def optimize(self, prompt: str) -> str: 14 | opti_prompt = prompt 15 | return opti_prompt 16 | ``` 17 | 18 | To create an optimizer, we just need to implement the `optimize` function, which takes a string as input and outputs another, optimized string. 19 | 20 | If you implement some optimizers, please consider contributing them to this project. -------------------------------------------------------------------------------- /docs/getting_started/cli.md: -------------------------------------------------------------------------------- 1 | # PromptOptimizer CLI 2 | PromptOptimizer provides a command line interface `prompt_optimizer.cli.main:main` to run prompt optimizations and metrics. 
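For instance, an invocation might look like the following. This is a hypothetical sketch: the optimizer and metric names here are assumptions based on the library's class names, so refer to the examples linked at the end of this page for canonical usage.

```
# Hypothetical invocation (optimizer/metric names are assumptions):
# optimize a prompt string with StopWordOptim and report token savings.
prompt-optimizer "Who is the president of the United States of America?" StopWordOptim --metrics TokenMetric
```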
3 | 4 | - Type `prompt-optimizer --help` on the command line: 5 | 6 | ``` 7 | 8 | usage: prompt-optimizer [-h] [--json JSON] [--skip_system SKIP_SYSTEM] 9 | [--optimizer_args [OPTIMIZER_ARGS ...]] [--metrics [METRICS ...]] 10 | [--log_file LOG_FILE] 11 | prompt_data_or_path optimizer_name 12 | 13 | Prompt Optimizer CLI 14 | 15 | positional arguments: 16 | prompt_data_or_path Either the prompt data (string or json string) or path to a file containing new 17 | line separated prompt data. 18 | optimizer_name Name of the optimizer. 19 | 20 | options: 21 | -h, --help show this help message and exit 22 | --json JSON Prompt format JSON or not. 23 | --skip_system SKIP_SYSTEM 24 | Skip system prompts or not. Only valid if `json` is True. 25 | --optimizer_args [OPTIMIZER_ARGS ...] 26 | Additional arguments for the optimizer. 27 | --metrics [METRICS ...] 28 | List of metrics to compute. 29 | --log_file LOG_FILE Output file to append results to. Prints on `stdout` if `None`. 30 | ``` 31 | 32 | Some [examples](https://github.com/vaibkumr/prompt-optimizer/tree/master/examples/cli) are provided to help get started with the CLI! -------------------------------------------------------------------------------- /docs/getting_started/getting_started.md: -------------------------------------------------------------------------------- 1 | # Quickstart Guide 2 | 3 | Welcome to PromptOptimizer! This guide will help you quickly get started with using PromptOptimizer in your projects. PromptOptimizer is a Python library that allows you to minimize token complexity in order to save API costs and reduce model computations. 4 | 5 | # Installation 6 | ### Quick Installation 7 | 8 | To quickly install PromptOptimizer, use the following command: 9 | ```bash 10 | pip install prompt-optimizer 11 | ``` 12 | 13 | ### Install from Source 14 | If you prefer to install PromptOptimizer from source, follow these steps: 15 | 16 | 1. Clone the repository: 17 | ```bash 18 | git clone https://github.com/vaibkumr/prompt-optimizer.git 19 | ``` 20 | 2. Navigate to the cloned repository: 21 | ```bash 22 | cd prompt-optimizer 23 | ``` 24 | 3. Install PromptOptimizer using pip: 25 | ```bash 26 | pip install -e . 27 | ``` 28 | 29 | # Prompt Optimizers 30 | A prompt optimizer is a callable class that outputs optimized prompt data along with metrics (if requested) for given input prompt data. 31 | 32 | > Note: Optimizers output a result object with the key `content` storing the optimized prompt and the key `metrics` storing the requested metric computation results. 33 | 34 | To optimize a prompt, we follow three steps: 35 | 1. Import the optimizer from the range of available [optimizers](../optimizers/index.html). For now, we use `EntropyOptim`. 36 | 37 | ```python 38 | from prompt_optimizer.poptim import EntropyOptim 39 | ``` 40 | 2. Initialize the optimizer object. Each optimizer has its own arguments, which can be tuned to balance the cost-performance tradeoff. 41 | 42 | ```python 43 | p_optimizer = EntropyOptim(p=0.1) 44 | ``` 45 | 46 | 3. Run the optimizer over a given prompt string and fetch the results: 47 | ```python 48 | prompt = "In Nightmare of Mensis progress through until you reach the boss room." 49 | result = p_optimizer(prompt) 50 | optimized_prompt = result.content 51 | ``` 52 | 53 | And we're done! We just optimized our first prompt, saved some money, and if we're smart, suffered no loss in model performance. 54 | 55 | # Input Formats 56 | Prompt optimizers support three different formats: 57 | 1. 
**String:** A basic Python string. At the core, all optimizers work on Python strings. 58 | 59 | ```python 60 | from prompt_optimizer.poptim import EntropyOptim 61 | p_optimizer = EntropyOptim(p=0.1) 62 | prompt = "In Nightmare of Mensis progress through until you reach the boss room." 63 | result = p_optimizer(prompt) 64 | optimized_prompt = result.content 65 | ``` 66 | 67 | 2. **JSON Object:** APIs often accept instructions in the form of system and human messages. JSON objects of the following format can be passed to the optimizers using the `json` boolean flag: 68 | ```json 69 | [ 70 | { 71 | "role":"system", 72 | "content":"System instructions..." 73 | }, 74 | { 75 | "role":"user", 76 | "content":"User prompt..." 77 | } 78 | ] 79 | ``` 80 | Oftentimes, it is important to skip system instructions. This can be done using the `skip_system` flag as follows: 81 | 82 | ```python 83 | from prompt_optimizer.poptim import EntropyOptim 84 | p_optimizer = EntropyOptim(p=0.1) 85 | prompt = [ 86 | { 87 | "role":"system", 88 | "content":"System instructions..." 89 | }, 90 | { 91 | "role":"user", 92 | "content":"User prompt..." 93 | } 94 | ] 95 | optimized_prompt = p_optimizer(prompt, json=True, skip_system=True) 96 | ``` 97 | 98 | 3. **Langchain Object:** Langchain agents accept prompts as a list of `SystemMessage` and `HumanMessage` objects. Prompt optimizers can be applied directly to these objects by using the `langchain` boolean flag. Again, the `skip_system` flag can be used to skip optimizing system prompts as follows: 99 | 100 | ```python 101 | from prompt_optimizer.poptim import EntropyOptim 102 | from langchain.schema import ( 103 | HumanMessage, 104 | SystemMessage 105 | ) 106 | 107 | p_optimizer = EntropyOptim(p=0.1) 108 | prompt = [ 109 | SystemMessage(content="You are a helpful assistant that translates English to French."), 110 | HumanMessage(content="I love programming.") 111 | ] 112 | optimized_prompt = p_optimizer(prompt, langchain=True, skip_system=True) 113 | ``` 114 | -------------------------------------------------------------------------------- /docs/getting_started/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | ### Quick Installation 3 | 4 | To quickly install PromptOptimizer, use the following command: 5 | ```bash 6 | pip install prompt-optimizer 7 | ``` 8 | 9 | ### Install from Source 10 | If you prefer to install PromptOptimizer from source, follow these steps: 11 | 12 | 1. Clone the repository: 13 | ```bash 14 | git clone https://github.com/vaibkumr/prompt-optimizer.git 15 | ``` 16 | 2. Navigate to the cloned repository: 17 | ```bash 18 | cd prompt-optimizer 19 | ``` 20 | 3. Install PromptOptimizer using pip: 21 | ```bash 22 | pip install -e . -------------------------------------------------------------------------------- /docs/getting_started/metrics.md: -------------------------------------------------------------------------------- 1 | # Metrics 2 | Given a prompt and its corresponding optimized prompt, we can compute several metrics for sanity checks, logging, and more. We might need to check the percentage of tokens saved, the semantic similarity between the optimized and original prompt text (BERTScore), the sentiment before and after optimization, and much more. All of this can be done by extending the `prompt_optimizer.metric.Metric` class. 3 | 4 | # Running Metrics 5 | All metrics extend the `prompt_optimizer.metric.Metric` abstract class. 
6 | To evaluate a metric, pass the list of metric objects in the `metrics` keyword argument of the optimizer object as follows: 7 | 8 | ```python 9 | from prompt_optimizer.metric import TokenMetric 10 | from prompt_optimizer.poptim import StopWordOptim 11 | 12 | p_optimizer = StopWordOptim(metrics=[TokenMetric()]) 13 | ``` 14 | After specifying a metric, the optimizer result object has an additional key `metrics` that contains a list of dictionaries, each with the metric name string as the key and the computed metric value as the value. 15 | 16 | 17 | ```python 18 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne. """ 19 | res = p_optimizer(prompt) 20 | for metric in res.metrics: 21 | for key, value in metric.items(): 22 | print(f"{key}: {value:.3f}") 23 | ``` 24 | 25 | A list of all metrics can be found in the reference documentation. -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. prompt-optimizer documentation master file, created by 2 | sphinx-quickstart on Fri Apr 7 15:53:36 2023. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to PromptOptimizer! 7 | ============================================ 8 | 9 | 10 | Minimize LLM token complexity to save API costs and model computations. 11 | 12 | PromptOptimizer is a Python library designed to minimize the token complexity of natural language understanding (NLU) systems, thereby reducing API costs and computational overhead. 13 | It offers a range of optimizers to achieve this optimization while maintaining the integrity of important sections of the prompt. 14 | 15 | Disclaimer 16 | ---------------- 17 | There is a compression vs. performance tradeoff -- the increase in compression comes at the cost of a loss in model performance. The tradeoff can be greatly mitigated by choosing the right optimizer for a given task. There is no single optimizer for all cases. There is no Adam here. 18 | 19 | Read more about this in `Cost-Performance Tradeoff <./theory/cost_performance_tradeoff.html>`_. 20 | 21 | 22 | 23 | Getting Started 24 | ---------------- 25 | 26 | | How to get started using PromptOptimizer and minimize token complexity. 27 | 28 | - `Quickstart Guide <./getting_started/getting_started.html>`_ 29 | 30 | | Compression metrics for sanity checks and logging. 31 | 32 | - `Optimization Metrics <./getting_started/metrics.html>`_ 33 | 34 | | PromptOptimizer CLI 35 | 36 | - `CLI <./getting_started/cli.html>`_ 37 | 38 | .. toctree:: 39 | :maxdepth: 2 40 | :caption: Getting Started 41 | :name: getting_started 42 | :hidden: 43 | 44 | getting_started/getting_started.md 45 | getting_started/metrics.md 46 | getting_started/cli.md 47 | 48 | Extending PromptOptimizer 49 | ------------------------- 50 | You can create custom prompt optimizers. 51 | 52 | - `Custom PromptOptimizers <./extend/custom_optims.html>`_ 53 | 54 | It is also easy to create custom metrics. 55 | 56 | - `Custom Metrics <./extend/custom_metrics.html>`_ 57 | 58 | .. toctree:: 59 | :maxdepth: 1 60 | :caption: Extending PromptOptimizer 61 | :name: extend 62 | :hidden: 63 | 64 | extend/custom_optims.md 65 | extend/custom_metrics.md 66 | 67 | Evaluations 68 | ----------- 69 | There is no one prompt optimizer that works for all tasks. 
70 | Through evaluations over a diverse set of tasks, we can make the right choice of optimizer for a new task. 71 | 72 | Extending Evaluations to include more tasks 73 | 74 | - `Extending Evaluations <./evaluations/extending_evals.html>`_ 75 | 76 | Evaluating prompt optimizers is the same as evaluating LLMs before and after optimization and measuring the differences. We thus provide OpenAI Evals compatibility to facilitate this. 77 | 78 | - `OpenAI Evals Compatibility <./evaluations/openai_evals.html>`_ 79 | 80 | .. toctree:: 81 | :maxdepth: 1 82 | :caption: Evaluations 83 | :name: evals 84 | :hidden: 85 | 86 | evaluations/extending_evals.md 87 | evaluations/openai_evals.md 88 | 89 | Cost-Performance Tradeoff 90 | ------------------------- 91 | The reduction in cost often comes with a loss in LLM performance. Almost every optimizer has hyperparameters that control this tradeoff. 92 | 93 | - `Cost-Performance Tradeoff <./theory/cost_performance_tradeoff.html>`_ 94 | 95 | .. toctree:: 96 | :maxdepth: 1 97 | :caption: Cost-Performance Tradeoff 98 | :name: tradeoff 99 | :hidden: 100 | 101 | theory/cost_performance_tradeoff.md 102 | 103 | 104 | Reference Documentations 105 | ========================= 106 | Full documentation on all classes and methods for PromptOptimizer. 107 | 108 | - `Reference Documentations <./reference.html>`_ 109 | - `Installation Guide <./getting_started/installation.html>`_ 110 | 111 | .. toctree:: 112 | :maxdepth: 1 113 | :caption: Reference Documentations 114 | :name: reference 115 | :hidden: 116 | 117 | ./getting_started/installation.md 118 | ./reference.rst 119 | 120 | 121 | Indices and tables 122 | ================== 123 | 124 | * :ref:`genindex` 125 | * :ref:`search` 126 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/reference.rst: -------------------------------------------------------------------------------- 1 | Reference Documentations 2 | ========================= 3 | 4 | Prompt Optimizer 5 | ========================= 6 | .. automodule:: prompt_optimizer.poptim 7 | :members: 8 | 9 | 10 | Metrics 11 | ========================= 12 | .. 
automodule:: prompt_optimizer.metric 13 | :members: -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | autodoc_pydantic==1.8.0 2 | myst_parser 3 | nbsphinx==0.8.9 4 | sphinx==4.5.0 5 | sphinx-autobuild==2021.3.14 6 | sphinx_book_theme 7 | sphinx_rtd_theme==1.0.0 8 | sphinx-typlog-theme==0.8.0 9 | sphinx-panels 10 | toml 11 | myst_nb 12 | sphinx_copybutton 13 | pydata-sphinx-theme==0.13.1 -------------------------------------------------------------------------------- /docs/theory/cost_performance_tradeoff.md: -------------------------------------------------------------------------------- 1 | # Cost-Performance Tradeoff 2 | The reduction in cost often comes with a loss in LLM performance. Almost every optimizer has hyperparameters that control this tradeoff. 3 | 4 | For example, in `EntropyOptim` the hyperparameter `p`, a floating-point number between 0 and 1, controls the ratio of tokens to remove. `p=1.0` corresponds to removing all tokens, while `p=0.0` corresponds to removing none. 5 | 6 | The following chart shows the trade-off for different values of `p` as evaluated on the OpenAI evals [logiqa](https://github.com/openai/evals/blob/main/evals/registry/evals/logiqa.yaml) task. 7 | 8 | 9 | 
10 | tradeoff 11 |
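As a minimal sketch of how this tradeoff can be explored in practice (assuming the `EntropyOptim` interface shown in the Quickstart Guide; the prompt text below is only an example), different values of `p` can be swept over the same prompt and the outputs compared:

```python
from prompt_optimizer.poptim import EntropyOptim

prompt = (
    "The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark "
    "located at Beachy Head, East Sussex, close to the town of Eastbourne."
)

# Larger p removes more tokens: cheaper API calls, but a higher risk of
# degrading downstream LLM performance.
for p in [0.05, 0.1, 0.25, 0.5]:
    p_optimizer = EntropyOptim(p=p)
    result = p_optimizer(prompt)
    print(f"p={p}: {result.content}")
```

Each compressed variant can then be evaluated on a held-out task (as done with the logiqa subset above) to pick the largest `p` that keeps accuracy acceptable.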
-------------------------------------------------------------------------------- /evaluations/README.md: -------------------------------------------------------------------------------- 1 | # TODO: Explain evaluations here -------------------------------------------------------------------------------- /evaluations/artifacts/% Tokens Reduced_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/% Tokens Reduced_graph.png -------------------------------------------------------------------------------- /evaluations/artifacts/LogiQA Accuracy_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/LogiQA Accuracy_graph.png -------------------------------------------------------------------------------- /evaluations/artifacts/USD Saved Per $100_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/USD Saved Per $100_graph.png -------------------------------------------------------------------------------- /evaluations/artifacts/kevin.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/kevin.gif -------------------------------------------------------------------------------- /evaluations/artifacts/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/logo.png -------------------------------------------------------------------------------- /evaluations/artifacts/logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/logo_small.png -------------------------------------------------------------------------------- /evaluations/artifacts/table.md: -------------------------------------------------------------------------------- 1 | | Name | % Tokens Reduced | LogiQA Accuracy | USD Saved Per $100 | 2 | | --- | --- | --- | --- | 3 | | Default | 0.0 | 0.32 | 0.0 | 4 | | Entropy_Optim_p_0.05 | 0.06 | 0.3 | 6.35 | 5 | | Entropy_Optim_p_0.1 | 0.11 | 0.28 | 11.19 | 6 | | Entropy_Optim_p_0.25 | 0.26 | 0.22 | 26.47 | 7 | | Entropy_Optim_p_0.5 | 0.5 | 0.08 | 49.65 | 8 | | SynonymReplace_Optim_p_1.0 | 0.01 | 0.33 | 1.06 | 9 | | Lemmatizer_Optim | 0.01 | 0.33 | 1.01 | 10 | | Stemmer_Optim | -0.06 | 0.09 | -5.91 | 11 | | NameReplace_Optim | 0.01 | 0.34 | 1.13 | 12 | | Punctuation_Optim | 0.13 | 0.35 | 12.81 | 13 | | Autocorrect_Optim | 0.01 | 0.3 | 1.14 | 14 | | Pulp_Optim_p_0.05 | 0.05 | 0.31 | 5.49 | 15 | | Pulp_Optim_p_0.1 | 0.1 | 0.25 | 9.52 | 16 | -------------------------------------------------------------------------------- /evaluations/artifacts/tradeoff.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/tradeoff.png -------------------------------------------------------------------------------- /evaluations/compute_metric.py: -------------------------------------------------------------------------------- 1 | import utils 2 | from prompt_optimizer.metric import TokenMetric 3 | 4 | 5 | def token_metric(before_samples_dir, after_samples_dir, n_samples_max=100): 6 | before = utils.read_jsonl(before_samples_dir)[:n_samples_max] 7 | after = utils.read_jsonl(after_samples_dir)[:n_samples_max] 8 | metric = TokenMetric() 9 | avg = 0 10 | for json_before, json_after in zip(before, after): 11 | avg += metric.batch_run(json_before["input"], json_after["input"], json=True)[metric.key] 12 | return avg / len(before) 13 | -------------------------------------------------------------------------------- /evaluations/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import compute_metric 3 | import utils 4 | from prompt_optimizer.poptim import * 5 | import make_errors 6 | 7 | 8 | def get_samples_and_paths(n_samples_max=100): 9 | samples_dir = ( 10 | "/Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/" 11 | ) 12 | samples_fname = "logiqa.jsonl" 13 | samples_path = os.path.join(samples_dir, samples_fname) 14 | opti_samples_path = os.path.join(samples_dir, "temp.jsonl") 15 | registry_path = "/Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals/" 16 | opti_registry_path = os.path.join(registry_path, "temp.yaml") 17 | 18 | new_yaml = { 19 | "temp": {"id": "temp.dev.v0", "metrics": ["accuracy"]}, 20 | "temp.dev.v0": { 21 | "class": "evals.elsuite.basic.match:Match", 22 | "args": {"samples_jsonl": opti_samples_path}, 23 | }, 24 | } 25 | utils.write_yaml(new_yaml, opti_registry_path) 26 | samples = utils.read_jsonl(samples_path)[:n_samples_max] 27 | 28 | return samples, samples_path, opti_samples_path 29 | 30 | 31 | def run_logiqa(exp_name, p_optimizer, n_samples_max=100): 32 | samples, samples_path, opti_samples_path = get_samples_and_paths(n_samples_max) 33 | 34 | res_dir = "results/" 35 | res_path = os.path.join(res_dir, f"{exp_name}.jsonl") 36 | log_dir = "logs/" 37 | log_path = os.path.join(log_dir, f"{exp_name}.jsonl") 38 | 39 | for json_data in samples: 40 | if exp_name in ["Autocorrect_Optim", "AutocorrectOptim"]: 41 | json_data["input"] = make_errors.run(json_data["input"]) 42 | 43 | if p_optimizer is not None: 44 | json_data["input"] = p_optimizer( 45 | json_data["input"], skip_system=False, json=True 46 | ) 47 | 48 | # Save samples 49 | utils.write_jsonl(samples, opti_samples_path) 50 | 51 | # Compute token saved metrics 52 | tokens_opti_metric = compute_metric.token_metric(samples_path, opti_samples_path) 53 | 54 | # Compute Evals metric 55 | # utils.run_bash( 56 | # f"oaieval gpt-3.5-turbo temp --record_path {res_path} --log_to_file {log_path}" 57 | # ) 58 | for line in utils.read_jsonl(res_path): 59 | if "final_report" in line: 60 | accuracy = line["final_report"]["accuracy"] 61 | break 62 | 63 | results = { 64 | "name": exp_name, 65 | "tokens_opti_metric": tokens_opti_metric, 66 | "accuracy": accuracy, 67 | } 68 | 69 | print(results) 70 | 71 | # Save results 72 | utils.save_results(results, "results.csv") 73 | 74 | 75 | if __name__ == "__main__": 76 | EXPERIMENTS = { 77 | "Default": None, 78 | "Entropy_Optim_p_0.05": EntropyOptim(p=0.05), 79 | "Entropy_Optim_p_0.1": EntropyOptim(p=0.1), 
80 | "Entropy_Optim_p_0.25": EntropyOptim(p=0.25), 81 | "Entropy_Optim_p_0.5": EntropyOptim(p=0.5), 82 | "SynonymReplace_Optim_p_1.0": SynonymReplaceOptim(p=1), 83 | "Lemmatizer_Optim": LemmatizerOptim(), 84 | "Stemmer_Optim": StemmerOptim(), 85 | "NameReplace_Optim": NameReplaceOptim(), 86 | "Punctuation_Optim": PunctuationOptim(), 87 | "Autocorrect_Optim": AutocorrectOptim(), 88 | "Pulp_Optim_p_0.05": PulpOptim(p=0.05), 89 | "Pulp_Optim_p_0.1": PulpOptim(p=0.1), 90 | } 91 | for exp_name in EXPERIMENTS: 92 | p_optimizer = EXPERIMENTS[exp_name] 93 | run_logiqa(exp_name, p_optimizer) 94 | -------------------------------------------------------------------------------- /evaluations/logs/Autocorrect_Optim.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 11:20:23,559] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 11:20:23,723] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 11:20:23,723] [oaieval.py:110] Run started: 230516182023X6CJM7KU 4 | [2023-05-16 11:20:23,724] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 11:20:23,725] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 11:20:23,732] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 11:20:50,658] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=15.701ms 8 | [2023-05-16 11:21:30,988] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.5s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID ba90fac4d334d9688f362927bbeb5894 in your message.)) 9 | [2023-05-16 11:21:37,656] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=12.522ms 10 | [2023-05-16 11:21:37,661] [record.py:341] Final report: {'accuracy': 0.0}. Logged to results/Autocorrect_Optim.jsonl 11 | [2023-05-16 11:21:37,661] [oaieval.py:147] Final report: 12 | [2023-05-16 11:21:37,661] [oaieval.py:149] accuracy: 0.0 13 | [2023-05-16 11:26:27,796] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 14 | [2023-05-16 11:26:27,973] [registry.py:249] Loading registry from /Users/v/.evals/evals 15 | [2023-05-16 11:26:27,974] [oaieval.py:110] Run started: 2305161826273Z3ORRVS 16 | [2023-05-16 11:26:27,975] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 17 | [2023-05-16 11:26:27,976] [eval.py:34] Evaluating 100 samples 18 | [2023-05-16 11:26:27,983] [eval.py:153] Running in threaded mode with 10 threads! 19 | [2023-05-16 11:26:45,377] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=17.767ms 20 | [2023-05-16 11:26:59,930] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.7s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 7bc34b9814c92d6eadca8679234159bb in your message.)) 21 | [2023-05-16 11:27:02,184] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) 
for 0.4s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID faa3a94b78335bd7f2bbcbe2695f2cf7 in your message.)) 22 | [2023-05-16 11:27:13,623] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.1s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 023cdd1194b029af45ba3741af56066f in your message.)) 23 | [2023-05-16 11:27:15,684] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=15.099ms 24 | [2023-05-16 11:27:15,688] [record.py:341] Final report: {'accuracy': 0.3}. Logged to results/Autocorrect_Optim.jsonl 25 | [2023-05-16 11:27:15,689] [oaieval.py:147] Final report: 26 | [2023-05-16 11:27:15,689] [oaieval.py:149] accuracy: 0.3 27 | -------------------------------------------------------------------------------- /evaluations/logs/Default.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 10:41:46,141] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 10:41:46,318] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 10:41:46,319] [oaieval.py:110] Run started: 230516174146FZ7DGETP 4 | [2023-05-16 10:41:46,320] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 10:41:46,321] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 10:41:46,328] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 10:42:01,115] [record.py:330] Logged 100 rows of events to results/Default.jsonl: insert_time=20.269ms 8 | [2023-05-16 10:42:17,730] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.5s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 6af39a6d33050a08417f758c39991a4f in your message.)) 9 | [2023-05-16 10:42:29,518] [record.py:330] Logged 100 rows of events to results/Default.jsonl: insert_time=11.096ms 10 | [2023-05-16 10:42:29,526] [record.py:341] Final report: {'accuracy': 0.32}. Logged to results/Default.jsonl 11 | [2023-05-16 10:42:29,526] [oaieval.py:147] Final report: 12 | [2023-05-16 10:42:29,526] [oaieval.py:149] accuracy: 0.32 13 | [2023-05-16 10:42:49,327] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 2.3s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. 
(Please include the request ID 3e4486b295d13ec50d501bffaea6bf2a in your message.)) 14 | [2023-05-16 10:42:53,664] [record.py:330] Logged 2 rows of events to results/Default.jsonl: insert_time=3.159ms 15 | -------------------------------------------------------------------------------- /evaluations/logs/Entropy_Optim_p_0.05.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 11:28:59,318] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 11:28:59,506] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 11:28:59,506] [oaieval.py:110] Run started: 230516182859HQ5ZZMAM 4 | [2023-05-16 11:28:59,507] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 11:28:59,508] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 11:28:59,514] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 11:29:13,557] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.05.jsonl: insert_time=22.180ms 8 | [2023-05-16 11:29:42,311] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.8s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 968b7b0afbc5d72c1023b3471710936b in your message.)) 9 | [2023-05-16 11:29:44,788] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.05.jsonl: insert_time=22.965ms 10 | [2023-05-16 11:29:44,795] [record.py:341] Final report: {'accuracy': 0.3}. Logged to results/Entropy_Optim_p_0.05.jsonl 11 | [2023-05-16 11:29:44,795] [oaieval.py:147] Final report: 12 | [2023-05-16 11:29:44,795] [oaieval.py:149] accuracy: 0.3 13 | -------------------------------------------------------------------------------- /evaluations/logs/Entropy_Optim_p_0.1.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 10:49:00,710] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 10:49:00,874] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 10:49:00,875] [oaieval.py:110] Run started: 230516174900BXDQIZRZ 4 | [2023-05-16 10:49:00,876] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 10:49:00,876] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 10:49:00,883] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 10:49:16,954] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.1.jsonl: insert_time=11.353ms 8 | [2023-05-16 10:49:34,817] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.1.jsonl: insert_time=15.029ms 9 | [2023-05-16 10:49:34,823] [record.py:341] Final report: {'accuracy': 0.28}. 
Logged to results/Entropy_Optim_p_0.1.jsonl 10 | [2023-05-16 10:49:34,823] [oaieval.py:147] Final report: 11 | [2023-05-16 10:49:34,823] [oaieval.py:149] accuracy: 0.28 12 | -------------------------------------------------------------------------------- /evaluations/logs/Entropy_Optim_p_0.25.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 10:50:08,078] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 10:50:08,248] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 10:50:08,248] [oaieval.py:110] Run started: 230516175008C6MZGPUC 4 | [2023-05-16 10:50:08,249] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 10:50:08,250] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 10:50:08,256] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 10:50:24,127] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.25.jsonl: insert_time=16.943ms 8 | [2023-05-16 10:50:52,168] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.2s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 0247cc54a987228bd86438f283e79db1 in your message.)) 9 | [2023-05-16 10:51:04,151] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.1s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID d2dd1568630c4d4396677d7230267c76 in your message.)) 10 | [2023-05-16 10:51:08,438] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.25.jsonl: insert_time=15.240ms 11 | [2023-05-16 10:51:08,443] [record.py:341] Final report: {'accuracy': 0.22}. Logged to results/Entropy_Optim_p_0.25.jsonl 12 | [2023-05-16 10:51:08,443] [oaieval.py:147] Final report: 13 | [2023-05-16 10:51:08,443] [oaieval.py:149] accuracy: 0.22 14 | -------------------------------------------------------------------------------- /evaluations/logs/Entropy_Optim_p_0.5.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 10:51:40,154] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 10:51:40,320] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 10:51:40,321] [oaieval.py:110] Run started: 230516175140HVZETOL6 4 | [2023-05-16 10:51:40,322] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 10:51:40,322] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 10:51:40,329] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 10:52:02,470] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.5.jsonl: insert_time=10.094ms 8 | [2023-05-16 10:52:11,891] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.5s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. 
(Please include the request ID 2fb04add750c5104a2f30ce9816ced6d in your message.)) 9 | [2023-05-16 10:52:31,716] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.5.jsonl: insert_time=15.097ms 10 | [2023-05-16 10:52:31,722] [record.py:341] Final report: {'accuracy': 0.08}. Logged to results/Entropy_Optim_p_0.5.jsonl 11 | [2023-05-16 10:52:31,722] [oaieval.py:147] Final report: 12 | [2023-05-16 10:52:31,722] [oaieval.py:149] accuracy: 0.08 13 | -------------------------------------------------------------------------------- /evaluations/logs/Lemmatizer_Optim.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 11:17:23,275] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 11:17:23,555] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 11:17:23,556] [oaieval.py:110] Run started: 230516181723VWM62TYA 4 | [2023-05-16 11:17:23,557] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 11:17:23,558] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 11:17:23,567] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 11:17:38,477] [record.py:330] Logged 100 rows of events to results/Lemmatizer_Optim.jsonl: insert_time=14.978ms 8 | [2023-05-16 11:18:12,269] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.2s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 567850a6c165ac7b190022463917944f in your message.)) 9 | [2023-05-16 11:18:14,082] [record.py:330] Logged 100 rows of events to results/Lemmatizer_Optim.jsonl: insert_time=142.862ms 10 | [2023-05-16 11:18:14,089] [record.py:341] Final report: {'accuracy': 0.33}. Logged to results/Lemmatizer_Optim.jsonl 11 | [2023-05-16 11:18:14,089] [oaieval.py:147] Final report: 12 | [2023-05-16 11:18:14,089] [oaieval.py:149] accuracy: 0.33 13 | -------------------------------------------------------------------------------- /evaluations/logs/NameReplace_Optim.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 11:18:47,032] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 11:18:47,201] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 11:18:47,202] [oaieval.py:110] Run started: 230516181847URHMHMOF 4 | [2023-05-16 11:18:47,202] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 11:18:47,203] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 11:18:47,210] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 11:19:00,559] [record.py:330] Logged 100 rows of events to results/NameReplace_Optim.jsonl: insert_time=14.742ms 8 | [2023-05-16 11:19:33,843] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.4s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. 
(Please include the request ID d1d947e977b2259375fc236e22e2dd0e in your message.)) 9 | [2023-05-16 11:19:39,562] [record.py:330] Logged 100 rows of events to results/NameReplace_Optim.jsonl: insert_time=19.703ms 10 | [2023-05-16 11:19:39,570] [record.py:341] Final report: {'accuracy': 0.34}. Logged to results/NameReplace_Optim.jsonl 11 | [2023-05-16 11:19:39,570] [oaieval.py:147] Final report: 12 | [2023-05-16 11:19:39,571] [oaieval.py:149] accuracy: 0.34 13 | -------------------------------------------------------------------------------- /evaluations/logs/Pulp_Optim_p_0.05.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 12:18:15,928] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 12:18:16,103] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 12:18:16,104] [oaieval.py:110] Run started: 230516191816AFNQFXOV 4 | [2023-05-16 12:18:16,105] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 12:18:16,106] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 12:18:16,121] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 12:18:29,461] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.05.jsonl: insert_time=28.695ms 8 | [2023-05-16 12:19:03,392] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.2s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID d943219649b40b2098239af35a105e8a in your message.)) 9 | [2023-05-16 12:19:05,180] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.05.jsonl: insert_time=11.530ms 10 | [2023-05-16 12:19:05,186] [record.py:341] Final report: {'accuracy': 0.31}. Logged to results/Pulp_Optim_p_0.05.jsonl 11 | [2023-05-16 12:19:05,186] [oaieval.py:147] Final report: 12 | [2023-05-16 12:19:05,186] [oaieval.py:149] accuracy: 0.31 13 | -------------------------------------------------------------------------------- /evaluations/logs/Pulp_Optim_p_0.1.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 12:19:11,565] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 12:19:11,781] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 12:19:11,782] [oaieval.py:110] Run started: 230516191911PVPMMY7O 4 | [2023-05-16 12:19:11,783] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 12:19:11,784] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 12:19:11,790] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 12:19:24,747] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.1.jsonl: insert_time=22.192ms 8 | [2023-05-16 12:19:50,909] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.1.jsonl: insert_time=23.428ms 9 | [2023-05-16 12:19:50,917] [record.py:341] Final report: {'accuracy': 0.25}. 
Logged to results/Pulp_Optim_p_0.1.jsonl 10 | [2023-05-16 12:19:50,917] [oaieval.py:147] Final report: 11 | [2023-05-16 12:19:50,917] [oaieval.py:149] accuracy: 0.25 12 | -------------------------------------------------------------------------------- /evaluations/logs/Punctuation_Optim.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 11:19:40,087] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 11:19:40,268] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 11:19:40,269] [oaieval.py:110] Run started: 230516181940UTAQK3AE 4 | [2023-05-16 11:19:40,270] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 11:19:40,270] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 11:19:40,278] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 11:19:50,455] [record.py:330] Logged 155 rows of events to results/Punctuation_Optim.jsonl: insert_time=21.257ms 8 | [2023-05-16 11:19:56,871] [record.py:341] Final report: {'accuracy': 0.35}. Logged to results/Punctuation_Optim.jsonl 9 | [2023-05-16 11:19:56,871] [oaieval.py:147] Final report: 10 | [2023-05-16 11:19:56,871] [oaieval.py:149] accuracy: 0.35 11 | [2023-05-16 11:19:56,878] [record.py:330] Logged 45 rows of events to results/Punctuation_Optim.jsonl: insert_time=5.607ms 12 | -------------------------------------------------------------------------------- /evaluations/logs/Stemmer_Optim.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 11:18:14,888] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 11:18:15,064] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 11:18:15,065] [oaieval.py:110] Run started: 230516181815UARDO6UR 4 | [2023-05-16 11:18:15,066] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 11:18:15,067] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 11:18:15,073] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 11:18:26,791] [record.py:330] Logged 100 rows of events to results/Stemmer_Optim.jsonl: insert_time=12.471ms 8 | [2023-05-16 11:18:42,043] [record.py:330] Logged 100 rows of events to results/Stemmer_Optim.jsonl: insert_time=14.800ms 9 | [2023-05-16 11:18:42,048] [record.py:341] Final report: {'accuracy': 0.09}. 
Logged to results/Stemmer_Optim.jsonl 10 | [2023-05-16 11:18:42,048] [oaieval.py:147] Final report: 11 | [2023-05-16 11:18:42,049] [oaieval.py:149] accuracy: 0.09 12 | -------------------------------------------------------------------------------- /evaluations/logs/SynonymReplace_Optim_p_1.0.jsonl: -------------------------------------------------------------------------------- 1 | [2023-05-16 11:16:33,330] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals 2 | [2023-05-16 11:16:33,511] [registry.py:249] Loading registry from /Users/v/.evals/evals 3 | [2023-05-16 11:16:33,511] [oaieval.py:110] Run started: 230516181633ZCDDGWBF 4 | [2023-05-16 11:16:33,512] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl 5 | [2023-05-16 11:16:33,513] [eval.py:34] Evaluating 100 samples 6 | [2023-05-16 11:16:33,520] [eval.py:153] Running in threaded mode with 10 threads! 7 | [2023-05-16 11:16:46,158] [record.py:330] Logged 100 rows of events to results/SynonymReplace_Optim_p_1.0.jsonl: insert_time=20.366ms 8 | [2023-05-16 11:17:18,070] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.3s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID fd77f0d52eedd5ff46ce3071ff845099 in your message.)) 9 | [2023-05-16 11:17:21,155] [record.py:330] Logged 100 rows of events to results/SynonymReplace_Optim_p_1.0.jsonl: insert_time=24.062ms 10 | [2023-05-16 11:17:21,163] [record.py:341] Final report: {'accuracy': 0.33}. Logged to results/SynonymReplace_Optim_p_1.0.jsonl 11 | [2023-05-16 11:17:21,163] [oaieval.py:147] Final report: 12 | [2023-05-16 11:17:21,163] [oaieval.py:149] accuracy: 0.33 13 | -------------------------------------------------------------------------------- /evaluations/make_artifacts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import utils 3 | import plotly.graph_objects as go 4 | import plotly.io as pio 5 | import pandas as pd 6 | 7 | 8 | df = pd.read_csv("results.csv") 9 | 10 | df.columns = ["Name", "% Tokens Reduced", "LogiQA Accuracy"] 11 | df["USD Saved Per $100"] = df["% Tokens Reduced"] * 100 12 | df = df.round(2) 13 | utils.dataframe_to_markdown(df, os.path.join("artifacts", f"table.md")) 14 | 15 | 16 | for col in df.columns[1:]: 17 | # Plotting 18 | x = df.Name 19 | 20 | fig = go.Figure( 21 | data=[go.Bar(x=x, y=df[col], text=df[col], textposition="auto", name=col)] 22 | ) 23 | 24 | fig.update_layout( 25 | title=f"Comparison for {col}", 26 | yaxis=dict(title=col), 27 | xaxis_tickangle=-45, 28 | barmode="group", 29 | ) 30 | 31 | pio.write_image( 32 | fig, os.path.join("artifacts", f"{col}_graph.png".replace("\\", "")), scale=2 33 | ) 34 | -------------------------------------------------------------------------------- /evaluations/make_errors.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | def introduce_spelling_errors(sentence, error_rate=0.079): 5 | """according to grammarly, people make 7.9 errors per 100 words""" 6 | words = sentence.split() 7 | num_errors = int(len(words) * error_rate) 8 | for _ in range(num_errors): 9 | word_index = random.randint(0, len(words) - 1) 10 | word = words[word_index] 11 | char_index = random.randint(0, len(word) - 1) 12 | 
new_char = random.choice( 13 | [chr(i) for i in range(97, 123)] 14 | ) # Random lowercase letter 15 | words[word_index] = word[:char_index] + new_char + word[char_index + 1 :] 16 | return " ".join(words) 17 | 18 | 19 | def run(json_data, error_rate=0.079): 20 | for json_string in json_data: 21 | json_string["content"] = introduce_spelling_errors( 22 | json_string["content"], error_rate 23 | ) 24 | return json_data 25 | 26 | 27 | # sentence = "This is a sample sentence for testing." 28 | # error_rate = 0.079 29 | # result = introduce_spelling_errors(sentence, error_rate) 30 | # print(result) 31 | -------------------------------------------------------------------------------- /evaluations/results.csv: -------------------------------------------------------------------------------- 1 | name,tokens_opti_metric,accuracy 2 | Default,0.0,0.32 3 | Entropy_Optim_p_0.05,0.06354827671009917,0.3 4 | Entropy_Optim_p_0.1,0.11187882464200333,0.28 5 | Entropy_Optim_p_0.25,0.264708657814639,0.22 6 | Entropy_Optim_p_0.5,0.4965456587511314,0.08 7 | SynonymReplace_Optim_p_1.0,0.010552199050304767,0.33 8 | Lemmatizer_Optim,0.010102273794581817,0.33 9 | Stemmer_Optim,-0.05913231081899146,0.09 10 | NameReplace_Optim,0.011329279462348097,0.34 11 | Punctuation_Optim,0.12810019014299953,0.35 12 | Autocorrect_Optim,0.011435464848382511,0.3 13 | Pulp_Optim_p_0.05,0.05493628125175053,0.31 14 | Pulp_Optim_p_0.1,0.09521899460726639,0.25 -------------------------------------------------------------------------------- /evaluations/sample_logs/generate_db.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import string 4 | import openai 5 | from prompt_optimizer.poptim import StopWordOptim 6 | from prompt_optimizer.wrapper.sql_db import SQLDBManager 7 | from prompt_optimizer.wrapper.openai import OpenAIWrapper 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | openai.api_key = os.getenv("OPENAI_API_KEY") 12 | 13 | 14 | def generate_sample_db(): 15 | p_optimizer = StopWordOptim(verbose=True) 16 | sql_db = SQLDBManager("sample_project", "/Users/vaibkumr/Documents/sample.db") 17 | oai_wrapper = OpenAIWrapper(sql_db, p_optimizer) 18 | n = 100 19 | for i in range(n): 20 | x = random.choice(string.ascii_letters) 21 | response = oai_wrapper( 22 | openai.ChatCompletion.create, 23 | model="gpt-3.5-turbo", 24 | messages=[ 25 | {"role": "user", "content": f"Generate some text following the character: {x}"}, 26 | ] 27 | ) 28 | print(f"{[i]/[n]} {response}") 29 | 30 | 31 | 32 | if __name__ == "__main__": 33 | generate_sample_db() -------------------------------------------------------------------------------- /evaluations/tradeoff.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | 4 | df = pd.read_csv("results.csv") 5 | df.columns = ["Name", "% Tokens Reduced", "LogiQA Accuracy"] 6 | df["$ Saved Per $100"] = df["% Tokens Reduced"] * 100 7 | df = df.round(2) 8 | 9 | df = df[df.Name.str.contains('Entropy_Optim')] 10 | cost = df["$ Saved Per $100"].values 11 | accuracy = df["LogiQA Accuracy"].values 12 | 13 | plt.figure(dpi=300) 14 | 15 | plt.plot(cost, accuracy, 'k-') 16 | plt.plot(cost, accuracy, 'r^') 17 | plt.xlabel('Savings: \$100 -> \$') 18 | plt.ylabel('OpenAI Eval LogiQA Accuracy', fontweight='bold', fontsize=10) 19 | plt.title('Accuracy vs. 
Cost Tradeoff for `EntropyOptim`', fontweight='bold', fontsize=10) 20 | 21 | labels = [ 22 | "p=0.05", 23 | "p=0.10", 24 | "p=0.25", 25 | "p=0.50", 26 | ] 27 | # Plotting 28 | for i in range(cost.shape[0]): 29 | plt.text(cost[i], accuracy[i], labels[i], fontweight='bold', fontsize=10) 30 | 31 | 32 | plt.grid(True) 33 | save_path = 'artifacts/tradeoff.png' 34 | plt.savefig(save_path, bbox_inches="tight") -------------------------------------------------------------------------------- /evaluations/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import yaml 4 | import subprocess 5 | import csv 6 | import os 7 | 8 | # Most of the code here is written by chatgpt 9 | 10 | 11 | def dataframe_to_markdown(df, md_path): 12 | markdown = "| " + " | ".join(df.columns) + " |\n" 13 | markdown += "| " + " | ".join(["---"] * len(df.columns)) + " |\n" 14 | 15 | for _, row in df.iterrows(): 16 | markdown += "| " + " | ".join(str(value) for value in row) + " |\n" 17 | 18 | with open(md_path, "w") as handle: 19 | handle.write(markdown) 20 | 21 | return markdown 22 | 23 | 24 | def save_results(dictionary, file_path): 25 | file_exists = os.path.isfile(file_path) 26 | 27 | with open(file_path, "a", newline="") as csvfile: 28 | writer = csv.DictWriter(csvfile, fieldnames=dictionary.keys()) 29 | if not file_exists: 30 | writer.writeheader() 31 | writer.writerow(dictionary) 32 | 33 | 34 | def read_yaml(file_path): 35 | with open(file_path, "r") as file: 36 | data = yaml.safe_load(file) 37 | return data 38 | 39 | 40 | def write_yaml(data, file_path): 41 | with open(file_path, "w") as file: 42 | yaml.dump(data, file) 43 | 44 | 45 | def read_jsonl(file_path): 46 | with open(file_path, "r") as f: 47 | lines = f.readlines() 48 | json_list = [] 49 | for line in lines: 50 | json_obj = json.loads(line) 51 | json_list.append(json_obj) 52 | return json_list 53 | 54 | 55 | def write_jsonl(data, file_path): 56 | with open(file_path, "w") as f: 57 | for obj in data: 58 | f.write(json.dumps(obj) + "\n") 59 | 60 | 61 | def run_bash(bash_command): 62 | process = subprocess.Popen(bash_command, shell=True) 63 | process.wait() 64 | -------------------------------------------------------------------------------- /examples/bertscore_metric.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.metric import BERTScoreMetric 2 | from prompt_optimizer.poptim import StopWordOptim 3 | 4 | 5 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne. The cliffs near Beachy Head saw numerous shipwrecks in the 17th and early 18th centuries and a petition to erect a lighthouse started around 1691. Despite this, the lighthouse was not built until 1828, initially as a temporary wooden structure, and then as a permanent granite lighthouse which was designed by Thomas Stevenson and became operational in 1834. The light was provided by a three-sided rotating array of oil lamps with ten lamps on each side, each lamp mounted within a parabolic reflector. The Belle Tout lighthouse was decommissioned in 1902, when the replacement Beachy Head Lighthouse was built at the bottom of the cliffs. 
In 1999, the Grade II listed building was moved in one piece to prevent it from succumbing to coastal erosion, and since 2010 it has operated as a bed and breakfast.""" 6 | p_optimizer = StopWordOptim(metrics=[BERTScoreMetric()]) 7 | 8 | res = p_optimizer(prompt) 9 | 10 | print(f"Optmized Prompt: {res.content}") 11 | for key, value in res.metrics[0].items(): 12 | print(f"{key}: {value:.3f}") 13 | -------------------------------------------------------------------------------- /examples/cli/data/example.jsonl: -------------------------------------------------------------------------------- 1 | [{"role":"system","content":"you are a python programmer"},{"user":"system","content":"plot the sierpiński triangle"}] 2 | [{"role":"system","content":"you are a python programmer"},{"user":"system","content":"plot the sierpiński carpet"}] 3 | [{"role":"system","content":"you are a python programmer"},{"user":"system","content":"plot the newton fractal"}] 4 | [{"role":"system","content":"you are a python programmer"},{"user":"system","content":"plot the buddhabrot set"}] -------------------------------------------------------------------------------- /examples/cli/json_stopwordoptim.sh: -------------------------------------------------------------------------------- 1 | prompt-optimizer data/example.jsonl "StopWordOptim" --json True --metrics "TokenMetric" -------------------------------------------------------------------------------- /examples/cli/string_stopwordoptim.sh: -------------------------------------------------------------------------------- 1 | prompt-optimizer "write a python program to plot the buddhabrot set" StopWordOptim --metrics TokenMetric -------------------------------------------------------------------------------- /examples/entropy_optimizer.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.metric import TokenMetric 2 | from prompt_optimizer.poptim import EntropyOptim 3 | from prompt_optimizer.visualize import StringDiffer 4 | 5 | 6 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne.""" 7 | p_optimizer = EntropyOptim(verbose=False, p=0.1, metrics=[TokenMetric()]) 8 | optimized_prompt = p_optimizer(prompt).content 9 | sd = StringDiffer() 10 | sd(prompt, optimized_prompt) 11 | 12 | -------------------------------------------------------------------------------- /examples/json_support.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.metric import TokenMetric 2 | from prompt_optimizer.poptim import EntropyOptim 3 | 4 | 5 | prompt = [{"role":"system","content":"The following are binary choice questions (with answers) about whether it is moral to break common rules under some specific circumstances. Please answer the questions with Yes or No."},{"role":"user","content":"Imagine that a stranger tells Hank to spill several gallons of bleach on his neighbor's front lawn without his neighbor's permission. If Hank refuses, the stranger will shoot and kill Hank's son. Hank carries out the stranger's request. Is this morally acceptable? 
\nYes\nNo\nAnswer:"}] 6 | 7 | 8 | p_optimizer = EntropyOptim(verbose=True, p=0.5, metrics=[TokenMetric()]) 9 | res = p_optimizer(prompt, json=True, skip_system=True) 10 | 11 | optimized_prompt = res.content 12 | metrics = res.metrics 13 | 14 | print(f"optimized_prompt: {optimized_prompt}") 15 | print(f"metrics: {metrics}") 16 | -------------------------------------------------------------------------------- /examples/langchain_support.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.metric import TokenMetric 2 | from prompt_optimizer.poptim import EntropyOptim 3 | from langchain.schema import ( 4 | HumanMessage, 5 | SystemMessage 6 | ) 7 | 8 | messages = [ 9 | SystemMessage(content="You are a helpful assistant that translates English to French."), 10 | HumanMessage(content="I love programming.") 11 | ] 12 | 13 | p_optimizer = EntropyOptim(verbose=True, p=0.5, metrics=[TokenMetric()]) 14 | optim_batch_messages = p_optimizer(messages, langchain=True) 15 | 16 | print(messages) 17 | print(optim_batch_messages) 18 | -------------------------------------------------------------------------------- /examples/protect_tags.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.poptim import PunctuationOptim 2 | 3 | 4 | prompt = """The Belle Tout Lighthouse (!!) is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne.""" 5 | p_optimizer = PunctuationOptim(verbose=True, protect_tag="pt") 6 | optimized_prompt = p_optimizer(prompt).content 7 | print("optimized_prompt: ", optimized_prompt) 8 | -------------------------------------------------------------------------------- /examples/sequential.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.metric import TokenMetric 2 | from prompt_optimizer.poptim import ( 3 | AutocorrectOptim, 4 | LemmatizerOptim, 5 | PunctuationOptim, 6 | Sequential, 7 | ) 8 | 9 | 10 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne. The cliffs near Beachy Head saw numerous shipwrecks in the 17th and early 18th centuries and a petition to erect a lighthouse started around 1691. Despite this, the lighthouse was not built until 1828, initially as a temporary wooden structure, and then as a permanent granite lighthouse which was designed by Thomas Stevenson and became operational in 1834. The light was provided by a three-sided rotating array of oil lamps with ten lamps on each side, each lamp mounted within a parabolic reflector. The Belle Tout lighthouse was decommissioned in 1902, when the replacement Beachy Head Lighthouse was built at the bottom of the cliffs. 
In 1999, the Grade II listed building was moved in one piece to prevent it from succumbing to coastal erosion, and since 2010 it has operated as a bed and breakfast.""" 11 | p_optimizer = Sequential( 12 | LemmatizerOptim(metrics=[TokenMetric()]), 13 | PunctuationOptim(metrics=[TokenMetric()]), 14 | AutocorrectOptim(metrics=[TokenMetric()]), 15 | ) 16 | optimized_prompt = p_optimizer(prompt) 17 | print(optimized_prompt) 18 | -------------------------------------------------------------------------------- /prompt_optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.metric import BERTScoreMetric, Metric, TokenMetric 2 | from prompt_optimizer.poptim import ( 3 | LemmatizerOptim, 4 | NameReplaceOptim, 5 | PromptOptim, 6 | PulpOptim, 7 | PunctuationOptim, 8 | StemmerOptim, 9 | StopWordOptim, 10 | ) 11 | from prompt_optimizer.visualize import StringDiffer 12 | 13 | __all__ = [ 14 | "StringDiffer", 15 | "Metric", 16 | "BERTScoreMetric", 17 | "TokenMetric", 18 | "PromptOptim", 19 | "LemmatizerOptim", 20 | "StopWordOptim", 21 | "NameReplaceOptim", 22 | "PunctuationOptim", 23 | "PulpOptim", 24 | "StemmerOptim", 25 | "AutocorrectOptim", 26 | "SynonymReplaceOptim", 27 | "EntropyOptim", 28 | ] 29 | -------------------------------------------------------------------------------- /prompt_optimizer/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/prompt_optimizer/cli/__init__.py -------------------------------------------------------------------------------- /prompt_optimizer/cli/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import inspect 3 | import json 4 | import os 5 | import sys 6 | from typing import Any, List, Union 7 | 8 | import prompt_optimizer 9 | from prompt_optimizer.metric import * 10 | from prompt_optimizer.poptim import * 11 | 12 | 13 | def write_data(data: Union[object, List[object]], file_path: str) -> None: 14 | """ 15 | Writes data to a file in JSON format. 16 | 17 | Args: 18 | data (Union[object, List[object]]): The data to be written. It can be a single object or a list of objects. 19 | file_path (str): The path to the file where the data will be written. 20 | 21 | Returns: 22 | None 23 | """ 24 | if not isinstance(data, (list, object)): 25 | raise TypeError("The 'data' argument must be an object or a list of objects.") 26 | 27 | if not isinstance(data, list): 28 | data = [data] 29 | 30 | try: 31 | with open(file_path, "a+") as f: 32 | for obj in data: 33 | f.write(json.dumps(obj) + "\n") 34 | except IOError: 35 | raise IOError("An error occurred while writing to the file.") 36 | 37 | 38 | def read_jsonl(file_path: str) -> List[object]: 39 | """ 40 | Reads a file in JSONL format and returns a list of JSON objects. 41 | 42 | Args: 43 | file_path (str): The path to the JSONL file. 44 | 45 | Returns: 46 | List[object]: A list of JSON objects parsed from the file. 
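For illustration only, a file in the format this helper expects could be produced with a short sketch like the one below; the file name `prompts.jsonl` and the message contents are invented, not part of the repository.

```python
import json

# One JSON-encoded chat request (a list of {"role", "content"} messages) per line.
request = [
    {"role": "system", "content": "you are a helpful assistant"},
    {"role": "user", "content": "summarize the following notes ..."},
]
with open("prompts.jsonl", "w") as f:
    f.write(json.dumps(request) + "\n")
```

A file like this can then be handed to the CLI with `--json True`, as in `examples/cli/json_stopwordoptim.sh`.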
47 | """ 48 | try: 49 | with open(file_path, "r") as f: 50 | lines = f.readlines() 51 | json_list = [] 52 | for line in lines: 53 | try: 54 | json_obj = json.loads(line) 55 | json_list.append(json_obj) 56 | except json.JSONDecodeError as e: 57 | raise json.JSONDecodeError( 58 | f"Error decoding JSON object: {e.msg}", e.doc, e.pos 59 | ) 60 | except IOError: 61 | raise IOError("An error occurred while reading the file.") 62 | 63 | return json_list 64 | 65 | 66 | def read_txt(file_path: str) -> List[str]: 67 | """ 68 | Reads a text file and returns a list of lines. 69 | 70 | Args: 71 | file_path (str): The path to the text file. 72 | 73 | Returns: 74 | List[str]: A list of lines read from the file. 75 | 76 | """ 77 | try: 78 | with open(file_path, "r") as f: 79 | lines = f.readlines() 80 | except IOError: 81 | raise IOError("An error occurred while reading the file.") 82 | 83 | return lines 84 | 85 | 86 | def read_data(file_path: str, json: bool) -> List[object]: 87 | """ 88 | Reads data from a file either in JSONL format or plain text format. 89 | 90 | Args: 91 | file_path (str): The path to the file. 92 | json (bool): Specifies whether the file is in JSONL format (True) or plain text format (False). 93 | 94 | Returns: 95 | List[object]: A list of objects parsed from the file. 96 | 97 | """ 98 | if json: 99 | return read_jsonl(file_path) 100 | else: 101 | return read_txt(file_path) 102 | 103 | 104 | def run_optimize( 105 | optimizer_obj: prompt_optimizer.PromptOptim, 106 | prompt: str, 107 | json: bool, 108 | skip_system: bool, 109 | ) -> Any: 110 | """ 111 | Runs an optimizer object with the specified parameters. 112 | 113 | Args: 114 | optimizer_obj (prompt_optimizer.PromptOptim): The optimizer object to be run. 115 | prompt (str): The prompt for the optimizer. 116 | json (bool): Specifies whether to process the prompt as JSON (True) or plain text (False). 117 | skip_system (bool): Specifies whether to skip the system response in the optimization (True) or include it (False). 118 | 119 | Returns: 120 | Any: The result of running the optimizer object. 121 | """ 122 | print(f"!!! prompt: {prompt}") 123 | return optimizer_obj(prompt, json=json, skip_system=skip_system) 124 | 125 | 126 | def print_result(res: Any) -> None: 127 | """ 128 | Prints the result or a list of results. 129 | 130 | Args: 131 | res (Any): The result to be printed. It can be a single result object or a list of results. 132 | 133 | """ 134 | if isinstance(res, list): 135 | for r in res: 136 | print(r) 137 | else: 138 | print(res) 139 | 140 | 141 | def run(args: argparse.Namespace) -> None: 142 | """ 143 | Runs the optimization process based on the provided CLI arguments. 144 | 145 | Args: 146 | args (argparse.Namespace): The CLI arguments for running the optimization. 
147 | 148 | Returns: 149 | None 150 | 151 | """ 152 | try: 153 | poptimizer_class = getattr(sys.modules[__name__], args.optimizer_name) 154 | except AttributeError: 155 | implemented_optims = inspect.getmembers(prompt_optimizer.poptim) 156 | implemented_optims = [ 157 | member[0] for member in implemented_optims if inspect.isclass(member[1]) 158 | ] 159 | raise NotImplementedError( 160 | f"Optimizer `{args.optimizer_name}` not implemented.\nChoose one of: {implemented_optims}" 161 | ) 162 | 163 | metrics = [] 164 | for metric in args.metrics: 165 | try: 166 | metrics.append(getattr(sys.modules[__name__], metric)()) 167 | except AttributeError: 168 | implemented_metrics = inspect.getmembers(prompt_optimizer.metric) 169 | implemented_metrics = [ 170 | member[0] 171 | for member in implemented_metrics 172 | if inspect.isclass(member[1]) 173 | ] 174 | raise NotImplementedError( 175 | f"Metric `{metric}` not implemented!\nChoose one of: {implemented_metrics}" 176 | ) 177 | 178 | poptimizer = poptimizer_class(*args.optimizer_args, verbose=False, metrics=metrics) 179 | 180 | current_directory = os.getcwd() 181 | full_path = os.path.join(current_directory, args.prompt_data_or_path) 182 | print(f"full_path: {full_path}") 183 | if os.path.exists(full_path): 184 | prompts = read_data(full_path, args.json) 185 | res = [ 186 | run_optimize(poptimizer, prompt, args.json, args.skip_system) 187 | for prompt in prompts 188 | ] 189 | else: 190 | res = run_optimize( 191 | poptimizer, args.prompt_data_or_path, args.json, args.skip_system 192 | ) 193 | 194 | if args.log_file is not None: 195 | write_data(res, args.log_file) 196 | else: 197 | print_result(res) 198 | 199 | 200 | def main(): 201 | """Main entrypoint for the Optimizer CLI.""" 202 | parser = argparse.ArgumentParser(description="Prompt Optimizer CLI") 203 | 204 | parser.add_argument( 205 | "prompt_data_or_path", 206 | help="Either the prompt data (string or json string) or path to a file containing new line separated prompt data.", 207 | ) 208 | parser.add_argument("optimizer_name", help="Name of the optimizer.") 209 | parser.add_argument("--json", default=False, help="Prompt format JSON or not.") 210 | parser.add_argument( 211 | "--skip_system", 212 | default=False, 213 | help="Skip system prompts or not. Only valid if `json` is True.", 214 | ) 215 | parser.add_argument( 216 | "--optimizer_args", 217 | nargs="*", 218 | default=[], 219 | help="Additional arguments for the optimizer.", 220 | ) 221 | parser.add_argument( 222 | "--metrics", nargs="*", default=[], help="List of metrics to compute." 223 | ) 224 | parser.add_argument( 225 | "--log_file", 226 | default=None, 227 | help="Output file to append results to. 
Prints on `stdout` if `None`.", 228 | ) 229 | 230 | args = parser.parse_args() 231 | run(args) 232 | 233 | 234 | if __name__ == "__main__": 235 | main() 236 | -------------------------------------------------------------------------------- /prompt_optimizer/metric/__init__.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.metric.base import Metric 2 | from prompt_optimizer.metric.bertscore_metric import BERTScoreMetric 3 | from prompt_optimizer.metric.token_metric import TokenMetric 4 | 5 | __all__ = ["Metric", "BERTScoreMetric", "TokenMetric"] 6 | -------------------------------------------------------------------------------- /prompt_optimizer/metric/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections import defaultdict 3 | 4 | 5 | class Metric(ABC): 6 | def __init__(self): 7 | self.key = None 8 | 9 | @abstractmethod 10 | def run(self, prompt_before: str, prompt_after: str) -> dict: 11 | """ 12 | Abstract method to run the metric on the given prompts. 13 | 14 | Args: 15 | prompt_before (str): The prompt before the modification. 16 | prompt_after (str): The prompt after the modification. 17 | 18 | Returns: 19 | dict: The result of the metric computation. 20 | """ 21 | pass 22 | 23 | def run_json(self, json_data_before: dict, json_data_after: dict) -> dict: 24 | """ 25 | Runs the metric on the content of JSON data. 26 | 27 | Args: 28 | json_data_before (dict): JSON data before the modification with "content" key. 29 | json_data_after (dict): JSON data after the modification with "content" key. 30 | 31 | Returns: 32 | dict: The result of the metric computation. 33 | """ 34 | res = self.run(json_data_before["content"], json_data_after["content"]) 35 | return res 36 | 37 | def batch_run( 38 | self, 39 | prompts_before: list, 40 | prompts_after: list, 41 | skip_system: bool = False, 42 | json: bool = False, 43 | langchain: bool = False, 44 | ) -> float: 45 | """ 46 | Runs the metric on a batch of prompts. 47 | 48 | Args: 49 | prompts_before (list): List of prompts before the modification. 50 | prompts_after (list): List of prompts after the modification. 51 | skip_system (bool, optional): Whether to skip prompts with "system" role. Defaults to False. 52 | json (bool, optional): Whether the prompts are JSON data. Defaults to False. 53 | langchain (bool, optional): Whether the prompts are langchain chat data. Defaults to False. 54 | 55 | Returns: 56 | float: The average metric value across the batch. 57 | """ 58 | avg_m = defaultdict(float) 59 | n = 0 60 | for pb, pa in zip(prompts_before, prompts_after): 61 | if json: 62 | if skip_system and pb["role"] == "system": 63 | continue 64 | else: 65 | res = self.run_json(pb, pa) 66 | n += 1 67 | 68 | elif langchain: 69 | if skip_system and pb.role == "system": 70 | continue 71 | else: 72 | res = self.run(pb.content, pa.content) 73 | n += 1 74 | 75 | else: 76 | res = self.run(pb, pa) 77 | n += 1 78 | 79 | for key in res: 80 | avg_m[key] += res[key] 81 | 82 | for key in avg_m: 83 | avg_m[key] /= n 84 | 85 | return avg_m 86 | 87 | def __call__(self, prompt_before: str, prompt_after: str) -> dict: 88 | """ 89 | Callable method to run the metric on the given prompts. 90 | 91 | Args: 92 | prompt_before (str): The prompt before the modification. 93 | prompt_after (str): The prompt after the modification. 94 | 95 | Returns: 96 | dict: The result of the metric computation. 
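As a sketch of how this base class is meant to be extended, a toy custom metric might look like the following; the class name and its character-count heuristic are invented for illustration.

```python
from prompt_optimizer.metric import Metric


class CharCountMetric(Metric):
    """Toy metric: fraction of characters removed by an optimizer."""

    def __init__(self):
        super().__init__()
        self.key = "char_opti_ratio"

    def run(self, prompt_before: str, prompt_after: str) -> dict:
        # Guard against empty prompts to avoid division by zero.
        ratio = (len(prompt_before) - len(prompt_after)) / max(len(prompt_before), 1)
        return {self.key: ratio}
```

Once defined, it can be passed like any built-in metric, e.g. `EntropyOptim(p=0.1, metrics=[CharCountMetric()])`.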
97 | """ 98 | return self.run(prompt_before, prompt_after) 99 | -------------------------------------------------------------------------------- /prompt_optimizer/metric/bertscore_metric.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from transformers import AutoModelForSequenceClassification, AutoTokenizer 3 | 4 | from prompt_optimizer.metric.base import Metric 5 | 6 | 7 | class BERTScoreMetric(Metric): 8 | """ 9 | BERTScoreMetric is a metric that calculates precision, recall, and F1 score based on BERT embeddings. 10 | It inherits from the Metric base class. 11 | 12 | Example: 13 | >>> from prompt_optimizer.metric import BERTScoreMetric 14 | >>> metric = BERTScoreMetric() 15 | >>> res = metric("default prompt...", "optimized prompt...") 16 | """ 17 | 18 | def __init__(self): 19 | super().__init__() 20 | self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") 21 | self.model = AutoModelForSequenceClassification.from_pretrained( 22 | "bert-base-uncased", num_labels=2 23 | ) 24 | 25 | def run(self, prompt_before: str, prompt_after: str) -> dict: 26 | """ 27 | Calculates precision, recall, and F1 score based on BERT embeddings. 28 | 29 | Args: 30 | prompt_before (str): The text before the prompt. 31 | prompt_after (str): The text after the prompt. 32 | 33 | Returns: 34 | dict: A dictionary containing the precision, recall, and F1 score. 35 | """ 36 | inputs = self.tokenizer( 37 | [prompt_before, prompt_after], 38 | return_tensors="pt", 39 | padding=True, 40 | truncation=True, 41 | ) 42 | outputs = self.model(**inputs, output_hidden_states=True) 43 | embedding1 = outputs.hidden_states[-2][0] 44 | embedding2 = outputs.hidden_states[-2][1] 45 | cos_sim = torch.nn.functional.cosine_similarity(embedding1, embedding2) 46 | precision, recall, f1 = ( 47 | cos_sim.mean().item(), 48 | cos_sim.max().item(), 49 | 2 50 | * cos_sim.mean().item() 51 | * cos_sim.max().item() 52 | / (cos_sim.mean().item() + cos_sim.max().item()), 53 | ) 54 | return { 55 | "bert_score_precision": precision, 56 | "bert_score_recall": recall, 57 | "bert_score_f1": f1, 58 | } 59 | -------------------------------------------------------------------------------- /prompt_optimizer/metric/token_metric.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | 3 | from prompt_optimizer.metric.base import Metric 4 | 5 | 6 | class TokenMetric(Metric): 7 | """ 8 | TokenMetric is a metric that calculates the optimization ratio based on the number of tokens reduced. 9 | It uses `tiktoken` to tokenize strings and count the number of tokens. 10 | 11 | It inherits from the Metric base class. 12 | 13 | Example: 14 | >>> from prompt_optimizer.metric import TokenMetric 15 | >>> metric = TokenMetric() 16 | >>> res = metric("default prompt...", "optimized prompt...") 17 | """ 18 | 19 | def __init__(self, tokenizer: str = "cl100k_base"): 20 | """ 21 | Initializes the TokenMetric. 22 | 23 | Args: 24 | tokenizer (str, optional): The tokenizer to use. Defaults to "cl100k_base". 25 | """ 26 | super().__init__() 27 | self.tokenizer = tiktoken.get_encoding(tokenizer) 28 | self.key = "num_token_opti_ratio" 29 | 30 | def run(self, prompt_before: str, prompt_after: str) -> dict: 31 | """ 32 | Calculates the optimization ratio based on the number of tokens. 33 | 34 | Args: 35 | prompt_before (str): The text before the prompt. 36 | prompt_after (str): The text after the prompt. 
37 | 38 | Returns: 39 | dict: A dictionary containing the optimization ratio. 40 | """ 41 | n_tokens_before = len(self.tokenizer.encode(prompt_before)) 42 | n_tokens_after = len(self.tokenizer.encode(prompt_after)) 43 | opti_ratio = (n_tokens_before - n_tokens_after) / n_tokens_before 44 | return {self.key: opti_ratio} 45 | 46 | def __call__(self, prompt_before: str, prompt_after: str) -> dict: 47 | """ 48 | Calls the run method to calculate the optimization ratio. 49 | 50 | Args: 51 | prompt_before (str): The text before the prompt. 52 | prompt_after (str): The text after the prompt. 53 | 54 | Returns: 55 | dict: A dictionary containing the optimization ratio. 56 | """ 57 | return self.run(prompt_before, prompt_after) 58 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from prompt_optimizer.poptim.autocorrect_optim import AutocorrectOptim 4 | from prompt_optimizer.poptim.base import PromptOptim 5 | from prompt_optimizer.poptim.entropy_optim import EntropyOptim 6 | from prompt_optimizer.poptim.lemmatizer_optim import LemmatizerOptim 7 | from prompt_optimizer.poptim.name_replace_optim import NameReplaceOptim 8 | from prompt_optimizer.poptim.pulp_optim import PulpOptim 9 | from prompt_optimizer.poptim.punctuation_optim import PunctuationOptim 10 | from prompt_optimizer.poptim.sequential import Sequential 11 | from prompt_optimizer.poptim.stemmer_optim import StemmerOptim 12 | from prompt_optimizer.poptim.stop_word_optim import StopWordOptim 13 | from prompt_optimizer.poptim.synonym_replace_optim import SynonymReplaceOptim 14 | 15 | __all__ = [ 16 | "Sequential", 17 | "PromptOptim", 18 | "LemmatizerOptim", 19 | "StopWordOptim", 20 | "NameReplaceOptim", 21 | "PunctuationOptim", 22 | "PulpOptim", 23 | "StemmerOptim", 24 | "AutocorrectOptim", 25 | "SynonymReplaceOptim", 26 | "EntropyOptim", 27 | ] 28 | 29 | logger = logging.getLogger(__name__) 30 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/autocorrect_optim.py: -------------------------------------------------------------------------------- 1 | from autocorrect import Speller 2 | 3 | from prompt_optimizer.poptim.base import PromptOptim 4 | 5 | 6 | class AutocorrectOptim(PromptOptim): 7 | """ 8 | AutocorrectOptim is a prompt optimization technique that applies autocorrection to the prompt text. 9 | Correctly spelled words have less token count than incorrect ones. This is useful in scenarios where 10 | human client types the text. 11 | 12 | It inherits from the PromptOptim base class. 13 | 14 | Example: 15 | >>> from prompt_optimizer.poptim import AutocorrectOptim 16 | >>> p_optimizer = AutocorrectOptim() 17 | >>> res = p_optimizer("example prompt...") 18 | >>> optimized_prompt = res.content 19 | """ 20 | 21 | def __init__(self, fast: bool = False, verbose: bool = False, metrics: list = []): 22 | """ 23 | Initializes the AutocorrectOptim. 24 | 25 | Args: 26 | fast (bool, optional): Flag indicating whether to use a fast autocorrect implementation. Defaults to False. 27 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False. 28 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list. 
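A typical call might look like the sketch below; the misspelled prompt and the corrected output are only indicative, since the actual corrections come from the `autocorrect` package.

```python
from prompt_optimizer.poptim import AutocorrectOptim

p_optimizer = AutocorrectOptim(fast=True)
res = p_optimizer("Plese explane the follwing excerpt in simpler words")
print(res.content)  # roughly: "Please explain the following excerpt in simpler words"
```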
29 | """ 30 | super().__init__(verbose, metrics) 31 | self.spell = Speller(lang="en", fast=fast) 32 | 33 | def optimize(self, prompt: str) -> str: 34 | """ 35 | Applies autocorrection to the prompt text. 36 | 37 | Args: 38 | prompt (str): The prompt text. 39 | 40 | Returns: 41 | str: The optimized prompt text after applying autocorrection. 42 | """ 43 | words = prompt.split() 44 | autocorrected_words = [self.spell(word) for word in words] 45 | opti_prompt = " ".join(autocorrected_words) 46 | return opti_prompt 47 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/base.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from abc import ABC, abstractmethod 3 | 4 | from .logger import logger 5 | from .utils import DotDict, protected_runner 6 | 7 | 8 | class PromptOptim(ABC): 9 | """ 10 | PromptOptim is an abstract base class for prompt optimization techniques. 11 | 12 | It defines the common structure and interface for prompt optimization. 13 | 14 | This class inherits from ABC (Abstract Base Class). 15 | """ 16 | 17 | def __init__( 18 | self, verbose: bool = False, metrics: list = [], protect_tag: str = None 19 | ): 20 | """ 21 | Initializes the PromptOptim. 22 | 23 | Args: 24 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False. 25 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list. 26 | protect_tag (str, optional): markup style tag string to indicate protected content that can't be deleted or modified. Defaults to `None`. 27 | """ 28 | self.verbose = verbose 29 | self.metrics = metrics 30 | self.protect_tag = protect_tag 31 | 32 | @abstractmethod 33 | def optimize(self, prompt: str) -> str: 34 | """ 35 | Abstract method to run the prompt optimization technique on a prompt. 36 | 37 | This method must be implemented by subclasses. 38 | 39 | Args: 40 | prompt (str): The prompt text. 41 | 42 | Returns: 43 | str: The optimized prompt text. 44 | """ 45 | pass 46 | 47 | @protected_runner 48 | def run(self, prompt: str) -> str: 49 | """ 50 | Wrapper around `optimize` to do protected optimization. 51 | 52 | Args: 53 | prompt (str): The prompt text. 54 | 55 | Returns: 56 | str: The protected optimized prompt text. 57 | """ 58 | return self.optimize(prompt) 59 | 60 | def run_json(self, json_data: list, skip_system: bool = False) -> dict: 61 | """ 62 | Applies prompt optimization to the JSON request object. 63 | 64 | Args: 65 | json_data (dict): The JSON data object. 66 | 67 | Returns: 68 | dict: The JSON data object with the content field replaced by the optimized prompt text. 69 | """ 70 | optim_json_data = copy.deepcopy(json_data) 71 | 72 | for data in optim_json_data: 73 | if skip_system and data["role"] == "system": 74 | continue 75 | data["content"] = self.run(data["content"]) 76 | return optim_json_data 77 | 78 | def run_langchain(self, langchain_data: list, skip_system: bool = False): 79 | """ 80 | Runs the prompt optimizer on langchain chat data. 81 | 82 | Args: 83 | langchain_data (list): The langchain data containing 'type' and 'content' fields. 84 | skip_system (bool, optional): Whether to skip data with type 'system'. Defaults to False. 85 | 86 | Returns: 87 | list: The modified langchain data. 
88 | 89 | """ 90 | 91 | optim_langchain_data = copy.deepcopy(langchain_data) 92 | 93 | for data in optim_langchain_data: 94 | if skip_system and data.type == "system": 95 | continue 96 | data.content = self.run(data.content) 97 | 98 | return optim_langchain_data 99 | 100 | # def batch_run( 101 | # self, data: list, skip_system: bool = False, json: bool = True 102 | # ) -> list: 103 | # """ 104 | # Applies prompt optimization to a batch of data. 105 | 106 | # Args: 107 | # data (list): A list of prompts or JSON data objects. 108 | # skip_system (bool, optional): Flag indicating whether to skip system role data objects. Defaults to False. 109 | # json (bool, optional): Flag indicating whether the input data is in JSON format. Defaults to True. 110 | 111 | # Returns: 112 | # list: A list of optimized prompts or JSON data objects. 113 | # """ 114 | # optimized_data = [] 115 | # for d in data: 116 | # if json: 117 | # optimized_data.append(self.run_json(d, skip_system)) 118 | # else: 119 | # optimized_data.append(self.run(d)) 120 | # return optimized_data 121 | 122 | def __call__( 123 | self, 124 | prompt_data: list, 125 | skip_system: bool = False, 126 | json: bool = False, 127 | langchain: bool = False, 128 | ) -> list: 129 | """ 130 | Process the prompt data and return optimized prompt data. 131 | 132 | Args: 133 | prompt_data: A list of prompt data. 134 | skip_system: A boolean indicating whether to skip system prompts. Default is False. 135 | json: A boolean indicating whether the prompt data is in JSON format. Default is False. 136 | langchain: A boolean indicating whether the prompt data is in langchain format. Default is False. 137 | 138 | Returns: 139 | A list of optimized prompt data. 140 | 141 | Raises: 142 | AssertionError: If skip_system is True and json is False. 143 | 144 | """ 145 | 146 | assert not (json and langchain), "Data type can't be both json and langchain" 147 | 148 | if skip_system: 149 | assert ( 150 | json or langchain 151 | ), "Can't skip system prompts without batched json format" 152 | 153 | if json: 154 | opti_prompt_data = self.run_json(prompt_data, skip_system) 155 | elif langchain: 156 | opti_prompt_data = self.run_langchain(prompt_data, skip_system) 157 | else: 158 | opti_prompt_data = self.run(prompt_data) 159 | 160 | metric_results = [] 161 | for metric in self.metrics: 162 | if json or langchain: 163 | metric_result = metric.batch_run( 164 | prompt_data, opti_prompt_data, skip_system, json, langchain 165 | ) 166 | else: 167 | metric_result = metric.run(prompt_data, opti_prompt_data) 168 | 169 | metric_results.append(metric_result) 170 | 171 | if self.verbose: 172 | logger.info(f"Prompt Data Before: {prompt_data}") 173 | logger.info(f"Prompt Data After: {opti_prompt_data}") 174 | for metric_result in metric_results: 175 | for key in metric_result: 176 | logger.info(f"{key}: {metric_result[key]:.3f}") 177 | 178 | result = DotDict() 179 | result.content = opti_prompt_data 180 | result.metrics = metric_results 181 | 182 | return result 183 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/entropy_optim.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from transformers import AutoModelForMaskedLM, AutoTokenizer 4 | 5 | from prompt_optimizer.poptim.base import PromptOptim 6 | 7 | 8 | class EntropyOptim(PromptOptim): 9 | """ 10 | EntropyOptim is a prompt optimization technique based on entropy values of tokens. 
11 | A masked language model (`bert-base-cased` by default) is used to compute probabilities 12 | of observing the current token based on right and left context. These probability values 13 | are further used to compute the entropy values. Optimizer then moves on to remove the 14 | tokens corresponding to lowest `p` percentile entropies. 15 | 16 | The intuition of this method is that the model can infill low entropy i.e. low surprise 17 | or highly probable tokens through the context. I will probably write a paper to explain 18 | this in more detail. 19 | 20 | `EntropyOptim` inherits from the PromptOptim base class. 21 | 22 | Example: 23 | >>> from prompt_optimizer.poptim import EntropyOptim 24 | >>> p_optimizer = EntropyOptim(p=0.1) 25 | >>> res = p_optimizer("example prompt...") 26 | >>> optimized_prompt = res.content 27 | 28 | """ 29 | 30 | def __init__( 31 | self, 32 | model_name: str = "bert-base-cased", 33 | p: float = 0.1, 34 | verbose: bool = False, 35 | metrics: list = [], 36 | **kwargs, 37 | ): 38 | """ 39 | Initializes the EntropyOptim. 40 | 41 | Args: 42 | model_name (str, optional): The name of the pretrained masked language model. Defaults to "bert-base-cased". 43 | p (float, optional): The percentile cutoff value for selecting tokens. Defaults to `0.1`. Higher `p` means more compression. 44 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False. 45 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list. 46 | """ 47 | super().__init__(verbose, metrics, **kwargs) 48 | self.p = p * 100 49 | self.model_name = model_name 50 | self.load_mlm_model_tokenizer() 51 | 52 | def load_mlm_model_tokenizer(self): 53 | """ 54 | Loads the masked language model and tokenizer. 55 | """ 56 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) 57 | self.model = AutoModelForMaskedLM.from_pretrained(self.model_name) 58 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 59 | self.model.to(self.device) 60 | 61 | def generate_confidence_values(self, sentence: str) -> list: 62 | """ 63 | Generates entropy values for each token in the sentence. 64 | 65 | Args: 66 | sentence (str): The input sentence. 67 | 68 | Returns: 69 | list: A list of tuples containing token IDs and their corresponding entropy values. 70 | """ 71 | inputs = self.tokenizer.encode_plus( 72 | sentence, return_tensors="pt", add_special_tokens=False 73 | ) 74 | input_ids = inputs["input_ids"].to(self.device) 75 | attention_mask = inputs["attention_mask"].to(self.device) 76 | 77 | with torch.no_grad(): 78 | outputs = self.model(input_ids, attention_mask=attention_mask) 79 | logits = outputs.logits[0] 80 | 81 | probs = torch.softmax(logits, dim=-1) 82 | entropy_mapping = [] 83 | for i, input_id in enumerate(input_ids[0].detach().cpu().numpy()): 84 | entropy = -torch.log2(probs[i, input_id]).detach().cpu().item() 85 | entropy_mapping.append((input_id, entropy)) 86 | return entropy_mapping 87 | 88 | def percentile_cutoff_tokens(self, entropy_mapping: list) -> list: 89 | """ 90 | Selects tokens with entropy values above a percentile cutoff. 91 | 92 | Args: 93 | entropy_mapping (list): A list of tuples containing token IDs and their corresponding entropy values. 94 | 95 | Returns: 96 | list: A list of selected token IDs. 
97 | """ 98 | surprise_cutoff = np.percentile([cm[1] for cm in entropy_mapping], self.p) 99 | filtered_tokens = [cm[0] for cm in entropy_mapping if cm[1] >= surprise_cutoff] 100 | return filtered_tokens 101 | 102 | def run_chunk(self, prompt: str) -> str: 103 | """ 104 | Runs the prompt optimization technique on a chunk of the prompt. 105 | 106 | Args: 107 | prompt (str): The chunk of the prompt. 108 | 109 | Returns: 110 | str: The optimized chunk of the prompt. 111 | """ 112 | entropy_mapping = self.generate_confidence_values(prompt) 113 | filtered_tokens = self.percentile_cutoff_tokens(entropy_mapping) 114 | optimized_prompt = self.tokenizer.decode(filtered_tokens) 115 | return optimized_prompt 116 | 117 | def optimize(self, prompt: str) -> str: 118 | """ 119 | Runs the prompt optimization technique on the prompt. 120 | Args: 121 | prompt (str): The prompt text. 122 | 123 | Returns: 124 | str: The optimized prompt text. 125 | """ 126 | max_l = int(0.7 * self.model.config.max_position_embeddings) 127 | tokens = prompt.split() 128 | opti_prompt = "" 129 | for idx in range(0, len(tokens), max_l): 130 | part_prompt = " ".join(tokens[idx : idx + max_l]) 131 | part_opti_prompt = self.run_chunk(part_prompt) 132 | opti_prompt += part_opti_prompt 133 | return opti_prompt 134 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/lemmatizer_optim.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from nltk.corpus import wordnet 3 | from nltk.stem import WordNetLemmatizer 4 | 5 | from prompt_optimizer.poptim.base import PromptOptim 6 | 7 | 8 | class LemmatizerOptim(PromptOptim): 9 | """ 10 | LemmatizerOptim is a prompt optimization technique based on lemmatization. 11 | 12 | It inherits from the PromptOptim base class. 13 | 14 | Example: 15 | >>> from prompt_optimizer.poptim import LemmatizerOptim 16 | >>> p_optimizer = LemmatizerOptim() 17 | >>> res = p_optimizer("example prompt...") 18 | >>> optimized_prompt = res.content 19 | """ 20 | 21 | def __init__(self, verbose: bool = False, metrics: list = []): 22 | """ 23 | Initializes the LemmatizerOptim. 24 | 25 | Args: 26 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False. 27 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list. 28 | """ 29 | super().__init__(verbose, metrics) 30 | self.lemmatizer = WordNetLemmatizer() 31 | nltk.download("averaged_perceptron_tagger") 32 | nltk.download("wordnet") 33 | 34 | def get_wordnet_pos(self, word: str) -> str: 35 | """ 36 | Maps the POS tag from NLTK to WordNet POS tags. 37 | 38 | Args: 39 | word (str): The word to determine the POS tag. 40 | 41 | Returns: 42 | str: The WordNet POS tag. 43 | """ 44 | tag = nltk.pos_tag([word])[0][1][0].upper() 45 | tag_dict = { 46 | "J": wordnet.ADJ, 47 | "N": wordnet.NOUN, 48 | "V": wordnet.VERB, 49 | "R": wordnet.ADV, 50 | } 51 | return tag_dict.get(tag, wordnet.NOUN) 52 | 53 | def optimize(self, prompt: str) -> str: 54 | """ 55 | Runs the lemmatizer prompt optimization technique on the prompt. 56 | 57 | Args: 58 | prompt (str): The prompt text. 59 | 60 | Returns: 61 | str: The optimized prompt text. 
62 | """ 63 | words = prompt.split() 64 | lemmatized_words = [ 65 | self.lemmatizer.lemmatize(word, self.get_wordnet_pos(word)) 66 | for word in words 67 | ] 68 | opti_prompt = " ".join(lemmatized_words) 69 | return opti_prompt 70 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/logger.py: -------------------------------------------------------------------------------- 1 | # TODO: Remove 2 | 3 | import logging 4 | 5 | 6 | def configure_logger(log_file=None): 7 | logger.setLevel(logging.INFO) 8 | 9 | formatter = logging.Formatter( 10 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s" 11 | ) 12 | 13 | stream_handler = logging.StreamHandler() 14 | stream_handler.setFormatter(formatter) 15 | logger.addHandler(stream_handler) 16 | 17 | if log_file: 18 | file_handler = logging.FileHandler(log_file) 19 | file_handler.setFormatter(formatter) 20 | logger.addHandler(file_handler) 21 | 22 | 23 | logger = logging.getLogger(__name__) 24 | configure_logger() 25 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/name_replace_optim.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | from prompt_optimizer.poptim.base import PromptOptim 4 | 5 | 6 | class NameReplaceOptim(PromptOptim): 7 | """ 8 | NameReplaceOptim is a prompt optimization technique based on replacing names in the prompt. 9 | Some names have lower token count (1) than others. Higher token count names can be replaced by 10 | such names to reduce token complexity. `self.opti_names` contains the pre-made list of such names 11 | for `tiktokenizer`. The list will need to be modified for other tokenizers. 12 | 13 | It inherits from the PromptOptim base class. 14 | 15 | Example: 16 | >>> from prompt_optimizer.poptim import NameReplaceOptim 17 | >>> p_optimizer = NameReplaceOptim() 18 | >>> res = p_optimizer("example prompt...") 19 | >>> optimized_prompt = res.content 20 | """ 21 | 22 | def __init__(self, verbose: bool = False, metrics: list = []): 23 | """ 24 | Initializes the NameReplaceOptim. 25 | 26 | Args: 27 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False. 28 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list. 29 | """ 30 | super().__init__(verbose, metrics) 31 | self.opti_names = self.get_opti_names() 32 | 33 | def download(self): 34 | """ 35 | Downloads the required NLTK resources. 36 | """ 37 | nltk.download("punkt") 38 | nltk.download("averaged_perceptron_tagger") 39 | nltk.download("maxent_ne_chunker") 40 | nltk.download("words") 41 | 42 | def process(self, text: str) -> nltk.Tree: 43 | """ 44 | Processes the text using NLTK to identify named entities. 45 | 46 | Args: 47 | text (str): The text to process. 48 | 49 | Returns: 50 | nltk.Tree: The parsed sentence tree containing named entities. 51 | """ 52 | tokens = nltk.tokenize.word_tokenize(text) 53 | pos = nltk.pos_tag(tokens) 54 | sentence_tree = nltk.ne_chunk(pos, binary=False) 55 | return sentence_tree 56 | 57 | def get_opti_names(self) -> list: 58 | """ 59 | Retrieves the list of optimized names. 60 | 61 | Returns: 62 | list: The list of optimized names. 
63 | """ 64 | opti_names = """Rene 65 | Asa 66 | Zion 67 | Avery 68 | Gray 69 | Morgan 70 | Story 71 | Arden 72 | Kit 73 | Lux 74 | Sol 75 | Avery 76 | Pat 77 | Sky 78 | Arden 79 | Clair 80 | Storm 81 | Ellery 82 | Arin 83 | Sol 84 | Alpha 85 | Arie 86 | Rio 87 | Isa 88 | Aris 89 | Ara 90 | Adel 91 | Tam 92 | Lin 93 | Aly 94 | Bao 95 | Tru 96 | True 97 | Toy 98 | Adi 99 | Cache 100 | Chi 101 | Han 102 | Amil 103 | Amel 104 | Eri 105 | Truth 106 | Hoa 107 | Indy 108 | Vertis 109 | Chai 110 | Ottie 111 | Ary 112 | Aki 113 | Rei 114 | Bay 115 | Ova 116 | Shell 117 | Rael 118 | Gal 119 | Sher 120 | Elim 121 | Dae 122 | Zell 123 | Wen 124 | Audi""" 125 | opti_names = [name.strip() for name in opti_names.split()] 126 | return opti_names 127 | 128 | def gen_name_map(self, text: str) -> dict: 129 | """ 130 | Generates a mapping of names in the prompt to optimized names. 131 | 132 | Args: 133 | text (str): The prompt text. 134 | 135 | Returns: 136 | dict: The mapping of names to optimized names. 137 | """ 138 | name_list = [] 139 | try: 140 | sentence_tree = self.process(text) 141 | except Exception: 142 | self.download() 143 | sentence_tree = self.process(text) 144 | 145 | for subtree in sentence_tree.subtrees(filter=lambda t: t.label() == "PERSON"): 146 | person = [] 147 | name = "" 148 | 149 | for leaf in subtree.leaves(): 150 | person.append(leaf[0]) 151 | 152 | if len(person) > 1: 153 | for part in person: 154 | name += part + " " 155 | 156 | name = name.strip() 157 | 158 | if name not in name_list: 159 | name_list.append(name) 160 | 161 | mapping = dict(zip(name_list[: len(self.opti_names)], self.opti_names)) 162 | return mapping 163 | 164 | def opti_name_replace(self, text: str, mapping: dict) -> str: 165 | """ 166 | Replaces names in the text with optimized names based on the mapping. 167 | 168 | Args: 169 | text (str): The text to perform name replacement. 170 | mapping (dict): The mapping of names to optimized names. 171 | 172 | Returns: 173 | str: The text with replaced names. 174 | """ 175 | for old_name in mapping: 176 | new_name = mapping[old_name] 177 | text = text.replace(old_name, new_name) 178 | return text 179 | 180 | def optimize(self, prompt: str) -> str: 181 | """ 182 | Runs the prompt optimization technique on the prompt. 183 | 184 | Args: 185 | prompt (str): The prompt text. 186 | 187 | Returns: 188 | str: The optimized prompt text. 189 | """ 190 | mapping = self.gen_name_map(prompt) 191 | opti_prompt = self.opti_name_replace(prompt, mapping) 192 | return opti_prompt 193 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/pulp_optim.py: -------------------------------------------------------------------------------- 1 | from pulp import LpBinary, LpMinimize, LpProblem, LpVariable, lpSum 2 | 3 | from prompt_optimizer.poptim.base import PromptOptim 4 | 5 | 6 | class PulpOptim(PromptOptim): 7 | """ 8 | PulpOptim is a prompt optimization technique based on integer linear programming using the Pulp library. 9 | 10 | It inherits from the PromptOptim base class. 11 | 12 | Example: 13 | >>> from prompt_optimizer.poptim import PulpOptim 14 | >>> p_optimizer = PulpOptim(p=0.1) 15 | >>> res = p_optimizer("example prompt...") 16 | >>> optimized_prompt = res.content 17 | """ 18 | 19 | def __init__(self, p: float = 0.1, verbose: bool = False, metrics: list = []): 20 | """ 21 | Initializes the PulpOptim. 22 | 23 | Args: 24 | p (float, optional): The aggression factor controlling the reduction in the number of tokens. Defaults to 0.1. 
Higher `p` corresponds to lower token output count. 25 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False. 26 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list. 27 | """ 28 | super().__init__(verbose, metrics) 29 | self.aggression = p # will reduce num tokens by aggression*100% 30 | 31 | def optimize(self, prompt: str) -> str: 32 | """ 33 | Runs the prompt optimization technique on the prompt. 34 | 35 | Args: 36 | prompt (str): The prompt text. 37 | 38 | Returns: 39 | str: The optimized prompt text. 40 | """ 41 | tokens = prompt.split() 42 | target_length = int(len(tokens) * (1 - self.aggression)) 43 | 44 | x = LpVariable.dicts("x", range(len(tokens)), cat=LpBinary) 45 | 46 | # Define the objective function to minimize the number of deleted tokens 47 | model = LpProblem("Extractive Compression", LpMinimize) 48 | model += lpSum([1 - x[i] for i in range(len(tokens))]) 49 | 50 | # Constraints to ensure that the compressed text has the target length 51 | model += lpSum([x[i] for i in range(len(tokens))]) == target_length 52 | 53 | # Constraints for compressed text is a subsequence of the original text 54 | for i in range(len(tokens)): 55 | for j in range(i + 1, len(tokens)): 56 | if tokens[i] == tokens[j]: 57 | model += x[i] <= x[j] 58 | 59 | # Solve the optimization problem 60 | model.solve() 61 | 62 | # Extract the indices of the selected tokens 63 | selected_indices = [i for i in range(len(tokens)) if x[i].value() == 1] 64 | 65 | # Generate the compressed text 66 | opti_prompt = " ".join([tokens[i] for i in selected_indices]) 67 | return opti_prompt 68 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/punctuation_optim.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | from prompt_optimizer.poptim.base import PromptOptim 4 | 5 | 6 | class PunctuationOptim(PromptOptim): 7 | """ 8 | PunctuationOptim is a prompt optimization technique that removes punctuation marks from the prompt. 9 | LLMs can infer punctuations themselves in most cases, remove them. 10 | 11 | It inherits from the PromptOptim base class. 12 | 13 | Example: 14 | >>> from prompt_optimizer.poptim import PunctuationOptim 15 | >>> p_optimizer = PunctuationOptim() 16 | >>> res = p_optimizer("example prompt...") 17 | >>> optimized_prompt = res.content 18 | """ 19 | 20 | def __init__(self, verbose: bool = False, metrics: list = [], **kwargs): 21 | """ 22 | Initializes the PunctuationOptim. 23 | 24 | Args: 25 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False. 26 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list. 27 | """ 28 | super().__init__(verbose, metrics, **kwargs) 29 | 30 | def optimize(self, prompt: str) -> str: 31 | """ 32 | Runs the prompt optimization technique on the prompt. 33 | 34 | Args: 35 | prompt (str): The prompt text. 36 | 37 | Returns: 38 | str: The optimized prompt text with punctuation marks removed. 
39 | """ 40 | opti_prompt = prompt.translate(str.maketrans("", "", string.punctuation)) 41 | return opti_prompt 42 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/sequential.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List 2 | 3 | from prompt_optimizer.poptim.base import PromptOptim 4 | 5 | from .utils import DotDict 6 | 7 | 8 | class Sequential: 9 | """ 10 | Sequential is a class that represents a sequential composition of prompt optimization techniques. 11 | 12 | It applies a series of optimization techniques in sequence to the prompt. 13 | 14 | 15 | 16 | 17 | Example: 18 | >>> optim1 = SomeOptimizationTechnique() 19 | >>> optim2 = AnotherOptimizationTechnique() 20 | >>> seq = Sequential(optim1, optim2) 21 | >>> optimized_prompt = seq(prompt) 22 | 23 | Args: 24 | *optims: Variable-length argument list of prompt optimization techniques. 25 | 26 | Attributes: 27 | optims (list): A list of prompt optimization techniques. 28 | 29 | """ 30 | 31 | def __init__(self, *optims: PromptOptim): 32 | """ 33 | Initializes the Sequential object with the specified prompt optimization techniques. 34 | 35 | Args: 36 | *optims: Variable-length argument list of prompt optimization techniques. 37 | """ 38 | self.optims: List[PromptOptim] = list(optims) 39 | 40 | def __call__(self, x: Any) -> Any: 41 | """ 42 | Applies the sequential composition of prompt optimization techniques to the prompt. 43 | 44 | Args: 45 | x (Any): The input prompt. 46 | 47 | Returns: 48 | Any: The optimized prompt after applying the sequential optimizations. 49 | """ 50 | d = DotDict() 51 | d.content = x 52 | for optim in self.optims: 53 | d = optim(d.content) 54 | return d 55 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/stemmer_optim.py: -------------------------------------------------------------------------------- 1 | from nltk.stem import PorterStemmer 2 | 3 | from prompt_optimizer.poptim.base import PromptOptim 4 | 5 | 6 | class StemmerOptim(PromptOptim): 7 | """ 8 | StemmerOptim is a prompt optimization technique that applies stemming to the prompt. 9 | 10 | Stemming reduces words to their base or root form, removing suffixes and prefixes. 11 | 12 | Example: 13 | >>> from prompt_optimizer.poptim import StemmerOptim 14 | >>> p_optimizer = StemmerOptim() 15 | >>> res = p_optimizer("example prompt...") 16 | >>> optimized_prompt = res.content 17 | 18 | """ 19 | 20 | def __init__(self, verbose: bool = False, metrics: list = []): 21 | """ 22 | Initializes the StemmerOptim object with the specified parameters. 23 | 24 | Args: 25 | verbose (bool, optional): If True, print verbose information during optimization. Defaults to False. 26 | metrics (list, optional): List of metrics to evaluate the optimization. Defaults to []. 27 | """ 28 | super().__init__(verbose, metrics) 29 | self.stemmer = PorterStemmer() 30 | 31 | def optimize(self, prompt: str) -> str: 32 | """ 33 | Applies stemming to the prompt. 34 | 35 | Args: 36 | prompt (str): The input prompt. 37 | 38 | Returns: 39 | str: The optimized prompt after applying stemming. 
40 | """ 41 | words = prompt.split() 42 | stemmed_words = [self.stemmer.stem(word) for word in words] 43 | opti_prompt = " ".join(stemmed_words) 44 | return opti_prompt 45 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/stop_word_optim.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | 3 | from prompt_optimizer.poptim.base import PromptOptim 4 | 5 | 6 | class StopWordOptim(PromptOptim): 7 | """ 8 | StopWordOptim is a prompt optimization technique that removes stop words from the prompt. 9 | 10 | Stop words are commonly used words (e.g., "the", "is", "in") that are often considered insignificant in natural language processing tasks. 11 | 12 | Example: 13 | >>> from prompt_optimizer.poptim import StopWordOptim 14 | >>> p_optimizer = StopWordOptim() 15 | >>> res = p_optimizer("example prompt...") 16 | >>> optimized_prompt = res.content 17 | 18 | """ 19 | 20 | def __init__(self, verbose: bool = False, metrics: list = []): 21 | """ 22 | Initializes the StopWordOptim object with the specified parameters. 23 | 24 | Args: 25 | verbose (bool, optional): If True, print verbose information during optimization. Defaults to False. 26 | metrics (list, optional): List of metrics to evaluate the optimization. Defaults to []. 27 | """ 28 | super().__init__(verbose, metrics) 29 | try: 30 | self.stop_words = set(nltk.corpus.stopwords.words("english")) 31 | except Exception: 32 | nltk.download("stopwords") 33 | self.stop_words = set(nltk.corpus.stopwords.words("english")) 34 | 35 | def optimize(self, prompt: str) -> str: 36 | """ 37 | Removes stop words from the prompt. 38 | 39 | Args: 40 | prompt (str): The input prompt. 41 | 42 | Returns: 43 | str: The optimized prompt after removing stop words. 44 | """ 45 | words = prompt.split() 46 | filtered_words = [word for word in words if word.lower() not in self.stop_words] 47 | opti_prompt = " ".join(filtered_words) 48 | return opti_prompt 49 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/synonym_replace_optim.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import nltk 4 | import tiktoken 5 | from nltk.corpus import wordnet 6 | 7 | from prompt_optimizer.poptim.base import PromptOptim 8 | 9 | 10 | class SynonymReplaceOptim(PromptOptim): 11 | """ 12 | SynonymReplaceOptim is a prompt optimization technique that replaces words in the prompt with their synonyms. 13 | 14 | Synonyms are words that have similar meanings to the original word. Sometimes a synonym has lower token count 15 | than the original word. 16 | 17 | Example: 18 | >>> from prompt_optimizer.poptim import SynonymReplaceOptim 19 | >>> p_optimizer = SynonymReplaceOptim() 20 | >>> res = p_optimizer("example prompt...") 21 | >>> optimized_prompt = res.content 22 | """ 23 | 24 | def __init__(self, verbose: bool = False, metrics: list = [], p: float = 0.5): 25 | """ 26 | Initializes the SynonymReplaceOptim object with the specified parameters. 27 | 28 | Args: 29 | verbose (bool, optional): If True, print verbose information during optimization. Defaults to False. 30 | metrics (list, optional): List of metrics to evaluate the optimization. Defaults to []. 31 | p (float, optional): Probability of replacing a word with a synonym. Defaults to 0.5. 
32 | """ 33 | super().__init__(verbose, metrics) 34 | self.p = p 35 | nltk.download("wordnet") 36 | self.tokenizer = tiktoken.get_encoding("cl100k_base") 37 | 38 | def get_word_pos(self, word: str) -> str: 39 | """ 40 | Get the part of speech of a word. 41 | 42 | Args: 43 | word (str): The word. 44 | 45 | Returns: 46 | str: The part of speech of the word. 47 | """ 48 | pos = wordnet.synset(word + ".n.01").pos() 49 | if pos.startswith("n"): 50 | return "n" 51 | elif pos.startswith("v"): 52 | return "v" 53 | elif pos.startswith("a"): 54 | return "a" 55 | elif pos.startswith("r"): 56 | return "r" 57 | else: 58 | return None 59 | 60 | def syn_replace(self, word: str) -> str: 61 | """ 62 | Replace a word with its synonym. 63 | 64 | Args: 65 | word (str): The word. 66 | 67 | Returns: 68 | str: The best replacement synonym for the word. 69 | """ 70 | best_replacement = word 71 | best_l = len(self.tokenizer.encode(word)) 72 | if best_l > 1: 73 | for syn in wordnet.synsets(word): 74 | for lemma in syn.lemmas(): 75 | synonym_word = lemma.name() 76 | l_new = len(self.tokenizer.encode(synonym_word)) 77 | if l_new < best_l: 78 | best_replacement = synonym_word 79 | return best_replacement 80 | 81 | def optimize(self, prompt: str) -> str: 82 | """ 83 | Replaces words in the prompt with their synonyms. 84 | 85 | Args: 86 | prompt (str): The input prompt. 87 | 88 | Returns: 89 | str: The optimized prompt with replaced synonyms. 90 | """ 91 | words = prompt.split() 92 | opti_words = [] 93 | for word in words: 94 | new_word = self.syn_replace(word) 95 | if new_word != word and random.uniform(0, 1) <= self.p: 96 | opti_words.append(new_word) 97 | else: 98 | opti_words.append(word) 99 | 100 | return " ".join(opti_words) 101 | -------------------------------------------------------------------------------- /prompt_optimizer/poptim/utils.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, List, Tuple 2 | 3 | 4 | class DotDict(dict): 5 | """ 6 | DotDict is a subclass of the built-in dict class that allows accessing dictionary keys using dot notation. 7 | It provides the ability to get and set attributes as if they were dictionary keys. 8 | 9 | Example: 10 | d = DotDict() 11 | d['key'] = 'value' 12 | print(d.key) # Output: 'value' 13 | """ 14 | 15 | def __getattr__(self, attr: str) -> Any: 16 | """ 17 | Get the value associated with the given attribute. 18 | 19 | Args: 20 | attr (str): The attribute name. 21 | 22 | Returns: 23 | Any: The value associated with the attribute. 24 | 25 | Raises: 26 | AttributeError: If the attribute does not exist in the dictionary. 27 | """ 28 | if attr in self: 29 | return self[attr] 30 | raise AttributeError( 31 | f"'{self.__class__.__name__}' object has no attribute '{attr}'" 32 | ) 33 | 34 | def __setattr__(self, attr: str, value: Any) -> None: 35 | """ 36 | Set the value associated with the given attribute. 37 | 38 | Args: 39 | attr (str): The attribute name. 40 | value (Any): The value to be associated with the attribute. 41 | 42 | Returns: 43 | None 44 | """ 45 | self[attr] = value 46 | 47 | 48 | class ParseError(Exception): 49 | """ 50 | ParseError is a custom exception class raised when a parsing error occurs. 51 | It inherits from the built-in Exception class. 52 | 53 | Attributes: 54 | message (str): The error message describing the parsing error. 55 | prompt (str): The prompt where the parsing error occurred. 
56 | """ 57 | 58 | def __init__(self, message: str, prompt: str) -> None: 59 | """ 60 | Initialize a new ParseError instance. 61 | 62 | Args: 63 | message (str): The error message describing the parsing error. 64 | prompt (str): The prompt where the parsing error occurred. 65 | 66 | Returns: 67 | None 68 | """ 69 | super().__init__(message) 70 | self.prompt = prompt 71 | 72 | def __str__(self) -> str: 73 | """ 74 | Return a string representation of the ParseError instance. 75 | 76 | Returns: 77 | str: A formatted string representing the ParseError instance. 78 | Example: "ParseError: in `Prompt`: " 79 | """ 80 | return f"ParseError: {self.args[0]} in `Prompt`: {self.prompt}" 81 | 82 | 83 | def parse_protect_tags(prompt: str, protect_tag: str) -> Tuple[List[str], List[str]]: 84 | """ 85 | Parse the given prompt and extract protected chunks enclosed by protect tags. 86 | 87 | Args: 88 | prompt (str): The prompt string to parse. 89 | protect_tag (str): The protect tag used to enclose the protected chunks. 90 | 91 | Returns: 92 | Tuple[List[str], List[str]]: A tuple containing two lists. 93 | - The first list contains the chunks of the prompt that are not protected. 94 | - The second list contains the protected chunks extracted from the prompt. 95 | 96 | Raises: 97 | ParseError: If there are nested protect tags, an unclosed protect tag, or invalid protect tag sequences. 98 | """ 99 | protect_start_tag = f"<{protect_tag}>" 100 | protect_end_tag = f"" 101 | 102 | chunks = [] 103 | protected_chunks = [] 104 | 105 | stack = [] 106 | start_idx = 0 107 | 108 | for i in range(len(prompt)): 109 | if prompt[i : i + len(protect_start_tag)] == protect_start_tag: 110 | if len(stack) != 0: # nested ignore tags make no sense 111 | raise ParseError("Nested ignore tags not allowed", prompt) 112 | 113 | stack.append(i) 114 | chunks.append(prompt[start_idx:i]) 115 | 116 | elif prompt[i : i + len(protect_end_tag)] == protect_end_tag: 117 | start_idx = i + len(protect_end_tag) 118 | if len(stack) == 0: 119 | raise ParseError( 120 | f"Invalid protect tag sequence. {protect_end_tag} must follow an unclosed {protect_start_tag}", 121 | prompt, 122 | ) 123 | 124 | protect_start_index = stack.pop() 125 | protect_content = prompt[protect_start_index + len(protect_start_tag) : i] 126 | protected_chunks.append(protect_content) 127 | 128 | if protect_content.startswith( 129 | protect_start_tag 130 | ) or protect_content.endswith(protect_end_tag): 131 | raise ParseError("Invalid protect tag sequence.", prompt) 132 | 133 | if len(stack) > 0: 134 | raise ParseError( 135 | f"All {protect_start_tag} must be followed by a corresponding {protect_end_tag}", 136 | prompt, 137 | ) 138 | 139 | chunks.append(prompt[start_idx:]) 140 | assert ( 141 | len(chunks) == len(protected_chunks) + 1 142 | ), f"Invalid tag parsing for string: {prompt}" 143 | 144 | return chunks, protected_chunks 145 | 146 | 147 | def protected_runner(run: Callable) -> Callable: 148 | """ 149 | Decorator function that runs the provided 'run' function in chunks for a given object and prompt. 150 | It extracts protected chunks from the prompt and runs the 'run' function on each non-protected chunk. 151 | 152 | Args: 153 | run (Callable): The function to run on each non-protected chunk. 154 | 155 | Returns: 156 | Callable: A wrapper function that performs the chunked execution of the 'run' function. 
157 | 158 | Example: 159 | @protected_runner 160 | def my_run_function(obj, prompt, *args, **kwargs): 161 | # Perform some operations on prompt 162 | return optimized_prompt 163 | 164 | # Usage 165 | optimized_result = my_run_function(my_obj, my_prompt, my_args, my_kwargs) 166 | """ 167 | 168 | def run_in_chunks(obj: object, prompt: str, *args, **kwargs) -> str: 169 | protect_tag = obj.protect_tag 170 | opti_prompt = "" 171 | 172 | if protect_tag is not None: 173 | chunks, protected_chunks = parse_protect_tags(prompt, protect_tag) 174 | protected_chunks.append("") # to make indexing easier 175 | 176 | for i, chunk in enumerate(chunks): 177 | if len(chunk): 178 | opti_chunk = run(obj, chunk, *args, **kwargs) 179 | else: 180 | opti_chunk = "" 181 | opti_prompt += opti_chunk + protected_chunks[i] 182 | 183 | elif len(prompt): 184 | opti_prompt = run(obj, prompt, *args, **kwargs) 185 | 186 | else: 187 | opti_prompt = prompt 188 | 189 | return opti_prompt 190 | 191 | return run_in_chunks 192 | -------------------------------------------------------------------------------- /prompt_optimizer/visualize/__init__.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.visualize.stringdiffer import StringDiffer 2 | 3 | __all__ = ["StringDiffer"] 4 | -------------------------------------------------------------------------------- /prompt_optimizer/visualize/stringdiffer.py: -------------------------------------------------------------------------------- 1 | from difflib import ndiff 2 | 3 | 4 | class StringDiffer: 5 | def __init__(self): 6 | """ 7 | Initializes a StringDiffer object with the original and optimized strings. 8 | """ 9 | pass 10 | 11 | def __call__(self, original: str, optimized: str) -> None: 12 | """ 13 | Prints the visualized difference between the original and optimized strings. 14 | Deletions are shown in red, insertions in green, and unchanged parts in default color. 15 | 16 | Args: 17 | original (str): The original string. 18 | optimized (str): The optimized string. 19 | """ 20 | original = str(original) 21 | optimized = str(optimized) 22 | 23 | diff = list(ndiff(original, optimized)) 24 | output = "" 25 | for op, _, value in diff: 26 | if op == "-": 27 | output += f"\033[91m{value}\033[0m" # Red color for deletions 28 | elif op == "+": 29 | output += f"\033[92m{value}\033[0m" # Green color for insertions 30 | else: 31 | output += value 32 | print(output) 33 | -------------------------------------------------------------------------------- /prompt_optimizer/wrapper/__init__.py: -------------------------------------------------------------------------------- 1 | from prompt_optimizer.wrapper.base import Wrapper 2 | from prompt_optimizer.wrapper.openai import OpenAIWrapper 3 | from prompt_optimizer.wrapper.sql_db import SQLDBManager 4 | 5 | __all__ = [ 6 | "OpenAIWrapper", 7 | "SQLDBManager", 8 | "Wrapper", 9 | ] 10 | -------------------------------------------------------------------------------- /prompt_optimizer/wrapper/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | # import tiktoken 4 | 5 | 6 | class Wrapper(ABC): 7 | """ 8 | Abstract base class for a wrapper. 9 | 10 | Attributes: 11 | db_manager: The database manager object. 12 | poptimizer: The poptimizer object. 13 | tokenizer: The tokenizer object. 14 | """ 15 | 16 | def __init__(self, db_manager, poptimizer): 17 | """ 18 | Initializes a new instance of the Wrapper class. 
19 | 20 | Args: 21 | db_manager: The database manager object. 22 | poptimizer: The poptimizer object. 23 | """ 24 | self.db_manager = db_manager 25 | self.poptimizer = poptimizer 26 | # self.tokenizer = tiktoken.get_encoding("cl100k_base") 27 | 28 | # def token_count( 29 | # self, messages: Union[List[Dict[str, str]], str], json: bool = True 30 | # ) -> int: 31 | # """ 32 | # Calculates the total token count for the given messages. 33 | 34 | # Args: 35 | # messages: The list of messages or a single message string. 36 | # json: Indicates whether the messages are in JSON format (default: True). 37 | 38 | # Returns: 39 | # The total token count. 40 | 41 | # Raises: 42 | # TypeError: If messages is not a list or a string. 43 | # """ 44 | # if json is True: 45 | # c = sum([len(self.tokenizer.encode(m["content"])) for m in messages]) 46 | # elif isinstance(messages, list): 47 | # c = sum([len(self.tokenizer.encode(m)) for m in messages]) 48 | # else: 49 | # c = len(self.tokenizer.encode(messages)) 50 | # return c 51 | 52 | @abstractmethod 53 | def wrap(self, *args, **kwargs): 54 | """ 55 | Abstract method for wrapping. 56 | 57 | Args: 58 | *args: Variable length argument list. 59 | **kwargs: Arbitrary keyword arguments. 60 | """ 61 | pass 62 | -------------------------------------------------------------------------------- /prompt_optimizer/wrapper/openai.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | from typing import Any, Callable, Dict 4 | 5 | import tiktoken 6 | 7 | from prompt_optimizer.wrapper.base import Wrapper 8 | 9 | 10 | class OpenAIWrapper(Wrapper): 11 | """ 12 | Wrapper class for OpenAI API. 13 | 14 | Inherits from the base Wrapper class. 15 | 16 | Attributes: 17 | db_manager: The database manager object. 18 | poptimizer: The poptimizer object. 19 | """ 20 | 21 | def __init__(self, db_manager, poptimizer): 22 | """ 23 | Initializes a new instance of the OpenAIWrapper class. 24 | 25 | Args: 26 | db_manager: The database manager object. 27 | poptimizer: The poptimizer object. 28 | """ 29 | super().__init__(db_manager, poptimizer) 30 | 31 | def num_tokens_from_messages(self, messages, model="gpt-3.5-turbo-0301"): 32 | """ 33 | Source: https://stackoverflow.com/a/76044069 34 | https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb 35 | Returns the number of tokens used by a list of messages.""" 36 | try: 37 | encoding = tiktoken.encoding_for_model(model) 38 | except KeyError: 39 | print("Warning: model not found. Using cl100k_base encoding.") 40 | encoding = tiktoken.get_encoding("cl100k_base") 41 | if model == "gpt-3.5-turbo": 42 | print( 43 | "Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301." 44 | ) 45 | return self.num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301") 46 | elif model == "gpt-4": 47 | print( 48 | "Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314." 
49 | ) 50 | return self.num_tokens_from_messages(messages, model="gpt-4-0314") 51 | elif model == "gpt-3.5-turbo-0301": 52 | tokens_per_message = ( 53 | 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n 54 | ) 55 | tokens_per_name = -1 # if there's a name, the role is omitted 56 | elif model == "gpt-4-0314": 57 | tokens_per_message = 3 58 | tokens_per_name = 1 59 | else: 60 | raise NotImplementedError(f"""not implemented for model {model}""") 61 | num_tokens = 0 62 | 63 | if isinstance(messages, list): 64 | for message in messages: 65 | num_tokens += tokens_per_message 66 | for key, value in message.items(): 67 | num_tokens += len(encoding.encode(value)) 68 | if key == "name": 69 | num_tokens += tokens_per_name 70 | num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> 71 | elif isinstance(messages, str): 72 | num_tokens += len(encoding.encode(messages)) 73 | return num_tokens 74 | 75 | def wrap(self, openai_func: Callable[..., Any], *args, **kwargs) -> Dict[str, Any]: 76 | """ 77 | Wraps the OpenAI function with additional functionality. 78 | 79 | Args: 80 | openai_func: The OpenAI function to be wrapped. 81 | *args: Variable length argument list. 82 | **kwargs: Arbitrary keyword arguments. 83 | 84 | Returns: 85 | The response from the OpenAI function. 86 | 87 | Raises: 88 | KeyError: If the 'model' or 'messages' key is missing in kwargs. 89 | """ 90 | model = kwargs["model"] 91 | timestamp = int(time.time()) 92 | messages_before = kwargs["messages"] 93 | 94 | if self.poptimizer is not None: 95 | start_time = time.time() 96 | optimized_messages = self.poptimizer.run_json(messages_before) 97 | optimizer_runtime = time.time() - start_time 98 | kwargs["messages"] = optimized_messages 99 | else: 100 | optimizer_runtime = 0 101 | optimized_messages = {} 102 | 103 | prompt_before_token_count = self.num_tokens_from_messages(messages_before) 104 | prompt_after_token_count = self.num_tokens_from_messages(optimized_messages) 105 | 106 | request_start_time = time.time() 107 | try: 108 | response = openai_func(*args, **kwargs) 109 | continuation_token_count = response["usage"]["completion_tokens"] 110 | continuation = json.dumps(response.choices[0]) 111 | error = 0 112 | error_name = "None" 113 | except Exception as e: 114 | response, error = None, 1 # keep `response` defined so the later return does not raise NameError 115 | error_name = type(e).__name__ 116 | continuation = "None" 117 | continuation_token_count = 0 118 | 119 | request_runtime = time.time() - request_start_time 120 | 121 | if self.db_manager: 122 | with self.db_manager: 123 | self.db_manager.add( 124 | [ 125 | timestamp, 126 | self.db_manager.username, 127 | json.dumps(messages_before), 128 | json.dumps(optimized_messages), 129 | continuation, 130 | prompt_before_token_count, 131 | prompt_after_token_count, 132 | continuation_token_count, 133 | model, 134 | error, 135 | error_name, 136 | optimizer_runtime, 137 | request_runtime, 138 | ] 139 | ) 140 | return response 141 | 142 | def __call__(self, *args, **kwargs) -> Dict[str, Any]: 143 | """ 144 | Calls the OpenAIWrapper instance as a function. 145 | 146 | Args: 147 | *args: Variable length argument list. 148 | **kwargs: Arbitrary keyword arguments. 149 | 150 | Returns: 151 | The response from the OpenAI function.
152 | """ 153 | return self.wrap(*args, **kwargs) 154 | -------------------------------------------------------------------------------- /prompt_optimizer/wrapper/sql_db.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sqlite3 3 | from typing import Optional, Tuple 4 | 5 | 6 | class SQLDBManager: 7 | """ 8 | A class to manage an SQLite database. 9 | 10 | Attributes: 11 | database_name: The name of the SQLite database file. 12 | connection: The database connection object. 13 | cursor: The database cursor object. 14 | """ 15 | 16 | def __init__( 17 | self, project_name: str = "default_project", database_path: Optional[str] = None 18 | ): 19 | """ 20 | Initializes a new SQLDBManager object. 21 | 22 | Args: 23 | project_name: The name of the project. 24 | database_path: The path to the SQLite database file. 25 | """ 26 | if database_path is None: 27 | home_dir = os.path.expanduser("~") 28 | database_dir = os.path.join(home_dir, ".prompt_optim") 29 | os.makedirs(database_dir, exist_ok=True) 30 | self.database_path = os.path.join(database_dir, "default.db") 31 | else: 32 | self.database_path = database_path 33 | 34 | self.connection = None 35 | self.cursor = None 36 | self.project_name = project_name 37 | self.table_name = self.project_name 38 | self.username = "default" 39 | 40 | def set_user(self, username): 41 | self.username = username 42 | 43 | def __enter__(self): 44 | """ 45 | Establishes the database connection and cursor when entering the context. 46 | """ 47 | self.connect() 48 | self.create_table() 49 | return self 50 | 51 | def __exit__(self, exc_type, exc_value, traceback): 52 | """ 53 | Closes the database connection and cursor when exiting the context. 54 | """ 55 | self.close() 56 | 57 | def connect(self): 58 | """ 59 | Connects to the SQLite database. 60 | """ 61 | try: 62 | self.connection = sqlite3.connect(self.database_path) 63 | self.cursor = self.connection.cursor() 64 | except sqlite3.Error as e: 65 | print(f"Error connecting to the SQLite database: {e}") 66 | 67 | def create_table(self): 68 | """ 69 | Creates a table in the database if it doesn't exist. 70 | 71 | Args: 72 | table_name: The name of the table. 73 | """ 74 | try: 75 | self.cursor.execute( 76 | f"""CREATE TABLE IF NOT EXISTS {self.table_name} ( 77 | id INTEGER PRIMARY KEY AUTOINCREMENT, 78 | timestamp DATETIME, 79 | username TEXT, 80 | prompt_before TEXT, 81 | prompt_after TEXT, 82 | continuation TEXT, 83 | prompt_before_token_count INTEGER, 84 | prompt_after_token_count INTEGER, 85 | continuation_token_count INTEGER, 86 | model_name TEXT, 87 | error INTEGER, 88 | error_name TEXT, 89 | optimizer_latency FLOAT, 90 | request_latency FLOAT 91 | )""" 92 | ) 93 | 94 | except sqlite3.Error as e: 95 | print(f"Error creating table: {e}") 96 | 97 | def add(self, data: Tuple) -> bool: 98 | """ 99 | Adds data to the specified table. 100 | 101 | Args: 102 | data: A tuple containing the data to be added. 103 | 104 | Returns: 105 | bool: `True` if successfully inserted values else `False`. 
106 | """ 107 | try: 108 | self.cursor.execute( 109 | f"""INSERT INTO {self.table_name} ( 110 | timestamp, 111 | username, 112 | prompt_before, 113 | prompt_after, 114 | continuation, 115 | prompt_before_token_count, 116 | prompt_after_token_count, 117 | continuation_token_count, 118 | model_name, 119 | error, 120 | error_name, 121 | optimizer_latency, 122 | request_latency 123 | ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", 124 | data, 125 | ) 126 | self.connection.commit() 127 | 128 | except sqlite3.Error as e: 129 | print(f"Error adding data: {e}") 130 | return False 131 | 132 | return True 133 | 134 | def close(self): 135 | """ 136 | Closes the database connection and cursor. 137 | """ 138 | if self.cursor: 139 | self.cursor.close() 140 | if self.connection: 141 | self.connection.close() 142 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "prompt-optimizer" 3 | version = "0.2.1" 4 | description = "" 5 | authors = ["Vaibhav Kumar <34630911+TimeTraveller-San@users.noreply.github.com>"] 6 | readme = "README.md" 7 | packages = [{include = "prompt_optimizer"}] 8 | 9 | [tool.poetry.scripts] 10 | prompt-optimizer = "prompt_optimizer.cli.main:main" 11 | 12 | [tool.poetry.dependencies] 13 | python = ">=3.8.1,<4.0" 14 | transformers = "^4.27.4" 15 | torch = "^2.0.0" 16 | pulp = "^2.7.0" 17 | nltk = "^3.8.1" 18 | tiktoken = "^0.3.3" 19 | autocorrect = "^2.6.1" 20 | black = "^23.3.0" 21 | isort = "^5.12.0" 22 | openai = "^0.27.8" 23 | python-dotenv = "^1.0.0" 24 | 25 | [tool.poetry.group.dev.dependencies] 26 | sphinx = "^6.1.3" 27 | sphinx-book-theme = "^1.0.1" 28 | ruff = "^0.0.261" 29 | mypy = "^1.2.0" 30 | pytest = "^7.3.0" 31 | black = "^23.3.0" 32 | langchain = "^0.0.173" 33 | myst-parser = "^1.0.0" 34 | 35 | [tool.poetry.group.docs.dependencies] 36 | autodoc_pydantic = "^1.8.0" 37 | nbsphinx = "^0.8.9" 38 | sphinx-autobuild = "^2021.3.14" 39 | sphinx_rtd_theme = "^1.0.0" 40 | sphinx-typlog-theme = "^0.8.0" 41 | toml = "^0.10.2" 42 | linkchecker = "^10.2.1" 43 | sphinx-copybutton = "^0.5.1" 44 | 45 | [tool.poetry.group.test.dependencies] 46 | pytest = "^7.2.0" 47 | pytest-cov = "^4.0.0" 48 | pytest-dotenv = "^0.5.2" 49 | duckdb-engine = "^0.7.0" 50 | pytest-watcher = "^0.2.6" 51 | freezegun = "^1.2.2" 52 | responses = "^0.22.0" 53 | pytest-asyncio = "^0.20.3" 54 | 55 | [tool.ruff] 56 | select = [ 57 | "E", # pycodestyle 58 | "F", # pyflakes 59 | "I", # isort 60 | ] 61 | line-length = 88 62 | fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"] 63 | ignore = ["E501", "F403"] 64 | 65 | 66 | [tool.mypy] 67 | ignore_missing_imports = "True" 68 | disallow_untyped_defs = "True" 69 | exclude = ["notebooks"] 70 | 71 | [build-system] 72 | requires = ["poetry-core"] 73 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/tests/__init__.py -------------------------------------------------------------------------------- 
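The unit tests that follow exercise each optimizer against the fixture prompts under tests/unit_tests/data. A minimal way to invoke just this package with pytest is sketched below; it assumes the test dependencies declared in the pyproject.toml above are installed, and the `-q` flag is only an illustrative choice, not a project convention.

import pytest

# Collect and run only the unit test package; pytest.main returns the exit code.
exit_code = pytest.main(["tests/unit_tests", "-q"])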
/tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/tests/unit_tests/__init__.py -------------------------------------------------------------------------------- /tests/unit_tests/data/prompt1.txt: -------------------------------------------------------------------------------- 1 | Instructions: You will be presented with an audit question and four options A through D. Your task is to select the option that is closest to the correctanswer. For each question you must select one of the following answers: 'A', 'B', 'C', 'D'. Do not explain. Do not include the number in your response. 2 | Which of the following is typically not included in the inquiry letter sent an unfavorable outcome. B. A disclaimer regarding the likelihood of settlement of pendinglitigation. C. An estimate of the range of potential loss. D. A listing of pending or threatened litigation, claims, or assessments. For each question you must select one of the following answers: 'A', 'B', 'C', 'D'. 3 | 4 | -------------------------------------------------------------------------------- /tests/unit_tests/data/prompt2.txt: -------------------------------------------------------------------------------- 1 | Some economists have responded positively to Bitcoin, including 2 | Francois R. Velde, senior economist of the Federal Reserve in Chicago 3 | who described it as "an elegant solution to the problem of creating a 4 | digital currency." In November 2013 Richard Branson announced that 5 | Virgin Galactic would accept Bitcoin as payment, saying that he had invested 6 | in Bitcoin and found it "fascinating how a whole new global currency 7 | has been created", encouraging others to also invest in Bitcoin. 8 | Other economists commenting on Bitcoin have been critical. 9 | Economist Paul Krugman has suggested that the structure of the currency 10 | incentivizes hoarding and that its value derives from the expectation that 11 | others will accept it as payment. Economist Larry Summers has expressed 12 | a "wait and see" attitude when it comes to Bitcoin. Nick Colas, a market 13 | strategist for ConvergEx Group, has remarked on the effect of increasing 14 | use of Bitcoin and its restricted supply, noting, "When incremental 15 | adoption meets relatively fixed supply, it should be no surprise that 16 | prices go up. And that’s exactly what is happening to BTC prices. Francois also said" -------------------------------------------------------------------------------- /tests/unit_tests/test_autocorrect_optim.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.metric import BERTScoreMetric, TokenMetric 3 | from prompt_optimizer.poptim import AutocorrectOptim 4 | 5 | 6 | def test_autocorrect_optim(): 7 | prompt = utils.load_prompt("prompt1.txt") 8 | p_optimizer = AutocorrectOptim( 9 | verbose=True, metrics=[TokenMetric(), BERTScoreMetric()] 10 | ) 11 | optimized_prompt = p_optimizer(prompt) 12 | assert len(optimized_prompt) > 0, "Failed!" 
13 | -------------------------------------------------------------------------------- /tests/unit_tests/test_entropy_optim.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.metric import TokenMetric 3 | from prompt_optimizer.poptim import EntropyOptim 4 | 5 | 6 | def test_entropy_optim(): 7 | prompt = utils.load_prompt("prompt1.txt") 8 | p_optimizer = EntropyOptim(verbose=True, p=0.1, metrics=[TokenMetric()]) 9 | optimized_prompt = p_optimizer(prompt) 10 | assert len(optimized_prompt) > 0, "Failed!" 11 | -------------------------------------------------------------------------------- /tests/unit_tests/test_lemmatizer_optim.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.metric import TokenMetric 3 | from prompt_optimizer.poptim import LemmatizerOptim 4 | 5 | 6 | def test_lemmatizer_optim(): 7 | prompt = utils.load_prompt("prompt1.txt") 8 | p_optimizer = LemmatizerOptim(verbose=True, metrics=[TokenMetric()]) 9 | optimized_prompt = p_optimizer(prompt) 10 | assert len(optimized_prompt) > 0, "Failed!" 11 | -------------------------------------------------------------------------------- /tests/unit_tests/test_name_replace_optim.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.metric import TokenMetric 3 | from prompt_optimizer.poptim import NameReplaceOptim 4 | 5 | 6 | def test_name_replace_optim(): 7 | prompt = utils.load_prompt("prompt2.txt") 8 | p_optimizer = NameReplaceOptim(verbose=True, metrics=[TokenMetric()]) 9 | optimized_prompt = p_optimizer(prompt) 10 | assert len(optimized_prompt) > 0, "Failed!" 11 | -------------------------------------------------------------------------------- /tests/unit_tests/test_openai_wrapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | from prompt_optimizer.poptim import StopWordOptim 4 | from prompt_optimizer.wrapper.sql_db import SQLDBManager 5 | from prompt_optimizer.wrapper.openai import OpenAIWrapper 6 | from dotenv import load_dotenv 7 | 8 | load_dotenv() 9 | openai.api_key = os.getenv("OPENAI_API_KEY") 10 | 11 | 12 | def test_openai_wrapper(): 13 | # p_optimizer = StopWordOptim(verbose=True) 14 | # sql_db = SQLDBManager() 15 | # oai_wrapper = OpenAIWrapper(sql_db, p_optimizer) 16 | # response = oai_wrapper( 17 | # openai.ChatCompletion.create, 18 | # model="gpt-3.5-turbo", 19 | # messages=[ 20 | # {"role": "system", "content": "You are a helpful assistant."}, 21 | # {"role": "user", "content": "Who won the world series in 2020?"}, 22 | # {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}, 23 | # {"role": "user", "content": "Where was it played?"} 24 | # ] 25 | # ) 26 | # print(f"response: {response}") 27 | response = True 28 | 29 | assert response is not None, "Failed!"
30 | 31 | 32 | test_openai_wrapper() -------------------------------------------------------------------------------- /tests/unit_tests/test_protected_tags.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.poptim import PunctuationOptim 3 | 4 | 5 | def test_punctuation_optim(): 6 | prompt = "Yharnam is a fictional city that is the primary setting of Bloodborne <pt>,</pt> a 2015 video game developed by FromSoftware." 7 | p_optimizer = PunctuationOptim(protect_tag="pt", verbose=True) 8 | optimized_prompt = p_optimizer(prompt) 9 | print(prompt) 10 | print(optimized_prompt) 11 | assert "," in optimized_prompt.content, "protect tags not working" 12 | -------------------------------------------------------------------------------- /tests/unit_tests/test_punctuation_optim.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.metric import TokenMetric 3 | from prompt_optimizer.poptim import PunctuationOptim 4 | 5 | 6 | def test_punctuation_optim(): 7 | prompt = utils.load_prompt("prompt1.txt") 8 | p_optimizer = PunctuationOptim(verbose=True, metrics=[TokenMetric()]) 9 | optimized_prompt = p_optimizer(prompt) 10 | assert len(optimized_prompt) > 0, "Failed!" 11 | -------------------------------------------------------------------------------- /tests/unit_tests/test_sequential.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.poptim import ( 3 | AutocorrectOptim, 4 | LemmatizerOptim, 5 | PunctuationOptim, 6 | Sequential, 7 | ) 8 | 9 | 10 | def test_sequential(): 11 | prompt = utils.load_prompt("prompt1.txt") 12 | 13 | p_optimizer = Sequential( 14 | LemmatizerOptim(verbose=True), 15 | PunctuationOptim(verbose=True), 16 | AutocorrectOptim(verbose=True), 17 | ) 18 | optimized_prompt = p_optimizer(prompt) 19 | assert len(optimized_prompt) > 0, "Failed!" 20 | -------------------------------------------------------------------------------- /tests/unit_tests/test_sql_db.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from prompt_optimizer.wrapper import SQLDBManager 3 | 4 | def test_db(): 5 | db_manager = SQLDBManager("temp", "temp.db") 6 | with db_manager: 7 | data = [ 8 | datetime.now(), 9 | "test_user", 10 | "prompt before", 11 | "prompt after", 12 | "continuation", 13 | 2, 14 | 2, 15 | 1, 16 | "text-davinci-003", 17 | 0, 18 | "None", 19 | 0.12, 20 | 0.11 21 | ] 22 | success = db_manager.add(data) 23 | assert success, "failed" 24 | -------------------------------------------------------------------------------- /tests/unit_tests/test_stop_word_optim.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.metric import TokenMetric 3 | from prompt_optimizer.poptim import StopWordOptim 4 | 5 | 6 | def test_stop_word_optim(): 7 | prompt = utils.load_prompt("prompt1.txt") 8 | p_optimizer = StopWordOptim(verbose=True, metrics=[TokenMetric()]) 9 | optimized_prompt = p_optimizer(prompt) 10 | assert len(optimized_prompt) > 0, "Failed!"
11 | -------------------------------------------------------------------------------- /tests/unit_tests/test_synonym_replace_optim.py: -------------------------------------------------------------------------------- 1 | from tests.unit_tests import utils 2 | from prompt_optimizer.metric import TokenMetric 3 | from prompt_optimizer.poptim import SynonymReplaceOptim 4 | 5 | 6 | def test_synonym_replace_optim(): 7 | prompt = utils.load_prompt("prompt1.txt") 8 | p_optimizer = SynonymReplaceOptim(verbose=True, p=1.0, metrics=[TokenMetric()]) 9 | optimized_prompt = p_optimizer(prompt) 10 | assert len(optimized_prompt) > 0, "Failed!" 11 | -------------------------------------------------------------------------------- /tests/unit_tests/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def load_prompt(prompt_f): 5 | file_path = os.path.abspath( 6 | os.path.join(os.path.dirname(__file__), "data", prompt_f) 7 | ) 8 | with open(file_path, "r") as f: 9 | data = f.read() 10 | return data 11 | --------------------------------------------------------------------------------
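Taken together, the modules above compose into a small pipeline: an optimizer (optionally with protect tags and metrics) rewrites a prompt, the visualizer shows the character diff, and the wrapper logs before/after token counts around an API call. A minimal usage sketch follows; the prompt string, the protected `<pt>` span, and the variable names are illustrative only and are not taken from the repository files.

from prompt_optimizer.metric import TokenMetric
from prompt_optimizer.poptim import PunctuationOptim
from prompt_optimizer.visualize import StringDiffer

# Punctuation inside the <pt>...</pt> span is protected from removal by the optimizer.
prompt = 'Please summarise the report below , briefly ! <pt>Respond with valid JSON only.</pt>'
p_optimizer = PunctuationOptim(protect_tag="pt", verbose=True, metrics=[TokenMetric()])
res = p_optimizer(prompt)

StringDiffer()(prompt, res.content)  # colored character-level diff of original vs optimized prompt
print(res.metrics)                   # list of metric results, e.g. token counts reported by TokenMetric

The same optimizer instance could also be handed to OpenAIWrapper together with an SQLDBManager, along the lines of the commented-out example in test_openai_wrapper.py above.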