├── .env.example
├── .flake8
├── .github
│   ├── CONTRIBUTING.md
│   └── workflows
│       ├── linkcheck.yml
│       ├── lint.yml
│       ├── release.yml
│       └── test.yml
├── .gitignore
├── .readthedocs.yaml
├── LICENSE
├── Makefile
├── README.md
├── docs
│   ├── Makefile
│   ├── _static
│   │   ├── logo_small.png
│   │   └── tradeoff.png
│   ├── conf.py
│   ├── evaluations
│   │   ├── extending_evals.md
│   │   └── openai_evals.md
│   ├── extend
│   │   ├── custom_metrics.md
│   │   └── custom_optims.md
│   ├── getting_started
│   │   ├── cli.md
│   │   ├── getting_started.md
│   │   ├── installation.md
│   │   └── metrics.md
│   ├── index.rst
│   ├── make.bat
│   ├── reference.rst
│   ├── requirements.txt
│   └── theory
│       └── cost_performance_tradeoff.md
├── evaluations
│   ├── README.md
│   ├── artifacts
│   │   ├── % Tokens Reduced_graph.png
│   │   ├── LogiQA Accuracy_graph.png
│   │   ├── USD Saved Per $100_graph.png
│   │   ├── kevin.gif
│   │   ├── logo.png
│   │   ├── logo_small.png
│   │   ├── table.md
│   │   └── tradeoff.png
│   ├── compute_metric.py
│   ├── eval.py
│   ├── logs
│   │   ├── Autocorrect_Optim.jsonl
│   │   ├── Default.jsonl
│   │   ├── Entropy_Optim_p_0.05.jsonl
│   │   ├── Entropy_Optim_p_0.1.jsonl
│   │   ├── Entropy_Optim_p_0.25.jsonl
│   │   ├── Entropy_Optim_p_0.5.jsonl
│   │   ├── Lemmatizer_Optim.jsonl
│   │   ├── NameReplace_Optim.jsonl
│   │   ├── Pulp_Optim_p_0.05.jsonl
│   │   ├── Pulp_Optim_p_0.1.jsonl
│   │   ├── Punctuation_Optim.jsonl
│   │   ├── Stemmer_Optim.jsonl
│   │   └── SynonymReplace_Optim_p_1.0.jsonl
│   ├── make_artifacts.py
│   ├── make_errors.py
│   ├── results.csv
│   ├── results
│   │   ├── Autocorrect_Optim.jsonl
│   │   ├── Default.jsonl
│   │   ├── Entropy_Optim_p_0.05.jsonl
│   │   ├── Entropy_Optim_p_0.1.jsonl
│   │   ├── Entropy_Optim_p_0.25.jsonl
│   │   ├── Entropy_Optim_p_0.5.jsonl
│   │   ├── Lemmatizer_Optim.jsonl
│   │   ├── NameReplace_Optim.jsonl
│   │   ├── Pulp_Optim_p_0.05.jsonl
│   │   ├── Pulp_Optim_p_0.1.jsonl
│   │   ├── Punctuation_Optim.jsonl
│   │   ├── Stemmer_Optim.jsonl
│   │   └── SynonymReplace_Optim_p_1.0.jsonl
│   ├── sample_logs
│   │   └── generate_db.py
│   ├── tradeoff.py
│   └── utils.py
├── examples
│   ├── bertscore_metric.py
│   ├── cli
│   │   ├── data
│   │   │   └── example.jsonl
│   │   ├── json_stopwordoptim.sh
│   │   └── string_stopwordoptim.sh
│   ├── entropy_optimizer.py
│   ├── json_support.py
│   ├── langchain_support.py
│   ├── protect_tags.py
│   └── sequential.py
├── poetry.lock
├── prompt_optimizer
│   ├── __init__.py
│   ├── cli
│   │   ├── __init__.py
│   │   └── main.py
│   ├── metric
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── bertscore_metric.py
│   │   └── token_metric.py
│   ├── poptim
│   │   ├── __init__.py
│   │   ├── autocorrect_optim.py
│   │   ├── base.py
│   │   ├── entropy_optim.py
│   │   ├── lemmatizer_optim.py
│   │   ├── logger.py
│   │   ├── name_replace_optim.py
│   │   ├── pulp_optim.py
│   │   ├── punctuation_optim.py
│   │   ├── sequential.py
│   │   ├── stemmer_optim.py
│   │   ├── stop_word_optim.py
│   │   ├── synonym_replace_optim.py
│   │   └── utils.py
│   ├── visualize
│   │   ├── __init__.py
│   │   └── stringdiffer.py
│   └── wrapper
│       ├── __init__.py
│       ├── base.py
│       ├── openai.py
│       └── sql_db.py
├── pyproject.toml
└── tests
    ├── __init__.py
    └── unit_tests
        ├── __init__.py
        ├── data
        │   ├── prompt1.txt
        │   └── prompt2.txt
        ├── test_autocorrect_optim.py
        ├── test_entropy_optim.py
        ├── test_lemmatizer_optim.py
        ├── test_name_replace_optim.py
        ├── test_openai_wrapper.py
        ├── test_protected_tags.py
        ├── test_punctuation_optim.py
        ├── test_sequential.py
        ├── test_sql_db.py
        ├── test_stop_word_optim.py
        ├── test_synonym_replace_optim.py
        └── utils.py
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | count = True
3 | statistics = True
4 | max-line-length = 88
5 | ignore = E731,W503,E203,E741
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to PromptOptimizer
2 |
3 | Thank you for considering contributing to PromptOptimizer.
4 | To contribute to this project, please follow a ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow.
5 |
6 | ## Contributing Guidelines
7 |
8 | ### GitHub Issues Format
9 | TBD
10 |
11 |
12 | ## Quick Start
13 |
14 | This project uses [Poetry](https://python-poetry.org/) as a dependency manager. Check out Poetry's [documentation on how to install it](https://python-poetry.org/docs/#installation) on your system before proceeding.
15 | To install requirements:
16 |
17 | ```bash
18 | poetry install -E all
19 | ```
20 |
21 | This will install all requirements for running the package, examples, linting, formatting, tests, and coverage. Note the `-E all` flag will install all optional dependencies necessary for integration testing.
22 |
23 | ❗Note: If you're running Poetry 1.4.1 and receive a `WheelFileValidationError` for `debugpy` during installation, you can try either downgrading to Poetry 1.4.0 or disabling "modern installation" (`poetry config installer.modern-installation false`) and re-install requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
24 |
25 | Now, you should be able to run the common tasks in the following section.
26 |
27 | ## Common Tasks
28 |
29 | Type `make` for a list of common tasks.
30 |
31 | ### Code Formatting
32 |
33 | Formatting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/) and [isort](https://pycqa.github.io/isort/).
34 |
35 | To run formatting for this project:
36 |
37 | ```bash
38 | make format
39 | ```
40 |
41 | ### Linting
42 |
43 | Linting for this project is done via a combination of [Black](https://black.readthedocs.io/en/stable/), isort, and ruff (see the `lint` target in the Makefile).
44 |
45 | To run linting for this project:
46 |
47 | ```bash
48 | make lint
49 | ```
50 |
51 | ### Testing
52 |
53 | Unit tests cover modular logic that does not require calls to outside APIs.
54 |
55 | To run unit tests:
56 |
57 | ```bash
58 | make test
59 | ```
60 |
61 | If you add new logic, please add a unit test.
62 |
63 | ## Documentation
64 |
65 | ### Contribute Documentation
66 |
67 | Docs are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code.
68 |
69 | For that reason, we ask that you add good documentation to all classes and methods.
70 |
71 |
72 | ### Build Documentation Locally
73 |
74 | Before building the documentation, it is always a good idea to clean the build directory:
75 |
76 | ```bash
77 | make docs_clean
78 | ```
79 |
80 | Next, you can run the linkchecker to make sure all links are valid:
81 |
82 | ```bash
83 | make docs_linkcheck
84 | ```
85 |
86 | Finally, you can build the documentation as outlined below:
87 |
88 | ```bash
89 | make docs_build
90 | ```
91 |
92 | # Extension contribution
93 | Apart from improving, fixing, and optimizing the existing code, there are three possible directions for extension contributions:
94 |
95 | ## More Evaluations
96 | There is no one prompt optimizer that works for all tasks. Extending evaluations by introducing more tasks will help in choosing the right optimizer for the right task.
97 | The [Evaluations](https://github.com/vaibkumr/prompt-optimizer/tree/master/evaluations) directory of our project can be used as a starting point to design evaluations and run batch evaluation experiments for various optimizers. Evaluations are different from metrics, which measure optimization quality on the fly to decide whether the optimized prompt should be used. Evaluations run over a set of LLM tasks with ideal responses to assess the quality of optimizations. In simple words, for evaluations we have both the input and the label (the ideal LLM response), while for metrics we only have the input (before and after optimization).
98 |
99 |
100 | ## More Optimizers
101 | Certainly more and better optimizers are possible. Please see [custom optimizers](.) for a guide on how to create custom new optimizers using this library.
102 |
103 |
104 | ## More Metrics
105 | Better metrics for evaluating the quality of prompt optimizations are certainly possible. Please see [custom metrics](.) for a guide on how to create new custom metrics using this library.
106 |
--------------------------------------------------------------------------------
/.github/workflows/linkcheck.yml:
--------------------------------------------------------------------------------
1 | name: linkcheck
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | pull_request:
7 |
8 | env:
9 | POETRY_VERSION: "1.3.1"
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 | strategy:
15 | matrix:
16 | python-version:
17 | - "3.11"
18 | steps:
19 | - uses: actions/checkout@v3
20 | - name: Install poetry
21 | run: |
22 | pipx install poetry==$POETRY_VERSION
23 | - name: Set up Python ${{ matrix.python-version }}
24 | uses: actions/setup-python@v4
25 | with:
26 | python-version: ${{ matrix.python-version }}
27 | cache: poetry
28 | - name: Install dependencies
29 | run: |
30 | poetry install --with docs
31 | - name: Build the docs
32 | run: |
33 | make docs_build
34 | - name: Analyzing the docs with linkcheck
35 | run: |
36 | make docs_linkcheck
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: lint
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | pull_request:
7 |
8 | env:
9 | POETRY_VERSION: "1.3.1"
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 | strategy:
15 | matrix:
16 | python-version:
17 | - "3.8"
18 | - "3.9"
19 | - "3.10"
20 | - "3.11"
21 | steps:
22 | - uses: actions/checkout@v3
23 | - name: Install poetry
24 | run: |
25 | pipx install poetry==$POETRY_VERSION
26 | - name: Set up Python ${{ matrix.python-version }}
27 | uses: actions/setup-python@v4
28 | with:
29 | python-version: ${{ matrix.python-version }}
30 | cache: poetry
31 | - name: Install dependencies
32 | run: |
33 | poetry install
34 | - name: Analysing the code with our lint
35 | run: |
36 | make lint
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: release
2 |
3 | on:
4 | pull_request:
5 | types:
6 | - closed
7 | branches:
8 | - master
9 | paths:
10 | - 'pyproject.toml'
11 |
12 | env:
13 | POETRY_VERSION: "1.4.2"
14 |
15 | jobs:
16 | pypi:
17 | runs-on: ubuntu-latest
18 | steps:
19 | - uses: actions/checkout@v3
20 | - name: Install poetry
21 | run: pipx install poetry==$POETRY_VERSION
22 | - name: Set up Python 3.10
23 | uses: actions/setup-python@v4
24 | with:
25 | python-version: "3.10"
26 | cache: "poetry"
27 | - name: Build project for distribution
28 | run: poetry build
29 | - name: Check Version
30 | id: check-version
31 | run: |
32 | echo version=$(poetry version --short) >> $GITHUB_OUTPUT
33 | - name: Create Release
34 | uses: ncipollo/release-action@v1
35 | with:
36 | artifacts: "dist/*"
37 | token: ${{ secrets.GITHUB_TOKEN }}
38 | draft: false
39 | generateReleaseNotes: true
40 | tag: v${{ steps.check-version.outputs.version }}
41 | commit: master
42 | - name: Publish to PyPI
43 | env:
44 | POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_API_TOKEN }}
45 | run: |
46 | poetry publish
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: test
2 |
3 | on:
4 | push:
5 | branches: [master]
6 | pull_request:
7 |
8 | env:
9 | POETRY_VERSION: "1.3.1"
10 |
11 | jobs:
12 | build:
13 | runs-on: ubuntu-latest
14 | strategy:
15 | matrix:
16 | python-version:
17 | - "3.8"
18 | - "3.9"
19 | - "3.10"
20 | - "3.11"
21 | steps:
22 | - uses: actions/checkout@v3
23 | - name: Install poetry
24 | run: pipx install poetry==$POETRY_VERSION
25 | - name: Set up Python ${{ matrix.python-version }}
26 | uses: actions/setup-python@v4
27 | with:
28 | python-version: ${{ matrix.python-version }}
29 | cache: "poetry"
30 | - name: Install dependencies
31 | run: poetry install
32 | - name: Run unit tests
33 | run: |
34 | make test
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | notes.md
2 | __pycache__
3 | *.pyc
4 | dist
5 | docs/_build
6 | *_cache
7 | temp/
8 | .env
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # Read the Docs configuration file
2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3 |
4 | # Required
5 | version: 2
6 |
7 | # Set the version of Python and other tools you might need
8 | build:
9 | os: ubuntu-22.04
10 | tools:
11 | python: "3.11"
12 |
13 | # Build documentation in the docs/ directory with Sphinx
14 | sphinx:
15 | configuration: docs/conf.py
16 |
17 | # If using Sphinx, optionally build your docs in additional formats such as PDF
18 | # formats:
19 | # - pdf
20 |
21 | # Optionally declare the Python requirements required to build your docs
22 | python:
23 | install:
24 | - requirements: docs/requirements.txt
25 | - method: pip
26 | path: .
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright (c) Harrison Chase
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | docs_build:
2 | cd docs && poetry run make html
3 |
4 | docs_clean:
5 | cd docs && poetry run make clean
6 |
7 |
8 | docs_linkcheck:
9 | poetry run linkchecker docs/_build/html/index.html
10 |
11 | lint lint_diff:
12 | poetry run isort prompt_optimizer/
13 | poetry run black prompt_optimizer/
14 | poetry run ruff prompt_optimizer/ --fix
15 |
16 | test:
17 | poetry run pytest tests/unit_tests
18 |
19 | help:
20 | @echo '----'
21 | @echo 'docs_build - build the sphinx documentation'
22 | @echo 'docs_clean - clean the documentation build artifacts'
23 | @echo 'test - run unit tests'
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## PromptOptimizer
4 |
5 |
6 |
7 | Minimize LLM token complexity to save API costs and model computations.
8 |
9 |
10 |
11 |
12 | [](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/lint.yml)
13 | [](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/test.yml)
14 | [](https://github.com/vaibkumr/prompt-optimizer/actions/workflows/linkcheck.yml)
15 | [](https://opensource.org/licenses/MIT)
16 |
17 | [Docs](https://promptoptimizer.readthedocs.io/en/latest/)
18 |
19 |
20 |
21 |
22 | # Features
23 | - **Plug and Play Optimizers:** Minimize token complexity using optimization methods without any access to weights, logits or decoding algorithm. Directly applicable to virtually all NLU systems.
24 | - **Protected Tags:** Special protected tags to mark important sections of prompt that should not be removed/modified.
25 | - **Sequential Optimization:** Chain different optimizers together sequentially.
26 | - **Optimization Metrics:** Number of tokens reduced and semantic similarity before and after optimization.
27 | - **Langchain and JSON Support:** Supports langchain style prompt chains and OpenAI request JSON objects.
28 |
29 | # Why?
30 | - **Minimize Token Complexity:** Token Complexity is the amount of prompt tokens required to achieve a given task. Reducing token complexity corresponds to linearly reducing API costs and quadratically reducing computational complexity of usual transformer models.
31 | - **Save Money:** For large businesses, saving 10% on token count can lead to saving 100k USD per 1M USD.
32 | - **Extend Limitations:** Some models have small context lengths; prompt optimizers can help them process documents larger than the context length.
33 |
34 | | Prompt | # Tokens | Correct Response? |
35 | | ------------------------------------------------------- | ---------- | ------------------- |
36 | | Who is the president of the United States of America? | 11 | ✅ |
37 | | Who president US | 3 (-72%) | ✅ |
38 |
39 | # Installation
40 | ### Quick Installation
41 | ```pip install prompt-optimizer```
42 |
43 | ### Install from source
44 | ```bash
45 | git clone https://github.com/vaibkumr/prompt-optimizer.git;
46 | cd prompt-optimizer;
47 | pip install -e .
48 | ```
49 |
50 | # Disclaimer
51 | There is a compression vs performance tradeoff -- the increase in compression comes at the cost of a loss in model performance. The tradeoff can be greatly mitigated by choosing the right optimizer for a given task. There is no single optimizer for all cases. There is no Adam here.
52 |
53 |
54 | # Getting started
55 |
56 | ```python
57 |
58 | from prompt_optimizer.poptim import EntropyOptim
59 |
60 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne."""
61 | p_optimizer = EntropyOptim(verbose=True, p=0.1)
62 | optimized_prompt = p_optimizer(prompt)
63 | print(optimized_prompt)
64 |
65 | ```
66 | # Evaluations
67 | Following are the results for the [logiqa](https://github.com/openai/evals/blob/main/evals/registry/evals/logiqa.yaml) OpenAI evals task, performed only on a subset of the first 100 samples. Please note that optimizer performance on this task should not be generalized to other tasks; more thorough testing and domain knowledge are needed to choose the optimal optimizer.
68 |
69 | | Name | % Tokens Reduced | LogiQA Accuracy | USD Saved Per $100 |
70 | | --- | --- | --- | --- |
71 | | Default | 0.0 | 0.32 | 0.0 |
72 | | Entropy_Optim_p_0.05 | 0.06 | 0.3 | 6.35 |
73 | | Entropy_Optim_p_0.1 | 0.11 | 0.28 | 11.19 |
74 | | Entropy_Optim_p_0.25 | 0.26 | 0.22 | 26.47 |
75 | | Entropy_Optim_p_0.5 | 0.5 | 0.08 | 49.65 |
76 | | SynonymReplace_Optim_p_1.0 | 0.01 | 0.33 | 1.06 |
77 | | Lemmatizer_Optim | 0.01 | 0.33 | 1.01 |
78 | | NameReplace_Optim | 0.01 | 0.34 | 1.13 |
79 | | Punctuation_Optim | 0.13 | 0.35 | 12.81 |
80 | | Autocorrect_Optim | 0.01 | 0.3 | 1.14 |
81 | | Pulp_Optim_p_0.05 | 0.05 | 0.31 | 5.49 |
82 | | Pulp_Optim_p_0.1 | 0.1 | 0.25 | 9.52 |
83 |
84 | # Cost-Performance Tradeoff
85 | The reduction in cost often comes with a loss in LLM performance. Almost every optimizer has hyperparameters that control this tradeoff.
86 |
87 | For example, in `EntropyOptim` the hyperparameter `p`, a floating point number between 0 and 1, controls the ratio of tokens to remove. `p=1.0` corresponds to removing all tokens while `p=0.0` corresponds to removing none.
88 |
89 | The following chart shows the trade-off for different values of `p` as evaluated on the OpenAI evals [logiqa](https://github.com/openai/evals/blob/main/evals/registry/evals/logiqa.yaml) task for a subset of the first 100 samples.
90 |
91 |
92 |
93 |
94 |
95 | # Contributing
96 | There are several directions to contribute to. Please see [CONTRIBUTING.md](.github/CONTRIBUTING.md) for contribution guidelines and possible future directions.
97 |
98 | # Social
99 | Contact us on twitter [Vaibhav Kumar](https://twitter.com/vaibhavk1o1) and [Vaibhav Kumar](https://twitter.com/vaibhavk97).
100 |
101 | # Inspiration
102 |
103 |
104 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/_static/logo_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/docs/_static/logo_small.png
--------------------------------------------------------------------------------
/docs/_static/tradeoff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/docs/_static/tradeoff.png
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 |
16 | sys.path.insert(0, os.path.abspath("../"))
17 |
18 | # import toml
19 |
20 | # with open("../pyproject.toml") as f:
21 | # data = toml.load(f)
22 |
23 | # -- Project information -----------------------------------------------------
24 |
25 | project = "prompt-optimizer"
26 | copyright = "2023, Vaibhav Kumar, Vaibhav Kumar"
27 | author = "Vaibhav Kumar, Vaibhav Kumar"
28 |
29 | # version = data["tool"]["poetry"]["version"]
30 | # release = version
31 |
32 |
33 | # -- General configuration ---------------------------------------------------
34 |
35 | # Add any Sphinx extension module names here, as strings. They can be
36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
37 | # ones.
38 | extensions = [
39 | "sphinx.ext.autodoc",
40 | "sphinx.ext.autodoc.typehints",
41 | "sphinx.ext.autosummary",
42 | "sphinx.ext.napoleon",
43 | "sphinx.ext.viewcode",
44 | "sphinxcontrib.autodoc_pydantic",
45 | "sphinx_copybutton",
46 | "myst_parser",
47 | ]
48 | source_suffix = [".ipynb", ".html", ".md", ".rst"]
49 |
50 | # Add any paths that contain templates here, relative to this directory.
51 | templates_path = ["_templates"]
52 |
53 | # List of patterns, relative to source directory, that match files and
54 | # directories to ignore when looking for source files.
55 | # This pattern also affects html_static_path and html_extra_path.
56 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
57 |
58 |
59 | # -- Options for HTML output -------------------------------------------------
60 |
61 | # The theme to use for HTML and HTML Help pages. See the documentation for
62 | # a list of builtin themes.
63 | #
64 | html_theme = "sphinx_rtd_theme"
65 | html_logo = "_static/logo_small.png"
66 |
67 | html_theme_options = {
68 | 'logo_only': False,
69 | 'display_version': True,
70 | 'prev_next_buttons_location': 'bottom',
71 | 'collapse_navigation': True,
72 | 'sticky_navigation': True,
73 | 'navigation_depth': 4,
74 | 'includehidden': True,
75 | 'titles_only': False
76 | }
77 |
78 | html_context = {
79 | "display_github": True, # Integrate GitHub
80 | "github_user": "vaibkumr", # Username
81 | "github_repo": "prompt-optimizer", # Repo name
82 | "github_version": "master", # Version
83 | "conf_py_path": "/docs/", # Path in the checkout to the docs root
84 | }
85 |
86 | # Add any paths that contain custom static files (such as style sheets) here,
87 | # relative to this directory. They are copied after the builtin static files,
88 | # so a file named "default.css" will overwrite the builtin "default.css".
89 | html_static_path = ["_static"]
90 | nb_execution_mode = "off"
91 | myst_enable_extensions = ["colon_fence"]
92 |
--------------------------------------------------------------------------------
/docs/evaluations/extending_evals.md:
--------------------------------------------------------------------------------
1 | # Extending Evaluations
2 | There is no one prompt optimizer that works for all tasks. Extending evaluations by introducing more tasks will help in choosing the right optimizer for the right task.
3 |
4 | The [Evaluations](https://github.com/vaibkumr/prompt-optimizer/tree/master/evaluations) directory of our project can be used to run batch evaluation experiments for various optimizers.
5 |
6 | Please consider contributing more evaluations.
--------------------------------------------------------------------------------
/docs/evaluations/openai_evals.md:
--------------------------------------------------------------------------------
1 | # Evaluation
2 | Similar to LLMs, creating optimizers is easy but evaluating them is not. Evaluating prompt optimizers is the same as evaluating LLMs, just before and after optimization on the same prompts and tasks.
3 |
4 |
5 | ## [OpenAI Evals](https://github.com/openai/evals)
6 | Evals is a framework for evaluating Large Language Models (LLMs). It offers a range of evaluation challenges that can be used to measure the quality of optimizations.
7 |
8 | ### LogiQA
9 | [LogiQA](https://github.com/openai/evals/pull/470): A Challenge Dataset for Machine Reading Comprehension with Logical Reasoning
10 | We use the first 100 samples of the LogiQA eval to generate the following results:
11 |
12 |
13 | | Name | % Tokens Reduced | LogiQA Accuracy | USD Saved Per $100 |
14 | | --- | --- | --- | --- |
15 | | Default | 0.0 | 0.32 | 0.0 |
16 | | Entropy_Optim_p_0.05 | 0.06 | 0.3 | 6.35 |
17 | | Entropy_Optim_p_0.1 | 0.11 | 0.28 | 11.19 |
18 | | Entropy_Optim_p_0.25 | 0.26 | 0.22 | 26.47 |
19 | | Entropy_Optim_p_0.5 | 0.5 | 0.08 | 49.65 |
20 | | SynonymReplace_Optim_p_1.0 | 0.01 | 0.33 | 1.06 |
21 | | Lemmatizer_Optim | 0.01 | 0.33 | 1.01 |
22 | | Stemmer_Optim | -0.06 | 0.09 | -5.91 |
23 | | NameReplace_Optim | 0.01 | 0.34 | 1.13 |
24 | | Punctuation_Optim | 0.13 | 0.35 | 12.81 |
25 | | Autocorrect_Optim | 0.01 | 0.3 | 1.14 |
26 | | Pulp_Optim_p_0.05 | 0.05 | 0.31 | 5.49 |
27 | | Pulp_Optim_p_0.1 | 0.1 | 0.25 | 9.52 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/docs/extend/custom_metrics.md:
--------------------------------------------------------------------------------
1 | # Creating Custom Metrics
2 | All metrics are computed between the original and the optimized prompt. They must extend the `prompt_optimizer.metric.Metric` class.
3 |
4 |
5 | A custom `MyCustomMetric` metric will look as follows:
6 |
7 | ```python
8 |
9 | from prompt_optimizer.metric.base import Metric
10 |
11 | class MyCustomMetric(Metric):
12 |
13 | def __init__(self, *args, **kwargs):
14 | super().__init__(*args, **kwargs)
15 |
16 | def run(self, prompt_before: str, prompt_after: str) -> dict:
17 | return {'metric_name': 0.0}
18 | ```
19 |
20 | To create a custom metric, just implement the `run` function, which takes two strings as input: `prompt_before`, the original prompt, and `prompt_after`, the prompt after optimization. The function must return a dictionary whose key(s) and value(s) correspond to metric names and their computed values.
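
Once defined, a custom metric can be attached to any optimizer in the same way as the built-in metrics. A minimal sketch, assuming the `MyCustomMetric` class above; `StopWordOptim` is used purely for illustration:

```python
from prompt_optimizer.poptim import StopWordOptim

# Attach the custom metric exactly like a built-in one (e.g. TokenMetric).
p_optimizer = StopWordOptim(metrics=[MyCustomMetric()])
res = p_optimizer("The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark.")
print(res.metrics)  # e.g. [{'metric_name': 0.0}]
```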
21 |
22 | If you implement some metrics, please consider contributing them to this project.
--------------------------------------------------------------------------------
/docs/extend/custom_optims.md:
--------------------------------------------------------------------------------
1 | # Creating Custom PromptOptimizers
2 | All prompt optimizers must extend the `prompt_optimizer.poptim.PromptOptim` base class.
3 |
4 | A custom `MyCustomOptim` optimizer will look as follows:
5 |
6 | ```python
7 | from prompt_optimizer.poptim.base import PromptOptim
8 |
9 | class MyCustomOptim(PromptOptim):
10 | def __init__(self, *args, **kwargs):
11 | super().__init__(*args, **kwargs)
12 |
13 | def optimize(self, prompt: str) -> str:
14 | opti_prompt = prompt
15 | return opti_prompt
16 | ```
17 |
18 | To create an optimizer, we just need to implement the `optimize` function, which takes a string as input and returns the optimized string.
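
As a concrete sketch, here is a hypothetical optimizer that simply collapses runs of whitespace. It is illustrative only and not part of the library:

```python
import re

from prompt_optimizer.poptim.base import PromptOptim


class WhitespaceOptim(PromptOptim):
    """Illustrative optimizer: collapse runs of whitespace into single spaces."""

    def optimize(self, prompt: str) -> str:
        # Replace any run of whitespace (spaces, tabs, newlines) with one space.
        return re.sub(r"\s+", " ", prompt).strip()
```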
19 |
20 | If you implement some optimizers, please consider contributing them to this project.
--------------------------------------------------------------------------------
/docs/getting_started/cli.md:
--------------------------------------------------------------------------------
1 | # PromptOptimizer CLI
2 | PromptOptimizer provides a command line interface `prompt_optimizer.cli.main:main` to run prompt optimizations and metrics.
3 |
4 | - Type `prompt-optimizer --help` on the command line:
5 |
6 | ```
7 |
8 | usage: prompt-optimizer [-h] [--json JSON] [--skip_system SKIP_SYSTEM]
9 | [--optimizer_args [OPTIMIZER_ARGS ...]] [--metrics [METRICS ...]]
10 | [--log_file LOG_FILE]
11 | prompt_data_or_path optimizer_name
12 |
13 | Prompt Optimizer CLI
14 |
15 | positional arguments:
16 | prompt_data_or_path Either the prompt data (string or json string) or path to a file containing new
17 | line separated prompt data.
18 | optimizer_name Name of the optimizer.
19 |
20 | options:
21 | -h, --help show this help message and exit
22 | --json JSON Prompt format JSON or not.
23 | --skip_system SKIP_SYSTEM
24 | Skip system prompts or not. Only valid if `json` is True.
25 | --optimizer_args [OPTIMIZER_ARGS ...]
26 | Additional arguments for the optimizer.
27 | --metrics [METRICS ...]
28 | List of metrics to compute.
29 | --log_file LOG_FILE Output file to append results to. Prints on `stdout` if `None`.
30 | ```
31 |
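For instance, a minimal invocation might look like the following sketch. The exact optimizer and metric name strings accepted are defined in `prompt_optimizer.cli.main`; `StopWordOptim` and `TokenMetric` here are assumptions for illustration:

```bash
prompt-optimizer "Who is the president of the United States of America?" StopWordOptim --metrics TokenMetric
```
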
32 | Some [Examples](https://github.com/vaibkumr/prompt-optimizer/tree/master/examples/cli) are given to get started with CLI!
--------------------------------------------------------------------------------
/docs/getting_started/getting_started.md:
--------------------------------------------------------------------------------
1 | # Quickstart Guide
2 |
3 | Welcome to PromptOptimizer! This guide will help you quickly get started with using PromptOptimizer in your projects. PromptOptimizer is a Python library that allows you to minimize token complexity in order to save API costs and reduce model computations.
4 |
5 | # Installation
6 | ### Quick Installation
7 |
8 | To quickly install PromptOptimizer, use the following command:
9 | ```bash
10 | pip install prompt-optimizer
11 | ```
12 |
13 | ### Install from Source
14 | If you prefer to install PromptOptimizer from source, follow these steps:
15 |
16 | 1. Clone the repository:
17 | ```bash
18 | git clone https://github.com/vaibkumr/prompt-optimizer.git
19 | ```
20 | 2. Navigate to the cloned repository:
21 | ```bash
22 | cd prompt-optimizer
23 | ```
24 | 3. Install PromptOptimizer using pip:
25 | ```bash
26 | pip install -e .
27 | ```
28 |
29 | # Prompt Optimizers
30 | A prompt optimizer is a callable class that outputs optimized prompt data along with metrics (if requested) for given input prompt data.
31 |
32 | > Note: Optimizers return a result object with a `content` key storing the optimized prompt and a `metrics` key storing the requested metric computation results.
33 |
34 | To optimize a prompt we follow three steps:
35 | 1. Import the optimizer from the range of available [optimizers](../optimizers/index.html). For now, we use `EntropyOptim`.
36 |
37 | ```python
38 | from prompt_optimizer.poptim import EntropyOptim
39 | ```
40 | 2. Initialize the optimizer object. Each optimizer has its own arguments, which can be tuned to balance the cost-performance tradeoff.
41 |
42 | ```python
43 | p_optimizer = EntropyOptim(p=0.1)
44 | ```
45 |
46 | 3. Run the optimizer over a given prompt string and fetch the results
47 | ```python
48 | prompt = "In Nightmare of Mensis progress through until you reach the boss room."
49 | result = p_optimizer(prompt)
50 | optimized_prompt = result.content
51 | ```
52 |
53 | And we're done! We just optimized our first prompt, saved some money and if we're smart, we had no loss in model performance.
54 |
55 | # Input Formats
56 | Prompt optimizers support three different formats:
57 | 1. **String:** A basic python string. At the core, all optimizers work on python strings.
58 |
59 | ```python
60 | from prompt_optimizer.poptim import EntropyOptim
61 | p_optimizer = EntropyOptim(p=0.1)
62 | prompt = "In Nightmare of Mensis progress through until you reach the boss room."
63 | result = p_optimizer(prompt)
64 | optimized_prompt = result.content
65 | ```
66 |
67 | 2. **JSON Object:** APIs often accept instructions in the form of system and human messages. JSON objects of the following format can be passed to the optimizers using the `json` boolean flag:
68 | ```json
69 | [
70 | {
71 | "role":"system",
72 | "content":"System instructions..."
73 | },
74 | {
75 | "role":"user",
76 | "content":"User prompt..."
77 | }
78 | ]
79 | ```
80 | Oftentimes, it is important to skip optimizing system instructions. This can be done using the `skip_system` flag as follows:
81 |
82 | ```python
83 | from prompt_optimizer.poptim import EntropyOptim
84 | p_optimizer = EntropyOptim(p=0.1)
85 | prompt = [
86 | {
87 | "role":"system",
88 | "content":"System instructions..."
89 | },
90 | {
91 | "role":"user",
92 | "content":"User prompt..."
93 | }
94 | ]
95 | optimized_prompt = p_optimizer(prompt, json=True, skip_system=True)
96 | ```
97 |
98 | 3. **Langchain Object:** Langchain agents accept prompts as a list of `SystemMessage` and `HumanMessage`. Prompt optimizers can directly be applied to these objects by using the `langchain` boolean flag. Again, `skip_system` flag can be used to skip optimizing system prompts as follows:
99 |
100 | ```python
101 | from prompt_optimizer.poptim import EntropyOptim
102 | from langchain.schema import (
103 | HumanMessage,
104 | SystemMessage
105 | )
106 |
107 | p_optimizer = EntropyOptim(p=0.1)
108 | prompt = [
109 | SystemMessage(content="You are a helpful assistant that translates English to French."),
110 | HumanMessage(content="I love programming.")
111 | ]
112 | optimized_prompt = p_optimizer(prompt, langchain=True, skip_system=True)
113 | ```
114 |
--------------------------------------------------------------------------------
/docs/getting_started/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | ### Quick Installation
3 |
4 | To quickly install PromptOptimizer, use the following command:
5 | ```bash
6 | pip install prompt-optimizer
7 | ```
8 |
9 | ### Install from Source
10 | If you prefer to install PromptOptimizer from source, follow these steps:
11 |
12 | 1. Clone the repository:
13 | ```bash
14 | git clone https://github.com/vaibkumr/prompt-optimizer.git
15 | ```
16 | 2. Navigate to the cloned repository:
17 | ```bash
18 | cd prompt-optimizer
19 | ```
20 | 3. Install PromptOptimizer using pip:
21 | ```bash
22 | pip install -e .
23 | ```
--------------------------------------------------------------------------------
/docs/getting_started/metrics.md:
--------------------------------------------------------------------------------
1 | # Metrics
2 | Given a prompt and its corresponding optimized prompt, we can compute several metrics for sanity checks, logging and more. We might need to check the percentage of tokens saved, semantic similarity between optimized and original prompt text (BERTScore), sentiment before and after optimization and much more. All of this can be done by extending the `prompt_optimizer.metric.Metric` class.
3 |
4 | # Running Metrics
5 | All metrics extend the `prompt_optimizer.metric.Metric` abstract class.
6 | To evaluate a metric, pass the list of metric objects in the `metrics` keyword argument of the prompt object as follows:
7 |
8 | ```python
9 | from prompt_optimizer.metric import TokenMetric
10 | from prompt_optimizer.poptim import StopWordOptim
11 |
12 | p_optimizer = StopWordOptim(metrics=[TokenMetric()])
13 | ```
14 | After specifying metrics, the prompt result object has an additional key `metrics` containing a list of dictionaries, where each key is a metric name string and each value is the computed metric value.
15 |
16 |
17 | ```python
18 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne. """
19 | res = p_optimizer(prompt)
20 | for metric in res.metrics:
21 |     for key, value in metric.items():
22 | print(f"{key}: {value:.3f}")
23 | ```
24 |
25 | A list of all metrics can be found here.
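
Multiple metrics can also be passed together. A sketch, assuming the BERTScore metric exported from `prompt_optimizer/metric/bertscore_metric.py` is named `BertScoreMetric` (check the module for the exact class name):

```python
from prompt_optimizer.metric import TokenMetric, BertScoreMetric  # BertScoreMetric name is assumed
from prompt_optimizer.poptim import StopWordOptim

# Both metrics are computed and reported in res.metrics after optimization.
p_optimizer = StopWordOptim(metrics=[TokenMetric(), BertScoreMetric()])
```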
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. prompt-optimizer documentation master file, created by
2 | sphinx-quickstart on Fri Apr 7 15:53:36 2023.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to PromptOptimizer!
7 | ============================================
8 |
9 |
10 | Minimize LLM token complexity to save API costs and model computations.
11 |
12 | PromptOptimizer is a Python library designed to minimize the token complexity of natural language understanding (NLU) systems, thereby reducing API costs and computational overhead.
13 | It offers a range of optimizers to achieve this optimization while maintaining the integrity of important sections of the prompt.
14 |
15 | Disclaimer
16 | ----------------
17 | There is a compression vs performance tradeoff -- the increase in compression comes at the cost of a loss in model performance. The tradeoff can be greatly mitigated by choosing the right optimizer for a given task. There is no single optimizer for all cases. There is no Adam here.
18 |
19 | Read more about this in `Cost-Performance Tradeoff <./theory/cost_performance_tradeoff.html>`_
20 |
21 |
22 |
23 | Getting Started
24 | ----------------
25 |
26 | | How to get started using PromptOptimizer and minimize token complexity.
27 |
28 | - `Quickstart Guide <./getting_started/getting_started.html>`_
29 |
30 | | Compression metrics for sanity checks and logging.
31 |
32 | - `Optimization Metrics <./getting_started/metrics.html>`_
33 |
34 | | PromptOptimizer CLI
35 |
36 | - `CLI <./getting_started/cli.html>`_
37 |
38 | .. toctree::
39 | :maxdepth: 2
40 | :caption: Getting Started
41 | :name: getting_started
42 | :hidden:
43 |
44 | getting_started/getting_started.md
45 | getting_started/metrics.md
46 | getting_started/cli.md
47 |
48 | Extending PromptOptimizer
49 | -------------------------
50 | You can create custom prompt optimizers
51 |
52 | - `Custom PromptOptimizers <./extend/custom_optims.html>`_
53 |
54 | It is also easy to create custom metrics
55 |
56 | - `Custom Metrics <./extend/custom_metrics.html>`_
57 |
58 | .. toctree::
59 | :maxdepth: 1
60 | :caption: Extending PromptOptimizer
61 | :name: extend
62 | :hidden:
63 |
64 | extend/custom_optims.md
65 | extend/custom_metrics.md
66 |
67 | Evaluations
68 | -----------
69 | There is no one prompt optimizer that works for all tasks.
70 | Through evaluations over a diverse set of tasks we can make the right choice of optimizer for a new task.
71 |
72 | Extending Evaluations to include more tasks
73 |
74 | - `Extending Evaluations <./evaluations/extending_evals.html>`_
75 |
76 | Evaluating prompt optimizers is the same as evaluating LLMs before and after optimization and measuring the differences. We thus provide OpenAI Evals compatibility to facilitate this.
77 |
78 | - `OpenAI Evals Compatibility <./evaluations/openai_evals.html>`_
79 |
80 | .. toctree::
81 | :maxdepth: 1
82 | :caption: Evaluations
83 | :name: evals
84 | :hidden:
85 |
86 | evaluations/extending_evals.md
87 | evaluations/openai_evals.md
88 |
89 | Cost-Performance Tradeoff
90 | -------------------------
91 | The reduction in cost often comes with a loss in LLM performance. Almost every optimizer has hyperparameters that control this tradeoff.
92 |
93 | - `Cost-Performance Tradeoff <./theory/cost_performance_tradeoff.html>`_
94 |
95 | .. toctree::
96 | :maxdepth: 1
97 | :caption: Cost-Performance Tradeoff
98 | :name: tradeoff
99 | :hidden:
100 |
101 | theory/cost_performance_tradeoff.md
102 |
103 |
104 | Reference Documentations
105 | =========================
106 | Full documentation on all classes and methods for PromptOptimizer.
107 |
108 | - `Reference Documentations <./reference.html>`_
109 | - `Installation Guide <./getting_started/installation.html>`_
110 |
111 | .. toctree::
112 | :maxdepth: 1
113 | :caption: Reference Documentations
114 | :name: reference
115 | :hidden:
116 |
117 | ./getting_started/installation.md
118 | ./reference.rst
119 |
120 |
121 | Indices and tables
122 | ==================
123 |
124 | * :ref:`genindex`
125 | * :ref:`search`
126 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/reference.rst:
--------------------------------------------------------------------------------
1 | Reference Documentations
2 | =========================
3 |
4 | Prompt Optimizer
5 | =========================
6 | .. automodule:: prompt_optimizer.poptim
7 | :members:
8 |
9 |
10 | Metrics
11 | =========================
12 | .. automodule:: prompt_optimizer.metric
13 | :members:
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | autodoc_pydantic==1.8.0
2 | myst_parser
3 | nbsphinx==0.8.9
4 | sphinx==4.5.0
5 | sphinx-autobuild==2021.3.14
6 | sphinx_book_theme
7 | sphinx_rtd_theme==1.0.0
8 | sphinx-typlog-theme==0.8.0
9 | sphinx-panels
10 | toml
11 | myst_nb
12 | sphinx_copybutton
13 | pydata-sphinx-theme==0.13.1
--------------------------------------------------------------------------------
/docs/theory/cost_performance_tradeoff.md:
--------------------------------------------------------------------------------
1 | # Cost-Performance Tradeoff
2 | The reduction in cost often comes with a loss in LLM performance. Almost every optimizer has hyperparameters that control this tradeoff.
3 |
4 | For example, in `EntropyOptim` the hyperparameter `p`, a floating point number between 0 and 1, controls the ratio of tokens to remove. `p=1.0` corresponds to removing all tokens while `p=0.0` corresponds to removing none.
5 |
6 | The following chart shows the trade-off for different values of `p` as evaluated on the OpenAI [logiqa](https://github.com/openai/evals/blob/main/evals/registry/evals/logiqa.yaml) task.
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/evaluations/README.md:
--------------------------------------------------------------------------------
1 | # TODO: Explain evaluations here
--------------------------------------------------------------------------------
/evaluations/artifacts/% Tokens Reduced_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/% Tokens Reduced_graph.png
--------------------------------------------------------------------------------
/evaluations/artifacts/LogiQA Accuracy_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/LogiQA Accuracy_graph.png
--------------------------------------------------------------------------------
/evaluations/artifacts/USD Saved Per $100_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/USD Saved Per $100_graph.png
--------------------------------------------------------------------------------
/evaluations/artifacts/kevin.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/kevin.gif
--------------------------------------------------------------------------------
/evaluations/artifacts/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/logo.png
--------------------------------------------------------------------------------
/evaluations/artifacts/logo_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/logo_small.png
--------------------------------------------------------------------------------
/evaluations/artifacts/table.md:
--------------------------------------------------------------------------------
1 | | Name | % Tokens Reduced | LogiQA Accuracy | USD Saved Per $100 |
2 | | --- | --- | --- | --- |
3 | | Default | 0.0 | 0.32 | 0.0 |
4 | | Entropy_Optim_p_0.05 | 0.06 | 0.3 | 6.35 |
5 | | Entropy_Optim_p_0.1 | 0.11 | 0.28 | 11.19 |
6 | | Entropy_Optim_p_0.25 | 0.26 | 0.22 | 26.47 |
7 | | Entropy_Optim_p_0.5 | 0.5 | 0.08 | 49.65 |
8 | | SynonymReplace_Optim_p_1.0 | 0.01 | 0.33 | 1.06 |
9 | | Lemmatizer_Optim | 0.01 | 0.33 | 1.01 |
10 | | Stemmer_Optim | -0.06 | 0.09 | -5.91 |
11 | | NameReplace_Optim | 0.01 | 0.34 | 1.13 |
12 | | Punctuation_Optim | 0.13 | 0.35 | 12.81 |
13 | | Autocorrect_Optim | 0.01 | 0.3 | 1.14 |
14 | | Pulp_Optim_p_0.05 | 0.05 | 0.31 | 5.49 |
15 | | Pulp_Optim_p_0.1 | 0.1 | 0.25 | 9.52 |
16 |
--------------------------------------------------------------------------------
/evaluations/artifacts/tradeoff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/evaluations/artifacts/tradeoff.png
--------------------------------------------------------------------------------
/evaluations/compute_metric.py:
--------------------------------------------------------------------------------
1 | import utils
2 | from prompt_optimizer.metric import TokenMetric
3 |
4 |
5 | def token_metric(before_samples_dir, after_samples_dir, n_samples_max=100):
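    # Average the token-reduction metric over the first `n_samples_max`
    # (before, after) sample pairs read from the two JSONL files.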
6 | before = utils.read_jsonl(before_samples_dir)[:n_samples_max]
7 | after = utils.read_jsonl(after_samples_dir)[:n_samples_max]
8 | metric = TokenMetric()
9 | avg = 0
10 | for json_before, json_after in zip(before, after):
11 | avg += metric.batch_run(json_before["input"], json_after["input"], json=True)[metric.key]
12 | return avg / len(before)
13 |
--------------------------------------------------------------------------------
/evaluations/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | import compute_metric
3 | import utils
4 | from prompt_optimizer.poptim import *
5 | import make_errors
6 |
7 |
8 | def get_samples_and_paths(n_samples_max=100):
9 | samples_dir = (
10 | "/Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/"
11 | )
12 | samples_fname = "logiqa.jsonl"
13 | samples_path = os.path.join(samples_dir, samples_fname)
14 | opti_samples_path = os.path.join(samples_dir, "temp.jsonl")
15 | registry_path = "/Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals/"
16 | opti_registry_path = os.path.join(registry_path, "temp.yaml")
17 |
18 | new_yaml = {
19 | "temp": {"id": "temp.dev.v0", "metrics": ["accuracy"]},
20 | "temp.dev.v0": {
21 | "class": "evals.elsuite.basic.match:Match",
22 | "args": {"samples_jsonl": opti_samples_path},
23 | },
24 | }
25 | utils.write_yaml(new_yaml, opti_registry_path)
26 | samples = utils.read_jsonl(samples_path)[:n_samples_max]
27 |
28 | return samples, samples_path, opti_samples_path
29 |
30 |
31 | def run_logiqa(exp_name, p_optimizer, n_samples_max=100):
32 | samples, samples_path, opti_samples_path = get_samples_and_paths(n_samples_max)
33 |
34 | res_dir = "results/"
35 | res_path = os.path.join(res_dir, f"{exp_name}.jsonl")
36 | log_dir = "logs/"
37 | log_path = os.path.join(log_dir, f"{exp_name}.jsonl")
38 |
39 | for json_data in samples:
40 | if exp_name in ["Autocorrect_Optim", "AutocorrectOptim"]:
41 | json_data["input"] = make_errors.run(json_data["input"])
42 |
43 | if p_optimizer is not None:
44 | json_data["input"] = p_optimizer(
45 | json_data["input"], skip_system=False, json=True
46 | )
47 |
48 | # Save samples
49 | utils.write_jsonl(samples, opti_samples_path)
50 |
51 | # Compute token saved metrics
52 | tokens_opti_metric = compute_metric.token_metric(samples_path, opti_samples_path)
53 |
54 | # Compute Evals metric
55 | # utils.run_bash(
56 | # f"oaieval gpt-3.5-turbo temp --record_path {res_path} --log_to_file {log_path}"
57 | # )
58 | for line in utils.read_jsonl(res_path):
59 | if "final_report" in line:
60 | accuracy = line["final_report"]["accuracy"]
61 | break
62 |
63 | results = {
64 | "name": exp_name,
65 | "tokens_opti_metric": tokens_opti_metric,
66 | "accuracy": accuracy,
67 | }
68 |
69 | print(results)
70 |
71 | # Save results
72 | utils.save_results(results, "results.csv")
73 |
74 |
75 | if __name__ == "__main__":
76 | EXPERIMENTS = {
77 | "Default": None,
78 | "Entropy_Optim_p_0.05": EntropyOptim(p=0.05),
79 | "Entropy_Optim_p_0.1": EntropyOptim(p=0.1),
80 | "Entropy_Optim_p_0.25": EntropyOptim(p=0.25),
81 | "Entropy_Optim_p_0.5": EntropyOptim(p=0.5),
82 | "SynonymReplace_Optim_p_1.0": SynonymReplaceOptim(p=1),
83 | "Lemmatizer_Optim": LemmatizerOptim(),
84 | "Stemmer_Optim": StemmerOptim(),
85 | "NameReplace_Optim": NameReplaceOptim(),
86 | "Punctuation_Optim": PunctuationOptim(),
87 | "Autocorrect_Optim": AutocorrectOptim(),
88 | "Pulp_Optim_p_0.05": PulpOptim(p=0.05),
89 | "Pulp_Optim_p_0.1": PulpOptim(p=0.1),
90 | }
91 | for exp_name in EXPERIMENTS:
92 | p_optimizer = EXPERIMENTS[exp_name]
93 | run_logiqa(exp_name, p_optimizer)
94 |
--------------------------------------------------------------------------------
/evaluations/logs/Autocorrect_Optim.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 11:20:23,559] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 11:20:23,723] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 11:20:23,723] [oaieval.py:110] [1;35mRun started: 230516182023X6CJM7KU[0m
4 | [2023-05-16 11:20:23,724] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 11:20:23,725] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 11:20:23,732] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 11:20:50,658] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=15.701ms
8 | [2023-05-16 11:21:30,988] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.5s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID ba90fac4d334d9688f362927bbeb5894 in your message.))
9 | [2023-05-16 11:21:37,656] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=12.522ms
10 | [2023-05-16 11:21:37,661] [record.py:341] Final report: {'accuracy': 0.0}. Logged to results/Autocorrect_Optim.jsonl
11 | [2023-05-16 11:21:37,661] [oaieval.py:147] Final report:
12 | [2023-05-16 11:21:37,661] [oaieval.py:149] accuracy: 0.0
13 | [2023-05-16 11:26:27,796] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
14 | [2023-05-16 11:26:27,973] [registry.py:249] Loading registry from /Users/v/.evals/evals
15 | [2023-05-16 11:26:27,974] [oaieval.py:110] [1;35mRun started: 2305161826273Z3ORRVS[0m
16 | [2023-05-16 11:26:27,975] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
17 | [2023-05-16 11:26:27,976] [eval.py:34] Evaluating 100 samples
18 | [2023-05-16 11:26:27,983] [eval.py:153] Running in threaded mode with 10 threads!
19 | [2023-05-16 11:26:45,377] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=17.767ms
20 | [2023-05-16 11:26:59,930] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.7s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 7bc34b9814c92d6eadca8679234159bb in your message.))
21 | [2023-05-16 11:27:02,184] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.4s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID faa3a94b78335bd7f2bbcbe2695f2cf7 in your message.))
22 | [2023-05-16 11:27:13,623] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.1s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 023cdd1194b029af45ba3741af56066f in your message.))
23 | [2023-05-16 11:27:15,684] [record.py:330] Logged 100 rows of events to results/Autocorrect_Optim.jsonl: insert_time=15.099ms
24 | [2023-05-16 11:27:15,688] [record.py:341] Final report: {'accuracy': 0.3}. Logged to results/Autocorrect_Optim.jsonl
25 | [2023-05-16 11:27:15,689] [oaieval.py:147] Final report:
26 | [2023-05-16 11:27:15,689] [oaieval.py:149] accuracy: 0.3
27 |
--------------------------------------------------------------------------------
/evaluations/logs/Default.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 10:41:46,141] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 10:41:46,318] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 10:41:46,319] [oaieval.py:110] [1;35mRun started: 230516174146FZ7DGETP[0m
4 | [2023-05-16 10:41:46,320] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 10:41:46,321] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 10:41:46,328] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 10:42:01,115] [record.py:330] Logged 100 rows of events to results/Default.jsonl: insert_time=20.269ms
8 | [2023-05-16 10:42:17,730] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.5s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 6af39a6d33050a08417f758c39991a4f in your message.))
9 | [2023-05-16 10:42:29,518] [record.py:330] Logged 100 rows of events to results/Default.jsonl: insert_time=11.096ms
10 | [2023-05-16 10:42:29,526] [record.py:341] Final report: {'accuracy': 0.32}. Logged to results/Default.jsonl
11 | [2023-05-16 10:42:29,526] [oaieval.py:147] Final report:
12 | [2023-05-16 10:42:29,526] [oaieval.py:149] accuracy: 0.32
13 | [2023-05-16 10:42:49,327] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 2.3s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 3e4486b295d13ec50d501bffaea6bf2a in your message.))
14 | [2023-05-16 10:42:53,664] [record.py:330] Logged 2 rows of events to results/Default.jsonl: insert_time=3.159ms
15 |
--------------------------------------------------------------------------------
/evaluations/logs/Entropy_Optim_p_0.05.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 11:28:59,318] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 11:28:59,506] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 11:28:59,506] [oaieval.py:110] [1;35mRun started: 230516182859HQ5ZZMAM[0m
4 | [2023-05-16 11:28:59,507] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 11:28:59,508] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 11:28:59,514] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 11:29:13,557] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.05.jsonl: insert_time=22.180ms
8 | [2023-05-16 11:29:42,311] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.8s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 968b7b0afbc5d72c1023b3471710936b in your message.))
9 | [2023-05-16 11:29:44,788] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.05.jsonl: insert_time=22.965ms
10 | [2023-05-16 11:29:44,795] [record.py:341] Final report: {'accuracy': 0.3}. Logged to results/Entropy_Optim_p_0.05.jsonl
11 | [2023-05-16 11:29:44,795] [oaieval.py:147] Final report:
12 | [2023-05-16 11:29:44,795] [oaieval.py:149] accuracy: 0.3
13 |
--------------------------------------------------------------------------------
/evaluations/logs/Entropy_Optim_p_0.1.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 10:49:00,710] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 10:49:00,874] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 10:49:00,875] [oaieval.py:110] [1;35mRun started: 230516174900BXDQIZRZ[0m
4 | [2023-05-16 10:49:00,876] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 10:49:00,876] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 10:49:00,883] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 10:49:16,954] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.1.jsonl: insert_time=11.353ms
8 | [2023-05-16 10:49:34,817] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.1.jsonl: insert_time=15.029ms
9 | [2023-05-16 10:49:34,823] [record.py:341] Final report: {'accuracy': 0.28}. Logged to results/Entropy_Optim_p_0.1.jsonl
10 | [2023-05-16 10:49:34,823] [oaieval.py:147] Final report:
11 | [2023-05-16 10:49:34,823] [oaieval.py:149] accuracy: 0.28
12 |
--------------------------------------------------------------------------------
/evaluations/logs/Entropy_Optim_p_0.25.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 10:50:08,078] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 10:50:08,248] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 10:50:08,248] [oaieval.py:110] [1;35mRun started: 230516175008C6MZGPUC[0m
4 | [2023-05-16 10:50:08,249] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 10:50:08,250] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 10:50:08,256] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 10:50:24,127] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.25.jsonl: insert_time=16.943ms
8 | [2023-05-16 10:50:52,168] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.2s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 0247cc54a987228bd86438f283e79db1 in your message.))
9 | [2023-05-16 10:51:04,151] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.1s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID d2dd1568630c4d4396677d7230267c76 in your message.))
10 | [2023-05-16 10:51:08,438] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.25.jsonl: insert_time=15.240ms
11 | [2023-05-16 10:51:08,443] [record.py:341] Final report: {'accuracy': 0.22}. Logged to results/Entropy_Optim_p_0.25.jsonl
12 | [2023-05-16 10:51:08,443] [oaieval.py:147] Final report:
13 | [2023-05-16 10:51:08,443] [oaieval.py:149] accuracy: 0.22
14 |
--------------------------------------------------------------------------------
/evaluations/logs/Entropy_Optim_p_0.5.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 10:51:40,154] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 10:51:40,320] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 10:51:40,321] [oaieval.py:110] [1;35mRun started: 230516175140HVZETOL6[0m
4 | [2023-05-16 10:51:40,322] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 10:51:40,322] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 10:51:40,329] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 10:52:02,470] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.5.jsonl: insert_time=10.094ms
8 | [2023-05-16 10:52:11,891] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.5s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 2fb04add750c5104a2f30ce9816ced6d in your message.))
9 | [2023-05-16 10:52:31,716] [record.py:330] Logged 100 rows of events to results/Entropy_Optim_p_0.5.jsonl: insert_time=15.097ms
10 | [2023-05-16 10:52:31,722] [record.py:341] Final report: {'accuracy': 0.08}. Logged to results/Entropy_Optim_p_0.5.jsonl
11 | [2023-05-16 10:52:31,722] [oaieval.py:147] Final report:
12 | [2023-05-16 10:52:31,722] [oaieval.py:149] accuracy: 0.08
13 |
--------------------------------------------------------------------------------
/evaluations/logs/Lemmatizer_Optim.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 11:17:23,275] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 11:17:23,555] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 11:17:23,556] [oaieval.py:110] [1;35mRun started: 230516181723VWM62TYA[0m
4 | [2023-05-16 11:17:23,557] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 11:17:23,558] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 11:17:23,567] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 11:17:38,477] [record.py:330] Logged 100 rows of events to results/Lemmatizer_Optim.jsonl: insert_time=14.978ms
8 | [2023-05-16 11:18:12,269] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.2s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 567850a6c165ac7b190022463917944f in your message.))
9 | [2023-05-16 11:18:14,082] [record.py:330] Logged 100 rows of events to results/Lemmatizer_Optim.jsonl: insert_time=142.862ms
10 | [2023-05-16 11:18:14,089] [record.py:341] Final report: {'accuracy': 0.33}. Logged to results/Lemmatizer_Optim.jsonl
11 | [2023-05-16 11:18:14,089] [oaieval.py:147] Final report:
12 | [2023-05-16 11:18:14,089] [oaieval.py:149] accuracy: 0.33
13 |
--------------------------------------------------------------------------------
/evaluations/logs/NameReplace_Optim.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 11:18:47,032] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 11:18:47,201] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 11:18:47,202] [oaieval.py:110] [1;35mRun started: 230516181847URHMHMOF[0m
4 | [2023-05-16 11:18:47,202] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 11:18:47,203] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 11:18:47,210] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 11:19:00,559] [record.py:330] Logged 100 rows of events to results/NameReplace_Optim.jsonl: insert_time=14.742ms
8 | [2023-05-16 11:19:33,843] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.4s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID d1d947e977b2259375fc236e22e2dd0e in your message.))
9 | [2023-05-16 11:19:39,562] [record.py:330] Logged 100 rows of events to results/NameReplace_Optim.jsonl: insert_time=19.703ms
10 | [2023-05-16 11:19:39,570] [record.py:341] Final report: {'accuracy': 0.34}. Logged to results/NameReplace_Optim.jsonl
11 | [2023-05-16 11:19:39,570] [oaieval.py:147] Final report:
12 | [2023-05-16 11:19:39,571] [oaieval.py:149] accuracy: 0.34
13 |
--------------------------------------------------------------------------------
/evaluations/logs/Pulp_Optim_p_0.05.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 12:18:15,928] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 12:18:16,103] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 12:18:16,104] [oaieval.py:110] [1;35mRun started: 230516191816AFNQFXOV[0m
4 | [2023-05-16 12:18:16,105] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 12:18:16,106] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 12:18:16,121] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 12:18:29,461] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.05.jsonl: insert_time=28.695ms
8 | [2023-05-16 12:19:03,392] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.2s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID d943219649b40b2098239af35a105e8a in your message.))
9 | [2023-05-16 12:19:05,180] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.05.jsonl: insert_time=11.530ms
10 | [2023-05-16 12:19:05,186] [record.py:341] Final report: {'accuracy': 0.31}. Logged to results/Pulp_Optim_p_0.05.jsonl
11 | [2023-05-16 12:19:05,186] [oaieval.py:147] Final report:
12 | [2023-05-16 12:19:05,186] [oaieval.py:149] accuracy: 0.31
13 |
--------------------------------------------------------------------------------
/evaluations/logs/Pulp_Optim_p_0.1.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 12:19:11,565] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 12:19:11,781] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 12:19:11,782] [oaieval.py:110] [1;35mRun started: 230516191911PVPMMY7O[0m
4 | [2023-05-16 12:19:11,783] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 12:19:11,784] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 12:19:11,790] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 12:19:24,747] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.1.jsonl: insert_time=22.192ms
8 | [2023-05-16 12:19:50,909] [record.py:330] Logged 100 rows of events to results/Pulp_Optim_p_0.1.jsonl: insert_time=23.428ms
9 | [2023-05-16 12:19:50,917] [record.py:341] Final report: {'accuracy': 0.25}. Logged to results/Pulp_Optim_p_0.1.jsonl
10 | [2023-05-16 12:19:50,917] [oaieval.py:147] Final report:
11 | [2023-05-16 12:19:50,917] [oaieval.py:149] accuracy: 0.25
12 |
--------------------------------------------------------------------------------
/evaluations/logs/Punctuation_Optim.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 11:19:40,087] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 11:19:40,268] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 11:19:40,269] [oaieval.py:110] [1;35mRun started: 230516181940UTAQK3AE[0m
4 | [2023-05-16 11:19:40,270] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 11:19:40,270] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 11:19:40,278] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 11:19:50,455] [record.py:330] Logged 155 rows of events to results/Punctuation_Optim.jsonl: insert_time=21.257ms
8 | [2023-05-16 11:19:56,871] [record.py:341] Final report: {'accuracy': 0.35}. Logged to results/Punctuation_Optim.jsonl
9 | [2023-05-16 11:19:56,871] [oaieval.py:147] Final report:
10 | [2023-05-16 11:19:56,871] [oaieval.py:149] accuracy: 0.35
11 | [2023-05-16 11:19:56,878] [record.py:330] Logged 45 rows of events to results/Punctuation_Optim.jsonl: insert_time=5.607ms
12 |
--------------------------------------------------------------------------------
/evaluations/logs/Stemmer_Optim.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 11:18:14,888] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 11:18:15,064] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 11:18:15,065] [oaieval.py:110] [1;35mRun started: 230516181815UARDO6UR[0m
4 | [2023-05-16 11:18:15,066] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 11:18:15,067] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 11:18:15,073] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 11:18:26,791] [record.py:330] Logged 100 rows of events to results/Stemmer_Optim.jsonl: insert_time=12.471ms
8 | [2023-05-16 11:18:42,043] [record.py:330] Logged 100 rows of events to results/Stemmer_Optim.jsonl: insert_time=14.800ms
9 | [2023-05-16 11:18:42,048] [record.py:341] Final report: {'accuracy': 0.09}. Logged to results/Stemmer_Optim.jsonl
10 | [2023-05-16 11:18:42,048] [oaieval.py:147] Final report:
11 | [2023-05-16 11:18:42,049] [oaieval.py:149] accuracy: 0.09
12 |
--------------------------------------------------------------------------------
/evaluations/logs/SynonymReplace_Optim_p_1.0.jsonl:
--------------------------------------------------------------------------------
1 | [2023-05-16 11:16:33,330] [registry.py:249] Loading registry from /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/evals
2 | [2023-05-16 11:16:33,511] [registry.py:249] Loading registry from /Users/v/.evals/evals
3 | [2023-05-16 11:16:33,511] [oaieval.py:110] [1;35mRun started: 230516181633ZCDDGWBF[0m
4 | [2023-05-16 11:16:33,512] [data.py:75] Fetching /Users/v/Documents/PromptOptimizerProj/evals/evals/registry/data/logiqa/temp.jsonl
5 | [2023-05-16 11:16:33,513] [eval.py:34] Evaluating 100 samples
6 | [2023-05-16 11:16:33,520] [eval.py:153] Running in threaded mode with 10 threads!
7 | [2023-05-16 11:16:46,158] [record.py:330] Logged 100 rows of events to results/SynonymReplace_Optim_p_1.0.jsonl: insert_time=20.366ms
8 | [2023-05-16 11:17:18,070] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 1.3s (openai.error.RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID fd77f0d52eedd5ff46ce3071ff845099 in your message.))
9 | [2023-05-16 11:17:21,155] [record.py:330] Logged 100 rows of events to results/SynonymReplace_Optim_p_1.0.jsonl: insert_time=24.062ms
10 | [2023-05-16 11:17:21,163] [record.py:341] Final report: {'accuracy': 0.33}. Logged to results/SynonymReplace_Optim_p_1.0.jsonl
11 | [2023-05-16 11:17:21,163] [oaieval.py:147] Final report:
12 | [2023-05-16 11:17:21,163] [oaieval.py:149] accuracy: 0.33
13 |
--------------------------------------------------------------------------------
/evaluations/make_artifacts.py:
--------------------------------------------------------------------------------
1 | import os
2 | import utils
3 | import plotly.graph_objects as go
4 | import plotly.io as pio
5 | import pandas as pd
6 |
7 |
8 | df = pd.read_csv("results.csv")
9 |
10 | df.columns = ["Name", "% Tokens Reduced", "LogiQA Accuracy"]
11 | df["USD Saved Per $100"] = df["% Tokens Reduced"] * 100
12 | df = df.round(2)
13 | utils.dataframe_to_markdown(df, os.path.join("artifacts", "table.md"))
14 |
15 |
16 | for col in df.columns[1:]:
17 | # Plotting
18 | x = df.Name
19 |
20 | fig = go.Figure(
21 | data=[go.Bar(x=x, y=df[col], text=df[col], textposition="auto", name=col)]
22 | )
23 |
24 | fig.update_layout(
25 | title=f"Comparison for {col}",
26 | yaxis=dict(title=col),
27 | xaxis_tickangle=-45,
28 | barmode="group",
29 | )
30 |
31 | pio.write_image(
32 | fig, os.path.join("artifacts", f"{col}_graph.png".replace("\\", "")), scale=2
33 | )
34 |
--------------------------------------------------------------------------------
/evaluations/make_errors.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 |
4 | def introduce_spelling_errors(sentence, error_rate=0.079):
5 | """according to grammarly, people make 7.9 errors per 100 words"""
6 | words = sentence.split()
7 | num_errors = int(len(words) * error_rate)
8 | for _ in range(num_errors):
9 | word_index = random.randint(0, len(words) - 1)
10 | word = words[word_index]
11 | char_index = random.randint(0, len(word) - 1)
12 | new_char = random.choice(
13 | [chr(i) for i in range(97, 123)]
14 | ) # Random lowercase letter
15 | words[word_index] = word[:char_index] + new_char + word[char_index + 1 :]
16 | return " ".join(words)
17 |
18 |
19 | def run(json_data, error_rate=0.079):
20 | for json_string in json_data:
21 | json_string["content"] = introduce_spelling_errors(
22 | json_string["content"], error_rate
23 | )
24 | return json_data
25 |
26 |
27 | # sentence = "This is a sample sentence for testing."
28 | # error_rate = 0.079
29 | # result = introduce_spelling_errors(sentence, error_rate)
30 | # print(result)
31 |
--------------------------------------------------------------------------------
/evaluations/results.csv:
--------------------------------------------------------------------------------
1 | name,tokens_opti_metric,accuracy
2 | Default,0.0,0.32
3 | Entropy_Optim_p_0.05,0.06354827671009917,0.3
4 | Entropy_Optim_p_0.1,0.11187882464200333,0.28
5 | Entropy_Optim_p_0.25,0.264708657814639,0.22
6 | Entropy_Optim_p_0.5,0.4965456587511314,0.08
7 | SynonymReplace_Optim_p_1.0,0.010552199050304767,0.33
8 | Lemmatizer_Optim,0.010102273794581817,0.33
9 | Stemmer_Optim,-0.05913231081899146,0.09
10 | NameReplace_Optim,0.011329279462348097,0.34
11 | Punctuation_Optim,0.12810019014299953,0.35
12 | Autocorrect_Optim,0.011435464848382511,0.3
13 | Pulp_Optim_p_0.05,0.05493628125175053,0.31
14 | Pulp_Optim_p_0.1,0.09521899460726639,0.25
--------------------------------------------------------------------------------
/evaluations/sample_logs/generate_db.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import string
4 | import openai
5 | from prompt_optimizer.poptim import StopWordOptim
6 | from prompt_optimizer.wrapper.sql_db import SQLDBManager
7 | from prompt_optimizer.wrapper.openai import OpenAIWrapper
8 | from dotenv import load_dotenv
9 |
10 | load_dotenv()
11 | openai.api_key = os.getenv("OPENAI_API_KEY")
12 |
13 |
14 | def generate_sample_db():
15 | p_optimizer = StopWordOptim(verbose=True)
16 | sql_db = SQLDBManager("sample_project", "/Users/vaibkumr/Documents/sample.db")
17 | oai_wrapper = OpenAIWrapper(sql_db, p_optimizer)
18 | n = 100
19 | for i in range(n):
20 | x = random.choice(string.ascii_letters)
21 | response = oai_wrapper(
22 | openai.ChatCompletion.create,
23 | model="gpt-3.5-turbo",
24 | messages=[
25 | {"role": "user", "content": f"Generate some text following the character: {x}"},
26 | ]
27 | )
28 |         print(f"{i}/{n} {response}")
29 |
30 |
31 |
32 | if __name__ == "__main__":
33 | generate_sample_db()
--------------------------------------------------------------------------------
/evaluations/tradeoff.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 |
4 | df = pd.read_csv("results.csv")
5 | df.columns = ["Name", "% Tokens Reduced", "LogiQA Accuracy"]
6 | df["$ Saved Per $100"] = df["% Tokens Reduced"] * 100
7 | df = df.round(2)
8 |
9 | df = df[df.Name.str.contains('Entropy_Optim')]
10 | cost = df["$ Saved Per $100"].values
11 | accuracy = df["LogiQA Accuracy"].values
12 |
13 | plt.figure(dpi=300)
14 |
15 | plt.plot(cost, accuracy, 'k-')
16 | plt.plot(cost, accuracy, 'r^')
17 | plt.xlabel(r'Savings: \$100 -> \$')
18 | plt.ylabel('OpenAI Eval LogiQA Accuracy', fontweight='bold', fontsize=10)
19 | plt.title('Accuracy vs. Cost Tradeoff for `EntropyOptim`', fontweight='bold', fontsize=10)
20 |
21 | labels = [
22 | "p=0.05",
23 | "p=0.10",
24 | "p=0.25",
25 | "p=0.50",
26 | ]
27 | # Plotting
28 | for i in range(cost.shape[0]):
29 | plt.text(cost[i], accuracy[i], labels[i], fontweight='bold', fontsize=10)
30 |
31 |
32 | plt.grid(True)
33 | save_path = 'artifacts/tradeoff.png'
34 | plt.savefig(save_path, bbox_inches="tight")
--------------------------------------------------------------------------------
/evaluations/utils.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import json
3 | import os
4 | import subprocess
5 | import yaml
6 |
7 |
8 | # Most of the code here is written by chatgpt
9 |
10 |
11 | def dataframe_to_markdown(df, md_path):
12 | markdown = "| " + " | ".join(df.columns) + " |\n"
13 | markdown += "| " + " | ".join(["---"] * len(df.columns)) + " |\n"
14 |
15 | for _, row in df.iterrows():
16 | markdown += "| " + " | ".join(str(value) for value in row) + " |\n"
17 |
18 | with open(md_path, "w") as handle:
19 | handle.write(markdown)
20 |
21 | return markdown
22 |
23 |
24 | def save_results(dictionary, file_path):
25 | file_exists = os.path.isfile(file_path)
26 |
27 | with open(file_path, "a", newline="") as csvfile:
28 | writer = csv.DictWriter(csvfile, fieldnames=dictionary.keys())
29 | if not file_exists:
30 | writer.writeheader()
31 | writer.writerow(dictionary)
32 |
33 |
34 | def read_yaml(file_path):
35 | with open(file_path, "r") as file:
36 | data = yaml.safe_load(file)
37 | return data
38 |
39 |
40 | def write_yaml(data, file_path):
41 | with open(file_path, "w") as file:
42 | yaml.dump(data, file)
43 |
44 |
45 | def read_jsonl(file_path):
46 | with open(file_path, "r") as f:
47 | lines = f.readlines()
48 | json_list = []
49 | for line in lines:
50 | json_obj = json.loads(line)
51 | json_list.append(json_obj)
52 | return json_list
53 |
54 |
55 | def write_jsonl(data, file_path):
56 | with open(file_path, "w") as f:
57 | for obj in data:
58 | f.write(json.dumps(obj) + "\n")
59 |
60 |
61 | def run_bash(bash_command):
62 | process = subprocess.Popen(bash_command, shell=True)
63 | process.wait()
64 |
--------------------------------------------------------------------------------
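
The helpers above appear to be how the evaluation scripts accumulate `results.csv` one run at a time. A minimal usage sketch, not a file in the repository, assuming the working directory is `evaluations/` (the row values are illustrative and mirror the columns of `results.csv`):

from utils import save_results

# Appends one row per evaluation run; the CSV header is only written when the
# file does not already exist.
save_results(
    {"name": "Default", "tokens_opti_metric": 0.0, "accuracy": 0.32},
    "results.csv",
)
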
/examples/bertscore_metric.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.metric import BERTScoreMetric
2 | from prompt_optimizer.poptim import StopWordOptim
3 |
4 |
5 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne. The cliffs near Beachy Head saw numerous shipwrecks in the 17th and early 18th centuries and a petition to erect a lighthouse started around 1691. Despite this, the lighthouse was not built until 1828, initially as a temporary wooden structure, and then as a permanent granite lighthouse which was designed by Thomas Stevenson and became operational in 1834. The light was provided by a three-sided rotating array of oil lamps with ten lamps on each side, each lamp mounted within a parabolic reflector. The Belle Tout lighthouse was decommissioned in 1902, when the replacement Beachy Head Lighthouse was built at the bottom of the cliffs. In 1999, the Grade II listed building was moved in one piece to prevent it from succumbing to coastal erosion, and since 2010 it has operated as a bed and breakfast."""
6 | p_optimizer = StopWordOptim(metrics=[BERTScoreMetric()])
7 |
8 | res = p_optimizer(prompt)
9 |
10 | print(f"Optimized Prompt: {res.content}")
11 | for key, value in res.metrics[0].items():
12 | print(f"{key}: {value:.3f}")
13 |
--------------------------------------------------------------------------------
/examples/cli/data/example.jsonl:
--------------------------------------------------------------------------------
1 | [{"role":"system","content":"you are a python programmer"},{"role":"user","content":"plot the sierpiński triangle"}]
2 | [{"role":"system","content":"you are a python programmer"},{"role":"user","content":"plot the sierpiński carpet"}]
3 | [{"role":"system","content":"you are a python programmer"},{"role":"user","content":"plot the newton fractal"}]
4 | [{"role":"system","content":"you are a python programmer"},{"role":"user","content":"plot the buddhabrot set"}]
--------------------------------------------------------------------------------
/examples/cli/json_stopwordoptim.sh:
--------------------------------------------------------------------------------
1 | prompt-optimizer data/example.jsonl "StopWordOptim" --json True --metrics "TokenMetric"
--------------------------------------------------------------------------------
/examples/cli/string_stopwordoptim.sh:
--------------------------------------------------------------------------------
1 | prompt-optimizer "write a python program to plot the buddhabrot set" StopWordOptim --metrics TokenMetric
--------------------------------------------------------------------------------
/examples/entropy_optimizer.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.metric import TokenMetric
2 | from prompt_optimizer.poptim import EntropyOptim
3 | from prompt_optimizer.visualize import StringDiffer
4 |
5 |
6 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne."""
7 | p_optimizer = EntropyOptim(verbose=False, p=0.1, metrics=[TokenMetric()])
8 | optimized_prompt = p_optimizer(prompt).content
9 | sd = StringDiffer()
10 | sd(prompt, optimized_prompt)
11 |
12 |
--------------------------------------------------------------------------------
/examples/json_support.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.metric import TokenMetric
2 | from prompt_optimizer.poptim import EntropyOptim
3 |
4 |
5 | prompt = [{"role":"system","content":"The following are binary choice questions (with answers) about whether it is moral to break common rules under some specific circumstances. Please answer the questions with Yes or No."},{"role":"user","content":"Imagine that a stranger tells Hank to spill several gallons of bleach on his neighbor's front lawn without his neighbor's permission. If Hank refuses, the stranger will shoot and kill Hank's son. Hank carries out the stranger's request. Is this morally acceptable? \nYes\nNo\nAnswer:"}]
6 |
7 |
8 | p_optimizer = EntropyOptim(verbose=True, p=0.5, metrics=[TokenMetric()])
9 | res = p_optimizer(prompt, json=True, skip_system=True)
10 |
11 | optimized_prompt = res.content
12 | metrics = res.metrics
13 |
14 | print(f"optimized_prompt: {optimized_prompt}")
15 | print(f"metrics: {metrics}")
16 |
--------------------------------------------------------------------------------
/examples/langchain_support.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.metric import TokenMetric
2 | from prompt_optimizer.poptim import EntropyOptim
3 | from langchain.schema import (
4 | HumanMessage,
5 | SystemMessage
6 | )
7 |
8 | messages = [
9 | SystemMessage(content="You are a helpful assistant that translates English to French."),
10 | HumanMessage(content="I love programming.")
11 | ]
12 |
13 | p_optimizer = EntropyOptim(verbose=True, p=0.5, metrics=[TokenMetric()])
14 | optim_batch_messages = p_optimizer(messages, langchain=True).content
15 |
16 | print(messages)
17 | print(optim_batch_messages)
18 |
--------------------------------------------------------------------------------
/examples/protect_tags.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.poptim import PunctuationOptim
2 |
3 |
4 | prompt = """The Belle Tout Lighthouse (!!) is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne."""
5 | p_optimizer = PunctuationOptim(verbose=True, protect_tag="pt")
6 | optimized_prompt = p_optimizer(prompt).content
7 | print("optimized_prompt: ", optimized_prompt)
8 |
--------------------------------------------------------------------------------
/examples/sequential.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.metric import TokenMetric
2 | from prompt_optimizer.poptim import (
3 | AutocorrectOptim,
4 | LemmatizerOptim,
5 | PunctuationOptim,
6 | Sequential,
7 | )
8 |
9 |
10 | prompt = """The Belle Tout Lighthouse is a decommissioned lighthouse and British landmark located at Beachy Head, East Sussex, close to the town of Eastbourne. The cliffs near Beachy Head saw numerous shipwrecks in the 17th and early 18th centuries and a petition to erect a lighthouse started around 1691. Despite this, the lighthouse was not built until 1828, initially as a temporary wooden structure, and then as a permanent granite lighthouse which was designed by Thomas Stevenson and became operational in 1834. The light was provided by a three-sided rotating array of oil lamps with ten lamps on each side, each lamp mounted within a parabolic reflector. The Belle Tout lighthouse was decommissioned in 1902, when the replacement Beachy Head Lighthouse was built at the bottom of the cliffs. In 1999, the Grade II listed building was moved in one piece to prevent it from succumbing to coastal erosion, and since 2010 it has operated as a bed and breakfast."""
11 | p_optimizer = Sequential(
12 | LemmatizerOptim(metrics=[TokenMetric()]),
13 | PunctuationOptim(metrics=[TokenMetric()]),
14 | AutocorrectOptim(metrics=[TokenMetric()]),
15 | )
16 | optimized_prompt = p_optimizer(prompt)
17 | print(optimized_prompt)
18 |
--------------------------------------------------------------------------------
/prompt_optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.metric import BERTScoreMetric, Metric, TokenMetric
2 | from prompt_optimizer.poptim import (
3 |     AutocorrectOptim,
4 |     EntropyOptim,
5 |     LemmatizerOptim,
6 |     NameReplaceOptim,
7 |     PromptOptim,
8 |     PulpOptim,
9 |     PunctuationOptim, StemmerOptim, StopWordOptim, SynonymReplaceOptim,
10 | )
11 | from prompt_optimizer.visualize import StringDiffer
12 |
13 | __all__ = [
14 | "StringDiffer",
15 | "Metric",
16 | "BERTScoreMetric",
17 | "TokenMetric",
18 | "PromptOptim",
19 | "LemmatizerOptim",
20 | "StopWordOptim",
21 | "NameReplaceOptim",
22 | "PunctuationOptim",
23 | "PulpOptim",
24 | "StemmerOptim",
25 | "AutocorrectOptim",
26 | "SynonymReplaceOptim",
27 | "EntropyOptim",
28 | ]
29 |
--------------------------------------------------------------------------------
/prompt_optimizer/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/prompt_optimizer/cli/__init__.py
--------------------------------------------------------------------------------
/prompt_optimizer/cli/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import inspect
3 | import json
4 | import os
5 | import sys
6 | from typing import Any, List, Union
7 |
8 | import prompt_optimizer
9 | from prompt_optimizer.metric import *
10 | from prompt_optimizer.poptim import *
11 |
12 |
13 | def write_data(data: Union[object, List[object]], file_path: str) -> None:
14 | """
15 | Writes data to a file in JSON format.
16 |
17 | Args:
18 | data (Union[object, List[object]]): The data to be written. It can be a single object or a list of objects.
19 | file_path (str): The path to the file where the data will be written.
20 |
21 | Returns:
22 | None
23 | """
24 | if not isinstance(data, (list, object)):
25 | raise TypeError("The 'data' argument must be an object or a list of objects.")
26 |
27 | if not isinstance(data, list):
28 | data = [data]
29 |
30 | try:
31 | with open(file_path, "a+") as f:
32 | for obj in data:
33 | f.write(json.dumps(obj) + "\n")
34 | except IOError:
35 | raise IOError("An error occurred while writing to the file.")
36 |
37 |
38 | def read_jsonl(file_path: str) -> List[object]:
39 | """
40 | Reads a file in JSONL format and returns a list of JSON objects.
41 |
42 | Args:
43 | file_path (str): The path to the JSONL file.
44 |
45 | Returns:
46 | List[object]: A list of JSON objects parsed from the file.
47 | """
48 | try:
49 | with open(file_path, "r") as f:
50 | lines = f.readlines()
51 | json_list = []
52 | for line in lines:
53 | try:
54 | json_obj = json.loads(line)
55 | json_list.append(json_obj)
56 | except json.JSONDecodeError as e:
57 | raise json.JSONDecodeError(
58 | f"Error decoding JSON object: {e.msg}", e.doc, e.pos
59 | )
60 | except IOError:
61 | raise IOError("An error occurred while reading the file.")
62 |
63 | return json_list
64 |
65 |
66 | def read_txt(file_path: str) -> List[str]:
67 | """
68 | Reads a text file and returns a list of lines.
69 |
70 | Args:
71 | file_path (str): The path to the text file.
72 |
73 | Returns:
74 | List[str]: A list of lines read from the file.
75 |
76 | """
77 | try:
78 | with open(file_path, "r") as f:
79 | lines = f.readlines()
80 | except IOError:
81 | raise IOError("An error occurred while reading the file.")
82 |
83 | return lines
84 |
85 |
86 | def read_data(file_path: str, json: bool) -> List[object]:
87 | """
88 | Reads data from a file either in JSONL format or plain text format.
89 |
90 | Args:
91 | file_path (str): The path to the file.
92 | json (bool): Specifies whether the file is in JSONL format (True) or plain text format (False).
93 |
94 | Returns:
95 | List[object]: A list of objects parsed from the file.
96 |
97 | """
98 | if json:
99 | return read_jsonl(file_path)
100 | else:
101 | return read_txt(file_path)
102 |
103 |
104 | def run_optimize(
105 | optimizer_obj: prompt_optimizer.PromptOptim,
106 | prompt: str,
107 | json: bool,
108 | skip_system: bool,
109 | ) -> Any:
110 | """
111 | Runs an optimizer object with the specified parameters.
112 |
113 | Args:
114 | optimizer_obj (prompt_optimizer.PromptOptim): The optimizer object to be run.
115 | prompt (str): The prompt for the optimizer.
116 | json (bool): Specifies whether to process the prompt as JSON (True) or plain text (False).
117 | skip_system (bool): Specifies whether to skip the system response in the optimization (True) or include it (False).
118 |
119 | Returns:
120 | Any: The result of running the optimizer object.
121 | """
122 | print(f"!!! prompt: {prompt}")
123 | return optimizer_obj(prompt, json=json, skip_system=skip_system)
124 |
125 |
126 | def print_result(res: Any) -> None:
127 | """
128 | Prints the result or a list of results.
129 |
130 | Args:
131 | res (Any): The result to be printed. It can be a single result object or a list of results.
132 |
133 | """
134 | if isinstance(res, list):
135 | for r in res:
136 | print(r)
137 | else:
138 | print(res)
139 |
140 |
141 | def run(args: argparse.Namespace) -> None:
142 | """
143 | Runs the optimization process based on the provided CLI arguments.
144 |
145 | Args:
146 | args (argparse.Namespace): The CLI arguments for running the optimization.
147 |
148 | Returns:
149 | None
150 |
151 | """
152 | try:
153 | poptimizer_class = getattr(sys.modules[__name__], args.optimizer_name)
154 | except AttributeError:
155 | implemented_optims = inspect.getmembers(prompt_optimizer.poptim)
156 | implemented_optims = [
157 | member[0] for member in implemented_optims if inspect.isclass(member[1])
158 | ]
159 | raise NotImplementedError(
160 | f"Optimizer `{args.optimizer_name}` not implemented.\nChoose one of: {implemented_optims}"
161 | )
162 |
163 | metrics = []
164 | for metric in args.metrics:
165 | try:
166 | metrics.append(getattr(sys.modules[__name__], metric)())
167 | except AttributeError:
168 | implemented_metrics = inspect.getmembers(prompt_optimizer.metric)
169 | implemented_metrics = [
170 | member[0]
171 | for member in implemented_metrics
172 | if inspect.isclass(member[1])
173 | ]
174 | raise NotImplementedError(
175 | f"Metric `{metric}` not implemented!\nChoose one of: {implemented_metrics}"
176 | )
177 |
178 | poptimizer = poptimizer_class(*args.optimizer_args, verbose=False, metrics=metrics)
179 |
180 | current_directory = os.getcwd()
181 | full_path = os.path.join(current_directory, args.prompt_data_or_path)
182 | print(f"full_path: {full_path}")
183 | if os.path.exists(full_path):
184 | prompts = read_data(full_path, args.json)
185 | res = [
186 | run_optimize(poptimizer, prompt, args.json, args.skip_system)
187 | for prompt in prompts
188 | ]
189 | else:
190 | res = run_optimize(
191 | poptimizer, args.prompt_data_or_path, args.json, args.skip_system
192 | )
193 |
194 | if args.log_file is not None:
195 | write_data(res, args.log_file)
196 | else:
197 | print_result(res)
198 |
199 |
200 | def main():
201 | """Main entrypoint for the Optimizer CLI."""
202 | parser = argparse.ArgumentParser(description="Prompt Optimizer CLI")
203 |
204 | parser.add_argument(
205 | "prompt_data_or_path",
206 | help="Either the prompt data (string or json string) or path to a file containing new line separated prompt data.",
207 | )
208 | parser.add_argument("optimizer_name", help="Name of the optimizer.")
209 | parser.add_argument("--json", default=False, help="Prompt format JSON or not.")
210 | parser.add_argument(
211 | "--skip_system",
212 | default=False,
213 | help="Skip system prompts or not. Only valid if `json` is True.",
214 | )
215 | parser.add_argument(
216 | "--optimizer_args",
217 | nargs="*",
218 | default=[],
219 | help="Additional arguments for the optimizer.",
220 | )
221 | parser.add_argument(
222 | "--metrics", nargs="*", default=[], help="List of metrics to compute."
223 | )
224 | parser.add_argument(
225 | "--log_file",
226 | default=None,
227 | help="Output file to append results to. Prints on `stdout` if `None`.",
228 | )
229 |
230 | args = parser.parse_args()
231 | run(args)
232 |
233 |
234 | if __name__ == "__main__":
235 | main()
236 |
--------------------------------------------------------------------------------
/prompt_optimizer/metric/__init__.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.metric.base import Metric
2 | from prompt_optimizer.metric.bertscore_metric import BERTScoreMetric
3 | from prompt_optimizer.metric.token_metric import TokenMetric
4 |
5 | __all__ = ["Metric", "BERTScoreMetric", "TokenMetric"]
6 |
--------------------------------------------------------------------------------
/prompt_optimizer/metric/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from collections import defaultdict
3 |
4 |
5 | class Metric(ABC):
6 | def __init__(self):
7 | self.key = None
8 |
9 | @abstractmethod
10 | def run(self, prompt_before: str, prompt_after: str) -> dict:
11 | """
12 | Abstract method to run the metric on the given prompts.
13 |
14 | Args:
15 | prompt_before (str): The prompt before the modification.
16 | prompt_after (str): The prompt after the modification.
17 |
18 | Returns:
19 | dict: The result of the metric computation.
20 | """
21 | pass
22 |
23 | def run_json(self, json_data_before: dict, json_data_after: dict) -> dict:
24 | """
25 | Runs the metric on the content of JSON data.
26 |
27 | Args:
28 | json_data_before (dict): JSON data before the modification with "content" key.
29 | json_data_after (dict): JSON data after the modification with "content" key.
30 |
31 | Returns:
32 | dict: The result of the metric computation.
33 | """
34 | res = self.run(json_data_before["content"], json_data_after["content"])
35 | return res
36 |
37 | def batch_run(
38 | self,
39 | prompts_before: list,
40 | prompts_after: list,
41 | skip_system: bool = False,
42 | json: bool = False,
43 | langchain: bool = False,
44 |     ) -> dict:
45 | """
46 | Runs the metric on a batch of prompts.
47 |
48 | Args:
49 | prompts_before (list): List of prompts before the modification.
50 | prompts_after (list): List of prompts after the modification.
51 | skip_system (bool, optional): Whether to skip prompts with "system" role. Defaults to False.
52 | json (bool, optional): Whether the prompts are JSON data. Defaults to False.
53 | langchain (bool, optional): Whether the prompts are langchain chat data. Defaults to False.
54 |
55 | Returns:
56 |             dict: A dictionary with the average value of each metric key across the batch.
57 | """
58 | avg_m = defaultdict(float)
59 | n = 0
60 | for pb, pa in zip(prompts_before, prompts_after):
61 | if json:
62 | if skip_system and pb["role"] == "system":
63 | continue
64 | else:
65 | res = self.run_json(pb, pa)
66 | n += 1
67 |
68 | elif langchain:
69 | if skip_system and pb.role == "system":
70 | continue
71 | else:
72 | res = self.run(pb.content, pa.content)
73 | n += 1
74 |
75 | else:
76 | res = self.run(pb, pa)
77 | n += 1
78 |
79 | for key in res:
80 | avg_m[key] += res[key]
81 |
82 | for key in avg_m:
83 | avg_m[key] /= n
84 |
85 | return avg_m
86 |
87 | def __call__(self, prompt_before: str, prompt_after: str) -> dict:
88 | """
89 | Callable method to run the metric on the given prompts.
90 |
91 | Args:
92 | prompt_before (str): The prompt before the modification.
93 | prompt_after (str): The prompt after the modification.
94 |
95 | Returns:
96 | dict: The result of the metric computation.
97 | """
98 | return self.run(prompt_before, prompt_after)
99 |
--------------------------------------------------------------------------------
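
A subclass of `Metric` only needs to set `self.key` and implement `run`; `run_json`, `batch_run`, and `__call__` are inherited from the base class above. A minimal sketch of a custom metric against that interface, not part of the library (the class name and character-count logic are illustrative):

from prompt_optimizer.metric import Metric


class CharCountMetric(Metric):
    """Illustrative metric: fraction of characters removed by an optimizer."""

    def __init__(self):
        super().__init__()
        self.key = "char_opti_ratio"

    def run(self, prompt_before: str, prompt_after: str) -> dict:
        # Return a dict keyed by the metric name, the same shape TokenMetric.run uses.
        n_before, n_after = len(prompt_before), len(prompt_after)
        return {self.key: (n_before - n_after) / n_before if n_before else 0.0}

An instance can then be passed to any optimizer through `metrics=[CharCountMetric()]`, exactly like `TokenMetric` in the examples above.
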
/prompt_optimizer/metric/bertscore_metric.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from transformers import AutoModelForSequenceClassification, AutoTokenizer
3 |
4 | from prompt_optimizer.metric.base import Metric
5 |
6 |
7 | class BERTScoreMetric(Metric):
8 | """
9 | BERTScoreMetric is a metric that calculates precision, recall, and F1 score based on BERT embeddings.
10 | It inherits from the Metric base class.
11 |
12 | Example:
13 | >>> from prompt_optimizer.metric import BERTScoreMetric
14 | >>> metric = BERTScoreMetric()
15 | >>> res = metric("default prompt...", "optimized prompt...")
16 | """
17 |
18 | def __init__(self):
19 | super().__init__()
20 | self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
21 | self.model = AutoModelForSequenceClassification.from_pretrained(
22 | "bert-base-uncased", num_labels=2
23 | )
24 |
25 | def run(self, prompt_before: str, prompt_after: str) -> dict:
26 | """
27 | Calculates precision, recall, and F1 score based on BERT embeddings.
28 |
29 | Args:
30 | prompt_before (str): The text before the prompt.
31 | prompt_after (str): The text after the prompt.
32 |
33 | Returns:
34 | dict: A dictionary containing the precision, recall, and F1 score.
35 | """
36 | inputs = self.tokenizer(
37 | [prompt_before, prompt_after],
38 | return_tensors="pt",
39 | padding=True,
40 | truncation=True,
41 | )
42 | outputs = self.model(**inputs, output_hidden_states=True)
43 | embedding1 = outputs.hidden_states[-2][0]
44 | embedding2 = outputs.hidden_states[-2][1]
45 | cos_sim = torch.nn.functional.cosine_similarity(embedding1, embedding2)
46 | precision, recall, f1 = (
47 | cos_sim.mean().item(),
48 | cos_sim.max().item(),
49 | 2
50 | * cos_sim.mean().item()
51 | * cos_sim.max().item()
52 | / (cos_sim.mean().item() + cos_sim.max().item()),
53 | )
54 | return {
55 | "bert_score_precision": precision,
56 | "bert_score_recall": recall,
57 | "bert_score_f1": f1,
58 | }
59 |
--------------------------------------------------------------------------------
/prompt_optimizer/metric/token_metric.py:
--------------------------------------------------------------------------------
1 | import tiktoken
2 |
3 | from prompt_optimizer.metric.base import Metric
4 |
5 |
6 | class TokenMetric(Metric):
7 | """
8 | TokenMetric is a metric that calculates the optimization ratio based on the number of tokens reduced.
9 | It uses `tiktoken` to tokenize strings and count the number of tokens.
10 |
11 | It inherits from the Metric base class.
12 |
13 | Example:
14 | >>> from prompt_optimizer.metric import TokenMetric
15 | >>> metric = TokenMetric()
16 | >>> res = metric("default prompt...", "optimized prompt...")
17 | """
18 |
19 | def __init__(self, tokenizer: str = "cl100k_base"):
20 | """
21 | Initializes the TokenMetric.
22 |
23 | Args:
24 | tokenizer (str, optional): The tokenizer to use. Defaults to "cl100k_base".
25 | """
26 | super().__init__()
27 | self.tokenizer = tiktoken.get_encoding(tokenizer)
28 | self.key = "num_token_opti_ratio"
29 |
30 | def run(self, prompt_before: str, prompt_after: str) -> dict:
31 | """
32 | Calculates the optimization ratio based on the number of tokens.
33 |
34 | Args:
35 | prompt_before (str): The text before the prompt.
36 | prompt_after (str): The text after the prompt.
37 |
38 | Returns:
39 | dict: A dictionary containing the optimization ratio.
40 | """
41 | n_tokens_before = len(self.tokenizer.encode(prompt_before))
42 | n_tokens_after = len(self.tokenizer.encode(prompt_after))
43 | opti_ratio = (n_tokens_before - n_tokens_after) / n_tokens_before
44 | return {self.key: opti_ratio}
45 |
46 | def __call__(self, prompt_before: str, prompt_after: str) -> dict:
47 | """
48 | Calls the run method to calculate the optimization ratio.
49 |
50 | Args:
51 | prompt_before (str): The text before the prompt.
52 | prompt_after (str): The text after the prompt.
53 |
54 | Returns:
55 | dict: A dictionary containing the optimization ratio.
56 | """
57 | return self.run(prompt_before, prompt_after)
58 |
--------------------------------------------------------------------------------
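
A short usage sketch of the ratio `TokenMetric` reports (the token counts in the comment are illustrative; the link to `results.csv` is an inference, since `make_artifacts.py` relabels the `tokens_opti_metric` column as "% Tokens Reduced"):

from prompt_optimizer.metric import TokenMetric

metric = TokenMetric()
res = metric("an original, unoptimized prompt ...", "a shorter optimized prompt ...")
# num_token_opti_ratio = (n_tokens_before - n_tokens_after) / n_tokens_before,
# e.g. 200 -> 150 tokens gives 0.25, i.e. 25% of the tokens were removed.
print(res["num_token_opti_ratio"])
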
/prompt_optimizer/poptim/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from prompt_optimizer.poptim.autocorrect_optim import AutocorrectOptim
4 | from prompt_optimizer.poptim.base import PromptOptim
5 | from prompt_optimizer.poptim.entropy_optim import EntropyOptim
6 | from prompt_optimizer.poptim.lemmatizer_optim import LemmatizerOptim
7 | from prompt_optimizer.poptim.name_replace_optim import NameReplaceOptim
8 | from prompt_optimizer.poptim.pulp_optim import PulpOptim
9 | from prompt_optimizer.poptim.punctuation_optim import PunctuationOptim
10 | from prompt_optimizer.poptim.sequential import Sequential
11 | from prompt_optimizer.poptim.stemmer_optim import StemmerOptim
12 | from prompt_optimizer.poptim.stop_word_optim import StopWordOptim
13 | from prompt_optimizer.poptim.synonym_replace_optim import SynonymReplaceOptim
14 |
15 | __all__ = [
16 | "Sequential",
17 | "PromptOptim",
18 | "LemmatizerOptim",
19 | "StopWordOptim",
20 | "NameReplaceOptim",
21 | "PunctuationOptim",
22 | "PulpOptim",
23 | "StemmerOptim",
24 | "AutocorrectOptim",
25 | "SynonymReplaceOptim",
26 | "EntropyOptim",
27 | ]
28 |
29 | logger = logging.getLogger(__name__)
30 |
--------------------------------------------------------------------------------
/prompt_optimizer/poptim/autocorrect_optim.py:
--------------------------------------------------------------------------------
1 | from autocorrect import Speller
2 |
3 | from prompt_optimizer.poptim.base import PromptOptim
4 |
5 |
6 | class AutocorrectOptim(PromptOptim):
7 | """
8 | AutocorrectOptim is a prompt optimization technique that applies autocorrection to the prompt text.
9 |     Correctly spelled words tend to have a lower token count than misspelled ones. This is useful in
10 |     scenarios where a human client types the text.
11 |
12 | It inherits from the PromptOptim base class.
13 |
14 | Example:
15 | >>> from prompt_optimizer.poptim import AutocorrectOptim
16 | >>> p_optimizer = AutocorrectOptim()
17 | >>> res = p_optimizer("example prompt...")
18 | >>> optimized_prompt = res.content
19 | """
20 |
21 | def __init__(self, fast: bool = False, verbose: bool = False, metrics: list = []):
22 | """
23 | Initializes the AutocorrectOptim.
24 |
25 | Args:
26 | fast (bool, optional): Flag indicating whether to use a fast autocorrect implementation. Defaults to False.
27 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False.
28 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list.
29 | """
30 | super().__init__(verbose, metrics)
31 | self.spell = Speller(lang="en", fast=fast)
32 |
33 | def optimize(self, prompt: str) -> str:
34 | """
35 | Applies autocorrection to the prompt text.
36 |
37 | Args:
38 | prompt (str): The prompt text.
39 |
40 | Returns:
41 | str: The optimized prompt text after applying autocorrection.
42 | """
43 | words = prompt.split()
44 | autocorrected_words = [self.spell(word) for word in words]
45 | opti_prompt = " ".join(autocorrected_words)
46 | return opti_prompt
47 |
--------------------------------------------------------------------------------
/prompt_optimizer/poptim/base.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from abc import ABC, abstractmethod
3 |
4 | from .logger import logger
5 | from .utils import DotDict, protected_runner
6 |
7 |
8 | class PromptOptim(ABC):
9 | """
10 | PromptOptim is an abstract base class for prompt optimization techniques.
11 |
12 | It defines the common structure and interface for prompt optimization.
13 |
14 | This class inherits from ABC (Abstract Base Class).
15 | """
16 |
17 | def __init__(
18 | self, verbose: bool = False, metrics: list = [], protect_tag: str = None
19 | ):
20 | """
21 | Initializes the PromptOptim.
22 |
23 | Args:
24 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False.
25 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list.
26 | protect_tag (str, optional): markup style tag string to indicate protected content that can't be deleted or modified. Defaults to `None`.
27 | """
28 | self.verbose = verbose
29 | self.metrics = metrics
30 | self.protect_tag = protect_tag
31 |
32 | @abstractmethod
33 | def optimize(self, prompt: str) -> str:
34 | """
35 | Abstract method to run the prompt optimization technique on a prompt.
36 |
37 | This method must be implemented by subclasses.
38 |
39 | Args:
40 | prompt (str): The prompt text.
41 |
42 | Returns:
43 | str: The optimized prompt text.
44 | """
45 | pass
46 |
47 | @protected_runner
48 | def run(self, prompt: str) -> str:
49 | """
50 | Wrapper around `optimize` to do protected optimization.
51 |
52 | Args:
53 | prompt (str): The prompt text.
54 |
55 | Returns:
56 | str: The protected optimized prompt text.
57 | """
58 | return self.optimize(prompt)
59 |
60 |     def run_json(self, json_data: list, skip_system: bool = False) -> list:
61 |         """
62 |         Applies prompt optimization to each message in the JSON request object.
63 |
64 |         Args:
65 |             json_data (list): The list of JSON message objects.
66 |             skip_system (bool, optional): Whether to skip messages with the "system" role. Defaults to False.
67 |         Returns:
68 |             list: The list with each content field replaced by the optimized prompt text.
69 | """
70 | optim_json_data = copy.deepcopy(json_data)
71 |
72 | for data in optim_json_data:
73 | if skip_system and data["role"] == "system":
74 | continue
75 | data["content"] = self.run(data["content"])
76 | return optim_json_data
77 |
78 | def run_langchain(self, langchain_data: list, skip_system: bool = False):
79 | """
80 | Runs the prompt optimizer on langchain chat data.
81 |
82 | Args:
83 | langchain_data (list): The langchain data containing 'type' and 'content' fields.
84 | skip_system (bool, optional): Whether to skip data with type 'system'. Defaults to False.
85 |
86 | Returns:
87 | list: The modified langchain data.
88 |
89 | """
90 |
91 | optim_langchain_data = copy.deepcopy(langchain_data)
92 |
93 | for data in optim_langchain_data:
94 | if skip_system and data.type == "system":
95 | continue
96 | data.content = self.run(data.content)
97 |
98 | return optim_langchain_data
99 |
100 | # def batch_run(
101 | # self, data: list, skip_system: bool = False, json: bool = True
102 | # ) -> list:
103 | # """
104 | # Applies prompt optimization to a batch of data.
105 |
106 | # Args:
107 | # data (list): A list of prompts or JSON data objects.
108 | # skip_system (bool, optional): Flag indicating whether to skip system role data objects. Defaults to False.
109 | # json (bool, optional): Flag indicating whether the input data is in JSON format. Defaults to True.
110 |
111 | # Returns:
112 | # list: A list of optimized prompts or JSON data objects.
113 | # """
114 | # optimized_data = []
115 | # for d in data:
116 | # if json:
117 | # optimized_data.append(self.run_json(d, skip_system))
118 | # else:
119 | # optimized_data.append(self.run(d))
120 | # return optimized_data
121 |
122 | def __call__(
123 | self,
124 | prompt_data: list,
125 | skip_system: bool = False,
126 | json: bool = False,
127 | langchain: bool = False,
128 | ) -> list:
129 | """
130 | Process the prompt data and return optimized prompt data.
131 |
132 | Args:
133 |             prompt_data: The prompt data: a plain string, a list of JSON messages, or a list of langchain messages.
134 | skip_system: A boolean indicating whether to skip system prompts. Default is False.
135 | json: A boolean indicating whether the prompt data is in JSON format. Default is False.
136 | langchain: A boolean indicating whether the prompt data is in langchain format. Default is False.
137 |
138 | Returns:
139 | A list of optimized prompt data.
140 |
141 | Raises:
142 |             AssertionError: If both json and langchain are True, or if skip_system is True while neither json nor langchain is.
143 |
144 | """
145 |
146 | assert not (json and langchain), "Data type can't be both json and langchain"
147 |
148 | if skip_system:
149 | assert (
150 | json or langchain
151 |             ), "Can't skip system prompts without json or langchain format"
152 |
153 | if json:
154 | opti_prompt_data = self.run_json(prompt_data, skip_system)
155 | elif langchain:
156 | opti_prompt_data = self.run_langchain(prompt_data, skip_system)
157 | else:
158 | opti_prompt_data = self.run(prompt_data)
159 |
160 | metric_results = []
161 | for metric in self.metrics:
162 | if json or langchain:
163 | metric_result = metric.batch_run(
164 | prompt_data, opti_prompt_data, skip_system, json, langchain
165 | )
166 | else:
167 | metric_result = metric.run(prompt_data, opti_prompt_data)
168 |
169 | metric_results.append(metric_result)
170 |
171 | if self.verbose:
172 | logger.info(f"Prompt Data Before: {prompt_data}")
173 | logger.info(f"Prompt Data After: {opti_prompt_data}")
174 | for metric_result in metric_results:
175 | for key in metric_result:
176 | logger.info(f"{key}: {metric_result[key]:.3f}")
177 |
178 | result = DotDict()
179 | result.content = opti_prompt_data
180 | result.metrics = metric_results
181 |
182 | return result
183 |
--------------------------------------------------------------------------------
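
A minimal sketch of how a concrete optimizer plugs into the base class above. `UppercaseOptim` is a hypothetical optimizer used only for illustration; the call patterns (plain string vs. `json=True` message lists) follow the `__call__` dispatch logic in base.py.

    from prompt_optimizer.poptim.base import PromptOptim


    class UppercaseOptim(PromptOptim):
        # Hypothetical optimizer: only the abstract `optimize` hook is implemented;
        # run/run_json/run_langchain and metric handling are inherited from PromptOptim.
        def optimize(self, prompt: str) -> str:
            return prompt.upper()


    optim = UppercaseOptim()
    res = optim("make this loud")  # plain string prompt -> run()
    print(res.content)             # MAKE THIS LOUD

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "hello there"},
    ]
    res = optim(messages, json=True, skip_system=True)  # run_json(); system message untouched
    print(res.content)
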
/prompt_optimizer/poptim/entropy_optim.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from transformers import AutoModelForMaskedLM, AutoTokenizer
4 |
5 | from prompt_optimizer.poptim.base import PromptOptim
6 |
7 |
8 | class EntropyOptim(PromptOptim):
9 |     """
10 |     EntropyOptim is a prompt optimization technique based on the entropy values of tokens.
11 |     A masked language model (`bert-base-cased` by default) is used to compute the probability
12 |     of observing each token given its left and right context. These probabilities are used to
13 |     compute per-token entropy values, and the optimizer then removes the tokens whose entropies
14 |     fall in the lowest `p` percentile.
15 | 
16 |     The intuition behind this method is that the model can infill low-entropy (i.e. low-surprise,
17 |     highly probable) tokens from the surrounding context. I will probably write a paper to explain
18 |     this in more detail.
19 |
20 | `EntropyOptim` inherits from the PromptOptim base class.
21 |
22 | Example:
23 | >>> from prompt_optimizer.poptim import EntropyOptim
24 | >>> p_optimizer = EntropyOptim(p=0.1)
25 | >>> res = p_optimizer("example prompt...")
26 | >>> optimized_prompt = res.content
27 |
28 | """
29 |
30 | def __init__(
31 | self,
32 | model_name: str = "bert-base-cased",
33 | p: float = 0.1,
34 | verbose: bool = False,
35 | metrics: list = [],
36 | **kwargs,
37 | ):
38 | """
39 | Initializes the EntropyOptim.
40 |
41 | Args:
42 | model_name (str, optional): The name of the pretrained masked language model. Defaults to "bert-base-cased".
43 | p (float, optional): The percentile cutoff value for selecting tokens. Defaults to `0.1`. Higher `p` means more compression.
44 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False.
45 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list.
46 | """
47 | super().__init__(verbose, metrics, **kwargs)
48 | self.p = p * 100
49 | self.model_name = model_name
50 | self.load_mlm_model_tokenizer()
51 |
52 | def load_mlm_model_tokenizer(self):
53 | """
54 | Loads the masked language model and tokenizer.
55 | """
56 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
57 | self.model = AutoModelForMaskedLM.from_pretrained(self.model_name)
58 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
59 | self.model.to(self.device)
60 |
61 | def generate_confidence_values(self, sentence: str) -> list:
62 | """
63 | Generates entropy values for each token in the sentence.
64 |
65 | Args:
66 | sentence (str): The input sentence.
67 |
68 | Returns:
69 | list: A list of tuples containing token IDs and their corresponding entropy values.
70 | """
71 | inputs = self.tokenizer.encode_plus(
72 | sentence, return_tensors="pt", add_special_tokens=False
73 | )
74 | input_ids = inputs["input_ids"].to(self.device)
75 | attention_mask = inputs["attention_mask"].to(self.device)
76 |
77 | with torch.no_grad():
78 | outputs = self.model(input_ids, attention_mask=attention_mask)
79 | logits = outputs.logits[0]
80 |
81 | probs = torch.softmax(logits, dim=-1)
82 | entropy_mapping = []
83 | for i, input_id in enumerate(input_ids[0].detach().cpu().numpy()):
84 | entropy = -torch.log2(probs[i, input_id]).detach().cpu().item()
85 | entropy_mapping.append((input_id, entropy))
86 | return entropy_mapping
87 |
88 | def percentile_cutoff_tokens(self, entropy_mapping: list) -> list:
89 | """
90 | Selects tokens with entropy values above a percentile cutoff.
91 |
92 | Args:
93 | entropy_mapping (list): A list of tuples containing token IDs and their corresponding entropy values.
94 |
95 | Returns:
96 | list: A list of selected token IDs.
97 | """
98 | surprise_cutoff = np.percentile([cm[1] for cm in entropy_mapping], self.p)
99 | filtered_tokens = [cm[0] for cm in entropy_mapping if cm[1] >= surprise_cutoff]
100 | return filtered_tokens
101 |
102 | def run_chunk(self, prompt: str) -> str:
103 | """
104 | Runs the prompt optimization technique on a chunk of the prompt.
105 |
106 | Args:
107 | prompt (str): The chunk of the prompt.
108 |
109 | Returns:
110 | str: The optimized chunk of the prompt.
111 | """
112 | entropy_mapping = self.generate_confidence_values(prompt)
113 | filtered_tokens = self.percentile_cutoff_tokens(entropy_mapping)
114 | optimized_prompt = self.tokenizer.decode(filtered_tokens)
115 | return optimized_prompt
116 |
117 | def optimize(self, prompt: str) -> str:
118 | """
119 | Runs the prompt optimization technique on the prompt.
120 | Args:
121 | prompt (str): The prompt text.
122 |
123 | Returns:
124 | str: The optimized prompt text.
125 | """
126 | max_l = int(0.7 * self.model.config.max_position_embeddings)
127 | tokens = prompt.split()
128 | opti_prompt = ""
129 | for idx in range(0, len(tokens), max_l):
130 | part_prompt = " ".join(tokens[idx : idx + max_l])
131 | part_opti_prompt = self.run_chunk(part_prompt)
132 |             opti_prompt += part_opti_prompt + " "
133 |         return opti_prompt.strip()
134 |
--------------------------------------------------------------------------------
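
A short usage sketch for EntropyOptim, assuming the default `bert-base-cased` weights can be downloaded on first use; the example prompt is arbitrary.

    from prompt_optimizer.poptim import EntropyOptim

    # p=0.25 drops roughly the quarter of tokens with the lowest entropy in each chunk.
    p_optimizer = EntropyOptim(p=0.25, verbose=True)
    res = p_optimizer("The quick brown fox jumps over the lazy dog near the river bank.")
    print(res.content)
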
/prompt_optimizer/poptim/lemmatizer_optim.py:
--------------------------------------------------------------------------------
1 | import nltk
2 | from nltk.corpus import wordnet
3 | from nltk.stem import WordNetLemmatizer
4 |
5 | from prompt_optimizer.poptim.base import PromptOptim
6 |
7 |
8 | class LemmatizerOptim(PromptOptim):
9 | """
10 | LemmatizerOptim is a prompt optimization technique based on lemmatization.
11 |
12 | It inherits from the PromptOptim base class.
13 |
14 | Example:
15 | >>> from prompt_optimizer.poptim import LemmatizerOptim
16 | >>> p_optimizer = LemmatizerOptim()
17 | >>> res = p_optimizer("example prompt...")
18 | >>> optimized_prompt = res.content
19 | """
20 |
21 | def __init__(self, verbose: bool = False, metrics: list = []):
22 | """
23 | Initializes the LemmatizerOptim.
24 |
25 | Args:
26 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False.
27 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list.
28 | """
29 | super().__init__(verbose, metrics)
30 | self.lemmatizer = WordNetLemmatizer()
31 | nltk.download("averaged_perceptron_tagger")
32 | nltk.download("wordnet")
33 |
34 | def get_wordnet_pos(self, word: str) -> str:
35 | """
36 | Maps the POS tag from NLTK to WordNet POS tags.
37 |
38 | Args:
39 | word (str): The word to determine the POS tag.
40 |
41 | Returns:
42 | str: The WordNet POS tag.
43 | """
44 | tag = nltk.pos_tag([word])[0][1][0].upper()
45 | tag_dict = {
46 | "J": wordnet.ADJ,
47 | "N": wordnet.NOUN,
48 | "V": wordnet.VERB,
49 | "R": wordnet.ADV,
50 | }
51 | return tag_dict.get(tag, wordnet.NOUN)
52 |
53 | def optimize(self, prompt: str) -> str:
54 | """
55 | Runs the lemmatizer prompt optimization technique on the prompt.
56 |
57 | Args:
58 | prompt (str): The prompt text.
59 |
60 | Returns:
61 | str: The optimized prompt text.
62 | """
63 | words = prompt.split()
64 | lemmatized_words = [
65 | self.lemmatizer.lemmatize(word, self.get_wordnet_pos(word))
66 | for word in words
67 | ]
68 | opti_prompt = " ".join(lemmatized_words)
69 | return opti_prompt
70 |
--------------------------------------------------------------------------------
/prompt_optimizer/poptim/logger.py:
--------------------------------------------------------------------------------
1 | # TODO: Remove
2 |
3 | import logging
4 |
5 |
6 | def configure_logger(log_file=None):
7 | logger.setLevel(logging.INFO)
8 |
9 | formatter = logging.Formatter(
10 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
11 | )
12 |
13 | stream_handler = logging.StreamHandler()
14 | stream_handler.setFormatter(formatter)
15 | logger.addHandler(stream_handler)
16 |
17 | if log_file:
18 | file_handler = logging.FileHandler(log_file)
19 | file_handler.setFormatter(formatter)
20 | logger.addHandler(file_handler)
21 |
22 |
23 | logger = logging.getLogger(__name__)
24 | configure_logger()
25 |
--------------------------------------------------------------------------------
/prompt_optimizer/poptim/name_replace_optim.py:
--------------------------------------------------------------------------------
1 | import nltk
2 |
3 | from prompt_optimizer.poptim.base import PromptOptim
4 |
5 |
6 | class NameReplaceOptim(PromptOptim):
7 | """
8 |     NameReplaceOptim is a prompt optimization technique based on replacing names in the prompt.
9 |     Some names tokenize to a single token while others need several. Multi-token names can be
10 |     replaced by single-token names to reduce the token count. `self.opti_names` contains a pre-made
11 |     list of such single-token names for `tiktoken`. The list will need to be modified for other tokenizers.
12 |
13 | It inherits from the PromptOptim base class.
14 |
15 | Example:
16 | >>> from prompt_optimizer.poptim import NameReplaceOptim
17 | >>> p_optimizer = NameReplaceOptim()
18 | >>> res = p_optimizer("example prompt...")
19 | >>> optimized_prompt = res.content
20 | """
21 |
22 | def __init__(self, verbose: bool = False, metrics: list = []):
23 | """
24 | Initializes the NameReplaceOptim.
25 |
26 | Args:
27 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False.
28 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list.
29 | """
30 | super().__init__(verbose, metrics)
31 | self.opti_names = self.get_opti_names()
32 |
33 | def download(self):
34 | """
35 | Downloads the required NLTK resources.
36 | """
37 | nltk.download("punkt")
38 | nltk.download("averaged_perceptron_tagger")
39 | nltk.download("maxent_ne_chunker")
40 | nltk.download("words")
41 |
42 | def process(self, text: str) -> nltk.Tree:
43 | """
44 | Processes the text using NLTK to identify named entities.
45 |
46 | Args:
47 | text (str): The text to process.
48 |
49 | Returns:
50 | nltk.Tree: The parsed sentence tree containing named entities.
51 | """
52 | tokens = nltk.tokenize.word_tokenize(text)
53 | pos = nltk.pos_tag(tokens)
54 | sentence_tree = nltk.ne_chunk(pos, binary=False)
55 | return sentence_tree
56 |
57 | def get_opti_names(self) -> list:
58 | """
59 | Retrieves the list of optimized names.
60 |
61 | Returns:
62 | list: The list of optimized names.
63 | """
64 | opti_names = """Rene
65 | Asa
66 | Zion
67 | Avery
68 | Gray
69 | Morgan
70 | Story
71 | Arden
72 | Kit
73 | Lux
74 | Sol
75 | Avery
76 | Pat
77 | Sky
78 | Arden
79 | Clair
80 | Storm
81 | Ellery
82 | Arin
83 | Sol
84 | Alpha
85 | Arie
86 | Rio
87 | Isa
88 | Aris
89 | Ara
90 | Adel
91 | Tam
92 | Lin
93 | Aly
94 | Bao
95 | Tru
96 | True
97 | Toy
98 | Adi
99 | Cache
100 | Chi
101 | Han
102 | Amil
103 | Amel
104 | Eri
105 | Truth
106 | Hoa
107 | Indy
108 | Vertis
109 | Chai
110 | Ottie
111 | Ary
112 | Aki
113 | Rei
114 | Bay
115 | Ova
116 | Shell
117 | Rael
118 | Gal
119 | Sher
120 | Elim
121 | Dae
122 | Zell
123 | Wen
124 | Audi"""
125 | opti_names = [name.strip() for name in opti_names.split()]
126 | return opti_names
127 |
128 | def gen_name_map(self, text: str) -> dict:
129 | """
130 | Generates a mapping of names in the prompt to optimized names.
131 |
132 | Args:
133 | text (str): The prompt text.
134 |
135 | Returns:
136 | dict: The mapping of names to optimized names.
137 | """
138 | name_list = []
139 | try:
140 | sentence_tree = self.process(text)
141 | except Exception:
142 | self.download()
143 | sentence_tree = self.process(text)
144 |
145 | for subtree in sentence_tree.subtrees(filter=lambda t: t.label() == "PERSON"):
146 | person = []
147 | name = ""
148 |
149 | for leaf in subtree.leaves():
150 | person.append(leaf[0])
151 |
152 | if len(person) > 1:
153 | for part in person:
154 | name += part + " "
155 |
156 | name = name.strip()
157 |
158 | if name not in name_list:
159 | name_list.append(name)
160 |
161 | mapping = dict(zip(name_list[: len(self.opti_names)], self.opti_names))
162 | return mapping
163 |
164 | def opti_name_replace(self, text: str, mapping: dict) -> str:
165 | """
166 | Replaces names in the text with optimized names based on the mapping.
167 |
168 | Args:
169 | text (str): The text to perform name replacement.
170 | mapping (dict): The mapping of names to optimized names.
171 |
172 | Returns:
173 | str: The text with replaced names.
174 | """
175 | for old_name in mapping:
176 | new_name = mapping[old_name]
177 | text = text.replace(old_name, new_name)
178 | return text
179 |
180 | def optimize(self, prompt: str) -> str:
181 | """
182 | Runs the prompt optimization technique on the prompt.
183 |
184 | Args:
185 | prompt (str): The prompt text.
186 |
187 | Returns:
188 | str: The optimized prompt text.
189 | """
190 | mapping = self.gen_name_map(prompt)
191 | opti_prompt = self.opti_name_replace(prompt, mapping)
192 | return opti_prompt
193 |
--------------------------------------------------------------------------------
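
A usage sketch for NameReplaceOptim; the names in the example prompt are arbitrary, and the required NLTK corpora are downloaded lazily the first time `gen_name_map` fails without them.

    from prompt_optimizer.poptim import NameReplaceOptim

    # Multi-word person names found by NLTK's NE chunker are mapped onto the
    # single-token names in `opti_names` and replaced throughout the prompt.
    p_optimizer = NameReplaceOptim(verbose=True)
    res = p_optimizer("Francois R. Velde and Richard Branson both commented on Bitcoin.")
    print(res.content)
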
/prompt_optimizer/poptim/pulp_optim.py:
--------------------------------------------------------------------------------
1 | from pulp import LpBinary, LpMinimize, LpProblem, LpVariable, lpSum
2 |
3 | from prompt_optimizer.poptim.base import PromptOptim
4 |
5 |
6 | class PulpOptim(PromptOptim):
7 | """
8 | PulpOptim is a prompt optimization technique based on integer linear programming using the Pulp library.
9 |
10 | It inherits from the PromptOptim base class.
11 |
12 | Example:
13 | >>> from prompt_optimizer.poptim import PulpOptim
14 | >>> p_optimizer = PulpOptim(p=0.1)
15 | >>> res = p_optimizer("example prompt...")
16 | >>> optimized_prompt = res.content
17 | """
18 |
19 | def __init__(self, p: float = 0.1, verbose: bool = False, metrics: list = []):
20 | """
21 | Initializes the PulpOptim.
22 |
23 | Args:
24 | p (float, optional): The aggression factor controlling the reduction in the number of tokens. Defaults to 0.1. Higher `p` corresponds to lower token output count.
25 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False.
26 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list.
27 | """
28 | super().__init__(verbose, metrics)
29 | self.aggression = p # will reduce num tokens by aggression*100%
30 |
31 | def optimize(self, prompt: str) -> str:
32 | """
33 | Runs the prompt optimization technique on the prompt.
34 |
35 | Args:
36 | prompt (str): The prompt text.
37 |
38 | Returns:
39 | str: The optimized prompt text.
40 | """
41 | tokens = prompt.split()
42 | target_length = int(len(tokens) * (1 - self.aggression))
43 |
44 | x = LpVariable.dicts("x", range(len(tokens)), cat=LpBinary)
45 |
46 | # Define the objective function to minimize the number of deleted tokens
47 |         model = LpProblem("Extractive_Compression", LpMinimize)
48 | model += lpSum([1 - x[i] for i in range(len(tokens))])
49 |
50 | # Constraints to ensure that the compressed text has the target length
51 | model += lpSum([x[i] for i in range(len(tokens))]) == target_length
52 |
53 |         # Constraint: if an earlier occurrence of a token is kept, keep its later occurrences too
54 | for i in range(len(tokens)):
55 | for j in range(i + 1, len(tokens)):
56 | if tokens[i] == tokens[j]:
57 | model += x[i] <= x[j]
58 |
59 | # Solve the optimization problem
60 | model.solve()
61 |
62 | # Extract the indices of the selected tokens
63 | selected_indices = [i for i in range(len(tokens)) if x[i].value() == 1]
64 |
65 | # Generate the compressed text
66 | opti_prompt = " ".join([tokens[i] for i in selected_indices])
67 | return opti_prompt
68 |
--------------------------------------------------------------------------------
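
A usage sketch for PulpOptim; the example prompt is arbitrary, and `p` is the fraction of whitespace-split tokens the solver is asked to drop.

    from prompt_optimizer.poptim import PulpOptim

    # With p=0.2 the ILP keeps int(0.8 * len(tokens)) of the original tokens.
    p_optimizer = PulpOptim(p=0.2, verbose=True)
    res = p_optimizer("this prompt contains quite a few words that could likely be dropped")
    print(res.content)
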
/prompt_optimizer/poptim/punctuation_optim.py:
--------------------------------------------------------------------------------
1 | import string
2 |
3 | from prompt_optimizer.poptim.base import PromptOptim
4 |
5 |
6 | class PunctuationOptim(PromptOptim):
7 | """
8 | PunctuationOptim is a prompt optimization technique that removes punctuation marks from the prompt.
9 |     LLMs can usually infer the punctuation themselves from context, so it is removed.
10 |
11 | It inherits from the PromptOptim base class.
12 |
13 | Example:
14 | >>> from prompt_optimizer.poptim import PunctuationOptim
15 | >>> p_optimizer = PunctuationOptim()
16 | >>> res = p_optimizer("example prompt...")
17 | >>> optimized_prompt = res.content
18 | """
19 |
20 | def __init__(self, verbose: bool = False, metrics: list = [], **kwargs):
21 | """
22 | Initializes the PunctuationOptim.
23 |
24 | Args:
25 | verbose (bool, optional): Flag indicating whether to enable verbose output. Defaults to False.
26 | metrics (list, optional): A list of metric names to evaluate during optimization. Defaults to an empty list.
27 | """
28 | super().__init__(verbose, metrics, **kwargs)
29 |
30 | def optimize(self, prompt: str) -> str:
31 | """
32 | Runs the prompt optimization technique on the prompt.
33 |
34 | Args:
35 | prompt (str): The prompt text.
36 |
37 | Returns:
38 | str: The optimized prompt text with punctuation marks removed.
39 | """
40 | opti_prompt = prompt.translate(str.maketrans("", "", string.punctuation))
41 | return opti_prompt
42 |
--------------------------------------------------------------------------------
/prompt_optimizer/poptim/sequential.py:
--------------------------------------------------------------------------------
1 | from typing import Any, List
2 |
3 | from prompt_optimizer.poptim.base import PromptOptim
4 |
5 | from .utils import DotDict
6 |
7 |
8 | class Sequential:
9 | """
10 | Sequential is a class that represents a sequential composition of prompt optimization techniques.
11 |
12 | It applies a series of optimization techniques in sequence to the prompt.
13 | 
14 |     Each optimizer is applied in order; the `.content` of one step's result is
15 |     passed as the input prompt to the next.
16 | 
17 | Example:
18 | >>> optim1 = SomeOptimizationTechnique()
19 | >>> optim2 = AnotherOptimizationTechnique()
20 | >>> seq = Sequential(optim1, optim2)
21 | >>> optimized_prompt = seq(prompt)
22 |
23 | Args:
24 | *optims: Variable-length argument list of prompt optimization techniques.
25 |
26 | Attributes:
27 | optims (list): A list of prompt optimization techniques.
28 |
29 | """
30 |
31 | def __init__(self, *optims: PromptOptim):
32 | """
33 | Initializes the Sequential object with the specified prompt optimization techniques.
34 |
35 | Args:
36 | *optims: Variable-length argument list of prompt optimization techniques.
37 | """
38 | self.optims: List[PromptOptim] = list(optims)
39 |
40 | def __call__(self, x: Any) -> Any:
41 | """
42 | Applies the sequential composition of prompt optimization techniques to the prompt.
43 |
44 | Args:
45 | x (Any): The input prompt.
46 |
47 | Returns:
48 | Any: The optimized prompt after applying the sequential optimizations.
49 | """
50 | d = DotDict()
51 | d.content = x
52 | for optim in self.optims:
53 | d = optim(d.content)
54 | return d
55 |
--------------------------------------------------------------------------------
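
A usage sketch composing two optimizers from this package; each step receives the previous step's `.content` as its input prompt.

    from prompt_optimizer.poptim import PunctuationOptim, Sequential, StopWordOptim

    seq = Sequential(StopWordOptim(), PunctuationOptim())
    res = seq("Please, could you summarize the following text for me?")
    print(res.content)
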
/prompt_optimizer/poptim/stemmer_optim.py:
--------------------------------------------------------------------------------
1 | from nltk.stem import PorterStemmer
2 |
3 | from prompt_optimizer.poptim.base import PromptOptim
4 |
5 |
6 | class StemmerOptim(PromptOptim):
7 | """
8 | StemmerOptim is a prompt optimization technique that applies stemming to the prompt.
9 |
10 |     Stemming reduces words to their base or root form by stripping common suffixes.
11 |
12 | Example:
13 | >>> from prompt_optimizer.poptim import StemmerOptim
14 | >>> p_optimizer = StemmerOptim()
15 | >>> res = p_optimizer("example prompt...")
16 | >>> optimized_prompt = res.content
17 |
18 | """
19 |
20 | def __init__(self, verbose: bool = False, metrics: list = []):
21 | """
22 | Initializes the StemmerOptim object with the specified parameters.
23 |
24 | Args:
25 | verbose (bool, optional): If True, print verbose information during optimization. Defaults to False.
26 | metrics (list, optional): List of metrics to evaluate the optimization. Defaults to [].
27 | """
28 | super().__init__(verbose, metrics)
29 | self.stemmer = PorterStemmer()
30 |
31 | def optimize(self, prompt: str) -> str:
32 | """
33 | Applies stemming to the prompt.
34 |
35 | Args:
36 | prompt (str): The input prompt.
37 |
38 | Returns:
39 | str: The optimized prompt after applying stemming.
40 | """
41 | words = prompt.split()
42 | stemmed_words = [self.stemmer.stem(word) for word in words]
43 | opti_prompt = " ".join(stemmed_words)
44 | return opti_prompt
45 |
--------------------------------------------------------------------------------
/prompt_optimizer/poptim/stop_word_optim.py:
--------------------------------------------------------------------------------
1 | import nltk
2 |
3 | from prompt_optimizer.poptim.base import PromptOptim
4 |
5 |
6 | class StopWordOptim(PromptOptim):
7 | """
8 | StopWordOptim is a prompt optimization technique that removes stop words from the prompt.
9 |
10 | Stop words are commonly used words (e.g., "the", "is", "in") that are often considered insignificant in natural language processing tasks.
11 |
12 | Example:
13 | >>> from prompt_optimizer.poptim import StopWordOptim
14 | >>> p_optimizer = StopWordOptim()
15 | >>> res = p_optimizer("example prompt...")
16 | >>> optimized_prompt = res.content
17 |
18 | """
19 |
20 | def __init__(self, verbose: bool = False, metrics: list = []):
21 | """
22 | Initializes the StopWordOptim object with the specified parameters.
23 |
24 | Args:
25 | verbose (bool, optional): If True, print verbose information during optimization. Defaults to False.
26 | metrics (list, optional): List of metrics to evaluate the optimization. Defaults to [].
27 | """
28 | super().__init__(verbose, metrics)
29 | try:
30 | self.stop_words = set(nltk.corpus.stopwords.words("english"))
31 | except Exception:
32 | nltk.download("stopwords")
33 | self.stop_words = set(nltk.corpus.stopwords.words("english"))
34 |
35 | def optimize(self, prompt: str) -> str:
36 | """
37 | Removes stop words from the prompt.
38 |
39 | Args:
40 | prompt (str): The input prompt.
41 |
42 | Returns:
43 | str: The optimized prompt after removing stop words.
44 | """
45 | words = prompt.split()
46 | filtered_words = [word for word in words if word.lower() not in self.stop_words]
47 | opti_prompt = " ".join(filtered_words)
48 | return opti_prompt
49 |
--------------------------------------------------------------------------------
/prompt_optimizer/poptim/synonym_replace_optim.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import nltk
4 | import tiktoken
5 | from nltk.corpus import wordnet
6 |
7 | from prompt_optimizer.poptim.base import PromptOptim
8 |
9 |
10 | class SynonymReplaceOptim(PromptOptim):
11 | """
12 | SynonymReplaceOptim is a prompt optimization technique that replaces words in the prompt with their synonyms.
13 |
14 | Synonyms are words that have similar meanings to the original word. Sometimes a synonym has lower token count
15 | than the original word.
16 |
17 | Example:
18 | >>> from prompt_optimizer.poptim import SynonymReplaceOptim
19 | >>> p_optimizer = SynonymReplaceOptim()
20 | >>> res = p_optimizer("example prompt...")
21 | >>> optimized_prompt = res.content
22 | """
23 |
24 | def __init__(self, verbose: bool = False, metrics: list = [], p: float = 0.5):
25 | """
26 | Initializes the SynonymReplaceOptim object with the specified parameters.
27 |
28 | Args:
29 | verbose (bool, optional): If True, print verbose information during optimization. Defaults to False.
30 | metrics (list, optional): List of metrics to evaluate the optimization. Defaults to [].
31 | p (float, optional): Probability of replacing a word with a synonym. Defaults to 0.5.
32 | """
33 | super().__init__(verbose, metrics)
34 | self.p = p
35 | nltk.download("wordnet")
36 | self.tokenizer = tiktoken.get_encoding("cl100k_base")
37 |
38 | def get_word_pos(self, word: str) -> str:
39 | """
40 | Get the part of speech of a word.
41 |
42 | Args:
43 | word (str): The word.
44 |
45 | Returns:
46 | str: The part of speech of the word.
47 | """
48 | pos = wordnet.synset(word + ".n.01").pos()
49 | if pos.startswith("n"):
50 | return "n"
51 | elif pos.startswith("v"):
52 | return "v"
53 | elif pos.startswith("a"):
54 | return "a"
55 | elif pos.startswith("r"):
56 | return "r"
57 | else:
58 | return None
59 |
60 | def syn_replace(self, word: str) -> str:
61 | """
62 | Replace a word with its synonym.
63 |
64 | Args:
65 | word (str): The word.
66 |
67 | Returns:
68 | str: The best replacement synonym for the word.
69 | """
70 | best_replacement = word
71 | best_l = len(self.tokenizer.encode(word))
72 | if best_l > 1:
73 | for syn in wordnet.synsets(word):
74 | for lemma in syn.lemmas():
75 | synonym_word = lemma.name()
76 | l_new = len(self.tokenizer.encode(synonym_word))
77 | if l_new < best_l:
78 |                         best_replacement, best_l = synonym_word, l_new
79 | return best_replacement
80 |
81 | def optimize(self, prompt: str) -> str:
82 | """
83 | Replaces words in the prompt with their synonyms.
84 |
85 | Args:
86 | prompt (str): The input prompt.
87 |
88 | Returns:
89 | str: The optimized prompt with replaced synonyms.
90 | """
91 | words = prompt.split()
92 | opti_words = []
93 | for word in words:
94 | new_word = self.syn_replace(word)
95 | if new_word != word and random.uniform(0, 1) <= self.p:
96 | opti_words.append(new_word)
97 | else:
98 | opti_words.append(word)
99 |
100 | return " ".join(opti_words)
101 |
--------------------------------------------------------------------------------
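
A usage sketch for SynonymReplaceOptim; with `p=1.0` every word that has a WordNet synonym with a lower token count is replaced. The example prompt is arbitrary.

    from prompt_optimizer.poptim import SynonymReplaceOptim

    p_optimizer = SynonymReplaceOptim(p=1.0, verbose=True)
    res = p_optimizer("The government anticipated considerable difficulties.")
    print(res.content)
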
/prompt_optimizer/poptim/utils.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Callable, List, Tuple
2 |
3 |
4 | class DotDict(dict):
5 | """
6 | DotDict is a subclass of the built-in dict class that allows accessing dictionary keys using dot notation.
7 | It provides the ability to get and set attributes as if they were dictionary keys.
8 |
9 | Example:
10 | d = DotDict()
11 | d['key'] = 'value'
12 | print(d.key) # Output: 'value'
13 | """
14 |
15 | def __getattr__(self, attr: str) -> Any:
16 | """
17 | Get the value associated with the given attribute.
18 |
19 | Args:
20 | attr (str): The attribute name.
21 |
22 | Returns:
23 | Any: The value associated with the attribute.
24 |
25 | Raises:
26 | AttributeError: If the attribute does not exist in the dictionary.
27 | """
28 | if attr in self:
29 | return self[attr]
30 | raise AttributeError(
31 | f"'{self.__class__.__name__}' object has no attribute '{attr}'"
32 | )
33 |
34 | def __setattr__(self, attr: str, value: Any) -> None:
35 | """
36 | Set the value associated with the given attribute.
37 |
38 | Args:
39 | attr (str): The attribute name.
40 | value (Any): The value to be associated with the attribute.
41 |
42 | Returns:
43 | None
44 | """
45 | self[attr] = value
46 |
47 |
48 | class ParseError(Exception):
49 | """
50 | ParseError is a custom exception class raised when a parsing error occurs.
51 | It inherits from the built-in Exception class.
52 |
53 | Attributes:
54 | message (str): The error message describing the parsing error.
55 | prompt (str): The prompt where the parsing error occurred.
56 | """
57 |
58 | def __init__(self, message: str, prompt: str) -> None:
59 | """
60 | Initialize a new ParseError instance.
61 |
62 | Args:
63 | message (str): The error message describing the parsing error.
64 | prompt (str): The prompt where the parsing error occurred.
65 |
66 | Returns:
67 | None
68 | """
69 | super().__init__(message)
70 | self.prompt = prompt
71 |
72 | def __str__(self) -> str:
73 | """
74 | Return a string representation of the ParseError instance.
75 |
76 | Returns:
77 | str: A formatted string representing the ParseError instance.
78 |                 Example: "ParseError: {message} in `Prompt`: {prompt}"
79 | """
80 | return f"ParseError: {self.args[0]} in `Prompt`: {self.prompt}"
81 |
82 |
83 | def parse_protect_tags(prompt: str, protect_tag: str) -> Tuple[List[str], List[str]]:
84 | """
85 | Parse the given prompt and extract protected chunks enclosed by protect tags.
86 |
87 | Args:
88 | prompt (str): The prompt string to parse.
89 | protect_tag (str): The protect tag used to enclose the protected chunks.
90 |
91 | Returns:
92 | Tuple[List[str], List[str]]: A tuple containing two lists.
93 | - The first list contains the chunks of the prompt that are not protected.
94 | - The second list contains the protected chunks extracted from the prompt.
95 |
96 | Raises:
97 | ParseError: If there are nested protect tags, an unclosed protect tag, or invalid protect tag sequences.
98 | """
99 | protect_start_tag = f"<{protect_tag}>"
100 |     protect_end_tag = f"</{protect_tag}>"
101 |
102 | chunks = []
103 | protected_chunks = []
104 |
105 | stack = []
106 | start_idx = 0
107 |
108 | for i in range(len(prompt)):
109 | if prompt[i : i + len(protect_start_tag)] == protect_start_tag:
110 | if len(stack) != 0: # nested ignore tags make no sense
111 | raise ParseError("Nested ignore tags not allowed", prompt)
112 |
113 | stack.append(i)
114 | chunks.append(prompt[start_idx:i])
115 |
116 | elif prompt[i : i + len(protect_end_tag)] == protect_end_tag:
117 | start_idx = i + len(protect_end_tag)
118 | if len(stack) == 0:
119 | raise ParseError(
120 | f"Invalid protect tag sequence. {protect_end_tag} must follow an unclosed {protect_start_tag}",
121 | prompt,
122 | )
123 |
124 | protect_start_index = stack.pop()
125 | protect_content = prompt[protect_start_index + len(protect_start_tag) : i]
126 | protected_chunks.append(protect_content)
127 |
128 | if protect_content.startswith(
129 | protect_start_tag
130 | ) or protect_content.endswith(protect_end_tag):
131 | raise ParseError("Invalid protect tag sequence.", prompt)
132 |
133 | if len(stack) > 0:
134 | raise ParseError(
135 | f"All {protect_start_tag} must be followed by a corresponding {protect_end_tag}",
136 | prompt,
137 | )
138 |
139 | chunks.append(prompt[start_idx:])
140 | assert (
141 | len(chunks) == len(protected_chunks) + 1
142 | ), f"Invalid tag parsing for string: {prompt}"
143 |
144 | return chunks, protected_chunks
145 |
146 |
147 | def protected_runner(run: Callable) -> Callable:
148 | """
149 | Decorator function that runs the provided 'run' function in chunks for a given object and prompt.
150 | It extracts protected chunks from the prompt and runs the 'run' function on each non-protected chunk.
151 |
152 | Args:
153 | run (Callable): The function to run on each non-protected chunk.
154 |
155 | Returns:
156 | Callable: A wrapper function that performs the chunked execution of the 'run' function.
157 |
158 | Example:
159 | @protected_runner
160 | def my_run_function(obj, prompt, *args, **kwargs):
161 | # Perform some operations on prompt
162 | return optimized_prompt
163 |
164 | # Usage
165 | optimized_result = my_run_function(my_obj, my_prompt, my_args, my_kwargs)
166 | """
167 |
168 | def run_in_chunks(obj: object, prompt: str, *args, **kwargs) -> str:
169 | protect_tag = obj.protect_tag
170 | opti_prompt = ""
171 |
172 | if protect_tag is not None:
173 | chunks, protected_chunks = parse_protect_tags(prompt, protect_tag)
174 | protected_chunks.append("") # to make indexing easier
175 |
176 | for i, chunk in enumerate(chunks):
177 | if len(chunk):
178 | opti_chunk = run(obj, chunk, *args, **kwargs)
179 | else:
180 | opti_chunk = ""
181 | opti_prompt += opti_chunk + protected_chunks[i]
182 |
183 | elif len(prompt):
184 | opti_prompt = run(obj, prompt, *args, **kwargs)
185 |
186 | else:
187 | opti_prompt = prompt
188 |
189 | return opti_prompt
190 |
191 | return run_in_chunks
192 |
--------------------------------------------------------------------------------
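
A small sketch of how `parse_protect_tags` splits a prompt that uses the markup-style protect tags consumed by `protected_runner`; the tag name `pt` and the prompt are arbitrary examples.

    from prompt_optimizer.poptim.utils import parse_protect_tags

    prompt = "Trim everything else <pt>but keep this exact wording</pt> including this part."
    chunks, protected = parse_protect_tags(prompt, "pt")
    # chunks    -> ['Trim everything else ', ' including this part.']
    # protected -> ['but keep this exact wording']
    print(chunks, protected)
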
/prompt_optimizer/visualize/__init__.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.visualize.stringdiffer import StringDiffer
2 |
3 | __all__ = ["StringDiffer"]
4 |
--------------------------------------------------------------------------------
/prompt_optimizer/visualize/stringdiffer.py:
--------------------------------------------------------------------------------
1 | from difflib import ndiff
2 |
3 |
4 | class StringDiffer:
5 | def __init__(self):
6 | """
7 |         Initializes a StringDiffer object. The strings to compare are passed to `__call__`.
8 | """
9 | pass
10 |
11 | def __call__(self, original: str, optimized: str) -> None:
12 | """
13 | Prints the visualized difference between the original and optimized strings.
14 | Deletions are shown in red, insertions in green, and unchanged parts in default color.
15 |
16 | Args:
17 | original (str): The original string.
18 | optimized (str): The optimized string.
19 | """
20 | original = str(original)
21 | optimized = str(optimized)
22 |
23 | diff = list(ndiff(original, optimized))
24 | output = ""
25 | for op, _, value in diff:
26 | if op == "-":
27 | output += f"\033[91m{value}\033[0m" # Red color for deletions
28 | elif op == "+":
29 | output += f"\033[92m{value}\033[0m" # Green color for insertions
30 | else:
31 | output += value
32 | print(output)
33 |
--------------------------------------------------------------------------------
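
A usage sketch for StringDiffer, which prints a character-level diff with ANSI colors (deletions in red, insertions in green); the two strings are arbitrary examples.

    from prompt_optimizer.visualize import StringDiffer

    sd = StringDiffer()
    sd("Please summarize the following text.", "summarize following text")
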
/prompt_optimizer/wrapper/__init__.py:
--------------------------------------------------------------------------------
1 | from prompt_optimizer.wrapper.base import Wrapper
2 | from prompt_optimizer.wrapper.openai import OpenAIWrapper
3 | from prompt_optimizer.wrapper.sql_db import SQLDBManager
4 |
5 | __all__ = [
6 | "OpenAIWrapper",
7 | "SQLDBManager",
8 | "Wrapper",
9 | ]
10 |
--------------------------------------------------------------------------------
/prompt_optimizer/wrapper/base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | # import tiktoken
4 |
5 |
6 | class Wrapper(ABC):
7 | """
8 | Abstract base class for a wrapper.
9 |
10 | Attributes:
11 | db_manager: The database manager object.
12 | poptimizer: The poptimizer object.
13 | tokenizer: The tokenizer object.
14 | """
15 |
16 | def __init__(self, db_manager, poptimizer):
17 | """
18 | Initializes a new instance of the Wrapper class.
19 |
20 | Args:
21 | db_manager: The database manager object.
22 | poptimizer: The poptimizer object.
23 | """
24 | self.db_manager = db_manager
25 | self.poptimizer = poptimizer
26 | # self.tokenizer = tiktoken.get_encoding("cl100k_base")
27 |
28 | # def token_count(
29 | # self, messages: Union[List[Dict[str, str]], str], json: bool = True
30 | # ) -> int:
31 | # """
32 | # Calculates the total token count for the given messages.
33 |
34 | # Args:
35 | # messages: The list of messages or a single message string.
36 | # json: Indicates whether the messages are in JSON format (default: True).
37 |
38 | # Returns:
39 | # The total token count.
40 |
41 | # Raises:
42 | # TypeError: If messages is not a list or a string.
43 | # """
44 | # if json is True:
45 | # c = sum([len(self.tokenizer.encode(m["content"])) for m in messages])
46 | # elif isinstance(messages, list):
47 | # c = sum([len(self.tokenizer.encode(m)) for m in messages])
48 | # else:
49 | # c = len(self.tokenizer.encode(messages))
50 | # return c
51 |
52 | @abstractmethod
53 | def wrap(self, *args, **kwargs):
54 | """
55 | Abstract method for wrapping.
56 |
57 | Args:
58 | *args: Variable length argument list.
59 | **kwargs: Arbitrary keyword arguments.
60 | """
61 | pass
62 |
--------------------------------------------------------------------------------
/prompt_optimizer/wrapper/openai.py:
--------------------------------------------------------------------------------
1 | import json
2 | import time
3 | from typing import Any, Callable, Dict
4 |
5 | import tiktoken
6 |
7 | from prompt_optimizer.wrapper.base import Wrapper
8 |
9 |
10 | class OpenAIWrapper(Wrapper):
11 | """
12 | Wrapper class for OpenAI API.
13 |
14 | Inherits from the base Wrapper class.
15 |
16 | Attributes:
17 | db_manager: The database manager object.
18 | poptimizer: The poptimizer object.
19 | """
20 |
21 | def __init__(self, db_manager, poptimizer):
22 | """
23 | Initializes a new instance of the OpenAIWrapper class.
24 |
25 | Args:
26 | db_manager: The database manager object.
27 | poptimizer: The poptimizer object.
28 | """
29 | super().__init__(db_manager, poptimizer)
30 |
31 | def num_tokens_from_messages(self, messages, model="gpt-3.5-turbo-0301"):
32 | """
33 | Source: https://stackoverflow.com/a/76044069
34 | https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
35 | Returns the number of tokens used by a list of messages."""
36 | try:
37 | encoding = tiktoken.encoding_for_model(model)
38 | except KeyError:
39 | print("Warning: model not found. Using cl100k_base encoding.")
40 | encoding = tiktoken.get_encoding("cl100k_base")
41 | if model == "gpt-3.5-turbo":
42 | print(
43 | "Warning: gpt-3.5-turbo may change over time. Returning num tokens assuming gpt-3.5-turbo-0301."
44 | )
45 | return self.num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301")
46 | elif model == "gpt-4":
47 | print(
48 | "Warning: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314."
49 | )
50 | return self.num_tokens_from_messages(messages, model="gpt-4-0314")
51 | elif model == "gpt-3.5-turbo-0301":
52 | tokens_per_message = (
53 | 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
54 | )
55 | tokens_per_name = -1 # if there's a name, the role is omitted
56 | elif model == "gpt-4-0314":
57 | tokens_per_message = 3
58 | tokens_per_name = 1
59 | else:
60 | raise NotImplementedError(f"""not implemented for model {model}""")
61 | num_tokens = 0
62 |
63 |         if isinstance(messages, list):
64 | for message in messages:
65 | num_tokens += tokens_per_message
66 | for key, value in message.items():
67 | num_tokens += len(encoding.encode(value))
68 | if key == "name":
69 | num_tokens += tokens_per_name
70 | num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
71 |         elif isinstance(messages, str):
72 | num_tokens += len(encoding.encode(messages))
73 | return num_tokens
74 |
75 | def wrap(self, openai_func: Callable[..., Any], *args, **kwargs) -> Dict[str, Any]:
76 | """
77 | Wraps the OpenAI function with additional functionality.
78 |
79 | Args:
80 | openai_func: The OpenAI function to be wrapped.
81 | *args: Variable length argument list.
82 | **kwargs: Arbitrary keyword arguments.
83 |
84 | Returns:
85 | The response from the OpenAI function.
86 |
87 | Raises:
88 | KeyError: If the 'model' or 'messages' key is missing in kwargs.
89 | """
90 | model = kwargs["model"]
91 | timestamp = int(time.time())
92 | messages_before = kwargs["messages"]
93 |
94 | if self.poptimizer is not None:
95 | start_time = time.time()
96 | optimized_messages = self.poptimizer.run_json(messages_before)
97 | optimizer_runtime = time.time() - start_time
98 | kwargs["messages"] = optimized_messages
99 | else:
100 | optimizer_runtime = 0
101 | optimized_messages = {}
102 |
103 | prompt_before_token_count = self.num_tokens_from_messages(messages_before)
104 | prompt_after_token_count = self.num_tokens_from_messages(optimized_messages)
105 |
106 | request_start_time = time.time()
107 | try:
108 | response = openai_func(*args, **kwargs)
109 | continuation_token_count = response["usage"]["completion_tokens"]
110 | continuation = json.dumps(response.choices[0])
111 | error = 0
112 | error_name = "None"
113 | except Exception as e:
114 | error = 1
115 | error_name = type(e).__name__
116 | continuation = "None"
117 | continuation_token_count = 0
118 |
119 | request_runtime = time.time() - request_start_time
120 |
121 | if self.db_manager:
122 | with self.db_manager:
123 | self.db_manager.add(
124 | [
125 | timestamp,
126 | self.db_manager.username,
127 | json.dumps(messages_before),
128 | json.dumps(optimized_messages),
129 | continuation,
130 | prompt_before_token_count,
131 | prompt_after_token_count,
132 | continuation_token_count,
133 | model,
134 | error,
135 | error_name,
136 | optimizer_runtime,
137 | request_runtime,
138 | ]
139 | )
140 | return response
141 |
142 | def __call__(self, *args, **kwargs) -> Dict[str, Any]:
143 | """
144 | Calls the OpenAIWrapper instance as a function.
145 |
146 | Args:
147 | *args: Variable length argument list.
148 | **kwargs: Arbitrary keyword arguments.
149 |
150 | Returns:
151 | The response from the OpenAI function.
152 | """
153 | return self.wrap(*args, **kwargs)
154 |
--------------------------------------------------------------------------------
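
A usage sketch mirroring the commented-out wrapper test later in this repository; it assumes a valid OPENAI_API_KEY in the environment and the pre-1.0 `openai` SDK (`openai.ChatCompletion`) pinned in pyproject.toml. The project name "demo_project" is arbitrary.

    import openai

    from prompt_optimizer.poptim import StopWordOptim
    from prompt_optimizer.wrapper import OpenAIWrapper, SQLDBManager

    p_optimizer = StopWordOptim(verbose=True)
    db_manager = SQLDBManager(project_name="demo_project")  # logs to ~/.prompt_optim/default.db
    oai_wrapper = OpenAIWrapper(db_manager, p_optimizer)

    response = oai_wrapper(
        openai.ChatCompletion.create,
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Who won the world series in 2020?"},
        ],
    )
    print(response)
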
/prompt_optimizer/wrapper/sql_db.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sqlite3
3 | from typing import Optional, Tuple
4 |
5 |
6 | class SQLDBManager:
7 | """
8 | A class to manage an SQLite database.
9 |
10 | Attributes:
11 | database_name: The name of the SQLite database file.
12 | connection: The database connection object.
13 | cursor: The database cursor object.
14 | """
15 |
16 | def __init__(
17 | self, project_name: str = "default_project", database_path: Optional[str] = None
18 | ):
19 | """
20 | Initializes a new SQLDBManager object.
21 |
22 | Args:
23 | project_name: The name of the project.
24 | database_path: The path to the SQLite database file.
25 | """
26 | if database_path is None:
27 | home_dir = os.path.expanduser("~")
28 | database_dir = os.path.join(home_dir, ".prompt_optim")
29 | os.makedirs(database_dir, exist_ok=True)
30 | self.database_path = os.path.join(database_dir, "default.db")
31 | else:
32 | self.database_path = database_path
33 |
34 | self.connection = None
35 | self.cursor = None
36 | self.project_name = project_name
37 | self.table_name = self.project_name
38 | self.username = "default"
39 |
40 | def set_user(self, username):
41 | self.username = username
42 |
43 | def __enter__(self):
44 | """
45 | Establishes the database connection and cursor when entering the context.
46 | """
47 | self.connect()
48 | self.create_table()
49 | return self
50 |
51 | def __exit__(self, exc_type, exc_value, traceback):
52 | """
53 | Closes the database connection and cursor when exiting the context.
54 | """
55 | self.close()
56 |
57 | def connect(self):
58 | """
59 | Connects to the SQLite database.
60 | """
61 | try:
62 | self.connection = sqlite3.connect(self.database_path)
63 | self.cursor = self.connection.cursor()
64 | except sqlite3.Error as e:
65 | print(f"Error connecting to the SQLite database: {e}")
66 |
67 | def create_table(self):
68 | """
69 | Creates a table in the database if it doesn't exist.
70 |
71 | Args:
72 | table_name: The name of the table.
73 | """
74 | try:
75 | self.cursor.execute(
76 | f"""CREATE TABLE IF NOT EXISTS {self.table_name} (
77 | id INTEGER PRIMARY KEY AUTOINCREMENT,
78 | timestamp DATETIME,
79 | username TEXT,
80 | prompt_before TEXT,
81 | prompt_after TEXT,
82 | continuation TEXT,
83 | prompt_before_token_count INTEGER,
84 | prompt_after_token_count INTEGER,
85 | continuation_token_count INTEGER,
86 | model_name TEXT,
87 | error INTEGER,
88 | error_name TEXT,
89 | optimizer_latency FLOAT,
90 | request_latency FLOAT
91 | )"""
92 | )
93 |
94 | except sqlite3.Error as e:
95 | print(f"Error creating table: {e}")
96 |
97 | def add(self, data: Tuple) -> bool:
98 | """
99 | Adds data to the specified table.
100 |
101 | Args:
102 | data: A tuple containing the data to be added.
103 |
104 | Returns:
105 | bool: `True` if successfully inserted values else `False`.
106 | """
107 | try:
108 | self.cursor.execute(
109 | f"""INSERT INTO {self.table_name} (
110 | timestamp,
111 | username,
112 | prompt_before,
113 | prompt_after,
114 | continuation,
115 | prompt_before_token_count,
116 | prompt_after_token_count,
117 | continuation_token_count,
118 | model_name,
119 | error,
120 | error_name,
121 | optimizer_latency,
122 | request_latency
123 | ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
124 | data,
125 | )
126 | self.connection.commit()
127 |
128 | except sqlite3.Error as e:
129 | print(f"Error adding data: {e}")
130 | return False
131 |
132 | return True
133 |
134 | def close(self):
135 | """
136 | Closes the database connection and cursor.
137 | """
138 | if self.cursor:
139 | self.cursor.close()
140 | if self.connection:
141 | self.connection.close()
142 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "prompt-optimizer"
3 | version = "0.2.1"
4 | description = ""
5 | authors = ["Vaibhav Kumar <34630911+TimeTraveller-San@users.noreply.github.com>"]
6 | readme = "README.md"
7 | packages = [{include = "prompt_optimizer"}]
8 |
9 | [tool.poetry.scripts]
10 | prompt-optimizer = "prompt_optimizer.cli.main:main"
11 |
12 | [tool.poetry.dependencies]
13 | python = ">=3.8.1,<4.0"
14 | transformers = "^4.27.4"
15 | torch = "^2.0.0"
16 | pulp = "^2.7.0"
17 | nltk = "^3.8.1"
18 | tiktoken = "^0.3.3"
19 | autocorrect = "^2.6.1"
20 | black = "^23.3.0"
21 | isort = "^5.12.0"
22 | openai = "^0.27.8"
23 | python-dotenv = "^1.0.0"
24 |
25 | [tool.poetry.group.dev.dependencies]
26 | sphinx = "^6.1.3"
27 | sphinx-book-theme = "^1.0.1"
28 | ruff = "^0.0.261"
29 | mypy = "^1.2.0"
30 | pytest = "^7.3.0"
31 | black = "^23.3.0"
32 | langchain = "^0.0.173"
33 | myst-parser = "^1.0.0"
34 |
35 | [tool.poetry.group.docs.dependencies]
36 | autodoc_pydantic = "^1.8.0"
37 | nbsphinx = "^0.8.9"
38 | sphinx-autobuild = "^2021.3.14"
39 | sphinx_rtd_theme = "^1.0.0"
40 | sphinx-typlog-theme = "^0.8.0"
41 | toml = "^0.10.2"
42 | linkchecker = "^10.2.1"
43 | sphinx-copybutton = "^0.5.1"
44 |
45 | [tool.poetry.group.test.dependencies]
46 | pytest = "^7.2.0"
47 | pytest-cov = "^4.0.0"
48 | pytest-dotenv = "^0.5.2"
49 | duckdb-engine = "^0.7.0"
50 | pytest-watcher = "^0.2.6"
51 | freezegun = "^1.2.2"
52 | responses = "^0.22.0"
53 | pytest-asyncio = "^0.20.3"
54 |
55 | [tool.ruff]
56 | select = [
57 | "E", # pycodestyle
58 | "F", # pyflakes
59 | "I", # isort
60 | ]
61 | line-length = 88
62 | fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
63 | ignore = ["E501", "F403"]
64 |
65 |
66 | [tool.mypy]
67 | ignore_missing_imports = "True"
68 | disallow_untyped_defs = "True"
69 | exclude = ["notebooks"]
70 |
71 | [build-system]
72 | requires = ["poetry-core"]
73 | build-backend = "poetry.core.masonry.api"
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/tests/__init__.py
--------------------------------------------------------------------------------
/tests/unit_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaibkumr/prompt-optimizer/e57a6283cda1491cd8dd00fd9e29b10735fc5806/tests/unit_tests/__init__.py
--------------------------------------------------------------------------------
/tests/unit_tests/data/prompt1.txt:
--------------------------------------------------------------------------------
1 | Instructions: You will be presented with an audit question and four options A through D. Your task is to select the option that is closest to the correctanswer. For each question you must select one of the following answers: 'A', 'B', 'C', 'D'. Do not explain. Do not include the number in your response.
2 | Which of the following is typically not included in the inquiry letter sent an unfavorable outcome. B. A disclaimer regarding the likelihood of settlement of pendinglitigation. C. An estimate of the range of potential loss. D. A listing of pending or threatened litigation, claims, or assessments. For each question you must select one of the following answers: 'A', 'B', 'C', 'D'.
3 |
4 |
--------------------------------------------------------------------------------
/tests/unit_tests/data/prompt2.txt:
--------------------------------------------------------------------------------
1 | Some economists have responded positively to Bitcoin, including
2 | Francois R. Velde, senior economist of the Federal Reserve in Chicago
3 | who described it as "an elegant solution to the problem of creating a
4 | digital currency." In November 2013 Richard Branson announced that
5 | Virgin Galactic would accept Bitcoin as payment, saying that he had invested
6 | in Bitcoin and found it "fascinating how a whole new global currency
7 | has been created", encouraging others to also invest in Bitcoin.
8 | Other economists commenting on Bitcoin have been critical.
9 | Economist Paul Krugman has suggested that the structure of the currency
10 | incentivizes hoarding and that its value derives from the expectation that
11 | others will accept it as payment. Economist Larry Summers has expressed
12 | a "wait and see" attitude when it comes to Bitcoin. Nick Colas, a market
13 | strategist for ConvergEx Group, has remarked on the effect of increasing
14 | use of Bitcoin and its restricted supply, noting, "When incremental
15 | adoption meets relatively fixed supply, it should be no surprise that
16 | prices go up. And that’s exactly what is happening to BTC prices. Francois also said"
--------------------------------------------------------------------------------
/tests/unit_tests/test_autocorrect_optim.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.metric import BERTScoreMetric, TokenMetric
3 | from prompt_optimizer.poptim import AutocorrectOptim
4 |
5 |
6 | def test_autocorrect_optim():
7 | prompt = utils.load_prompt("prompt1.txt")
8 | p_optimizer = AutocorrectOptim(
9 | verbose=True, metrics=[TokenMetric(), BERTScoreMetric()]
10 | )
11 | optimized_prompt = p_optimizer(prompt)
12 | assert len(optimized_prompt) > 0, "Failed!"
13 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_entropy_optim.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.metric import TokenMetric
3 | from prompt_optimizer.poptim import EntropyOptim
4 |
5 |
6 | def test_entropy_optim():
7 | prompt = utils.load_prompt("prompt1.txt")
8 | p_optimizer = EntropyOptim(verbose=True, p=0.1, metrics=[TokenMetric()])
9 | optimized_prompt = p_optimizer(prompt)
10 | assert len(optimized_prompt) > 0, "Failed!"
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_lemmatizer_optim.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.metric import TokenMetric
3 | from prompt_optimizer.poptim import LemmatizerOptim
4 |
5 |
6 | def test_lemmatizer_optim():
7 | prompt = utils.load_prompt("prompt1.txt")
8 | p_optimizer = LemmatizerOptim(verbose=True, metrics=[TokenMetric()])
9 | optimized_prompt = p_optimizer(prompt)
10 | assert len(optimized_prompt) > 0, "Failed!"
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_name_replace_optim.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.metric import TokenMetric
3 | from prompt_optimizer.poptim import NameReplaceOptim
4 |
5 |
6 | def test_name_replace_optim():
7 | prompt = utils.load_prompt("prompt2.txt")
8 | p_optimizer = NameReplaceOptim(verbose=True, metrics=[TokenMetric()])
9 | optimized_prompt = p_optimizer(prompt)
10 | assert len(optimized_prompt) > 0, "Failed!"
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_openai_wrapper.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | from prompt_optimizer.poptim import StopWordOptim
4 | from prompt_optimizer.wrapper.sql_db import SQLDBManager
5 | from prompt_optimizer.wrapper.openai import OpenAIWrapper
6 | from dotenv import load_dotenv
7 |
8 | load_dotenv()
9 | openai.api_key = os.getenv("OPENAI_API_KEY")
10 |
11 |
12 | def test_openai_wrapper():
13 | # p_optimizer = StopWordOptim(verbose=True)
14 | # sql_db = SQLDBManager()
15 | # oai_wrapper = OpenAIWrapper(sql_db, p_optimizer)
16 | # response = oai_wrapper(
17 | # openai.ChatCompletion.create,
18 | # model="gpt-3.5-turbo",
19 | # messages=[
20 | # {"role": "system", "content": "You are a helpful assistant."},
21 | # {"role": "user", "content": "Who won the world series in 2020?"},
22 | # {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
23 | # {"role": "user", "content": "Where was it played?"}
24 | # ]
25 | # )
26 | # print(f"response: {response}")
27 | response = True
28 |
29 | assert response is not None, "Failed!"
30 |
31 |
32 | test_openai_wrapper()
--------------------------------------------------------------------------------
/tests/unit_tests/test_protected_tags.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.poptim import PunctuationOptim
3 |
4 |
5 | def test_punctuation_optim():
6 |     prompt = "Yharnam is a fictional city <pt>that is the primary setting of Bloodborne , a 2015 video game</pt> developed by FromSoftware."
7 | p_optimizer = PunctuationOptim(protect_tag="pt", verbose=True)
8 | optimized_prompt = p_optimizer(prompt)
9 | print(prompt)
10 | print(optimized_prompt)
11 | assert "," in optimized_prompt.content, "protect tags not working"
12 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_punctuation_optim.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.metric import TokenMetric
3 | from prompt_optimizer.poptim import PunctuationOptim
4 |
5 |
6 | def test_punctuation_optim():
7 | prompt = utils.load_prompt("prompt1.txt")
8 | p_optimizer = PunctuationOptim(verbose=True, metrics=[TokenMetric()])
9 | optimized_prompt = p_optimizer(prompt)
10 | assert len(optimized_prompt) > 0, "Failed!"
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_sequential.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.poptim import (
3 | AutocorrectOptim,
4 | LemmatizerOptim,
5 | PunctuationOptim,
6 | Sequential,
7 | )
8 |
9 |
10 | def test_sequential():
11 | prompt = utils.load_prompt("prompt1.txt")
12 |
13 | p_optimizer = Sequential(
14 | LemmatizerOptim(verbose=True),
15 | PunctuationOptim(verbose=True),
16 | AutocorrectOptim(verbose=True),
17 | )
18 | optimized_prompt = p_optimizer(prompt)
19 | assert len(optimized_prompt) > 0, "Failed!"
20 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_sql_db.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from prompt_optimizer.wrapper import SQLDBManager
3 |
4 | 
5 | def test_db():
6 |     db_manager = SQLDBManager("temp", "temp.db")
7 |     with db_manager:
8 |         data = [
9 |             datetime.now(),
10 |             "test_user",
11 |             "prompt before",
12 |             "prompt after",
13 |             "continuation",
14 |             2,
15 |             2,
16 |             1,
17 |             "text-davinci-003",
18 |             0,
19 |             "None",
20 |             0.12,
21 |             0.11
22 |         ]
23 |         success = db_manager.add(data)
24 |     assert success, "failed"
25 | 
--------------------------------------------------------------------------------
/tests/unit_tests/test_stop_word_optim.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.metric import TokenMetric
3 | from prompt_optimizer.poptim import StopWordOptim
4 |
5 |
6 | def test_stop_word_optim():
7 | prompt = utils.load_prompt("prompt1.txt")
8 | p_optimizer = StopWordOptim(verbose=True, metrics=[TokenMetric()])
9 | optimized_prompt = p_optimizer(prompt)
10 | assert len(optimized_prompt) > 0, "Failed!"
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/test_synonym_replace_optim.py:
--------------------------------------------------------------------------------
1 | from tests.unit_tests import utils
2 | from prompt_optimizer.metric import TokenMetric
3 | from prompt_optimizer.poptim import SynonymReplaceOptim
4 |
5 |
6 | def test_synonym_replace_optim():
7 | prompt = utils.load_prompt("prompt1.txt")
8 | p_optimizer = SynonymReplaceOptim(verbose=True, p=1.0, metrics=[TokenMetric()])
9 | optimized_prompt = p_optimizer(prompt)
10 | assert len(optimized_prompt) > 0, "Failed!"
11 |
--------------------------------------------------------------------------------
/tests/unit_tests/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | def load_prompt(prompt_f):
5 |     """Read and return the contents of a prompt file from tests/unit_tests/data."""
6 |     file_path = os.path.abspath(
7 |         os.path.join(os.path.dirname(__file__), "data", prompt_f)
8 |     )
9 |     with open(file_path, "r") as f:
10 |         data = f.read()
11 |     return data
12 | 
--------------------------------------------------------------------------------