├── .flake8 ├── .gitignore ├── .mypy.ini ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── docs ├── Makefile ├── publish.sh └── source │ ├── README.md │ ├── api.rst │ ├── cli.rst │ ├── conf.py │ └── index.rst ├── examples ├── __pycache__ │ └── use_cases.cpython-38.pyc ├── python_examples.py ├── readme_examples.py └── readme_hello_world.py ├── promptimize ├── __init__.py ├── cli.py ├── crawler.py ├── evals.py ├── prompt_cases.py ├── reports.py ├── simple_jinja.py ├── suite.py └── utils.py ├── publish.sh ├── pyproject.toml ├── requirements-dev.txt ├── requirements-examples.txt ├── requirements.txt └── setup.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | exclude = tests/* 4 | max-complexity = 10 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__* 2 | build/ 3 | dist/ 4 | work/* 5 | promptimize.egg-info/ 6 | -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | exclude = ['docs/'] 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | repos: 18 | #- repo: https://github.com/pre-commit/mirrors-mypy 19 | # rev: v0.941 20 | # hooks: 21 | # - id: mypy 22 | # additional_dependencies: [types-all] 23 | - repo: https://github.com/pre-commit/pre-commit-hooks 24 | rev: v4.4.0 25 | hooks: 26 | - id: check-added-large-files 27 | exclude: \.(geojson)$ 28 | - id: check-yaml 29 | exclude: ^helm/superset/templates/ 30 | - id: debug-statements 31 | - id: end-of-file-fixer 32 | - id: trailing-whitespace 33 | args: ["--markdown-linebreak-ext=md"] 34 | - repo: https://github.com/psf/black 35 | rev: 23.9.1 36 | hooks: 37 | - id: black 38 | language_version: python3 39 | - repo: https://github.com/pre-commit/mirrors-prettier 40 | rev: v3.0.3 # Use the sha or tag you want to point at 41 | hooks: 42 | - id: prettier 43 | args: ["--ignore-path=./superset-frontend/.prettierignore"] 44 | files: "superset-frontend" 45 | # blacklist unsafe functions like make_url (see #19526) 46 | - repo: https://github.com/skorokithakis/blacklist-pre-commit-hook 47 | rev: e2f070289d8eddcaec0b580d3bde29437e7c8221 48 | hooks: 49 | - id: blacklist 50 | args: ["--blacklisted-names=make_url", "--ignore=tests/"] 51 | - repo: https://github.com/pycqa/flake8 52 | rev: '6.1.0' 53 | hooks: 54 | - id: flake8 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 💡 ¡promptimize! 💡 2 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 3 | [![PyPI version](https://badge.fury.io/py/promptimize.svg)](https://badge.fury.io/py/promptimize) 4 | 5 | 6 | 7 | Promptimize is a prompt engineering **evaluation** and **testing** toolkit. 
8 | 
9 | It accelerates and provides structure around prompt engineering at scale
10 | with confidence, bringing some of the ideas behind test-driven
11 | development (TDD) to engineering prompts.
12 | 
13 | With promptimize, you can:
14 | 
15 | - Define your "prompt cases" (think "test cases" but specific to evaluating
16 |   prompts) as code and associate them with evaluation functions
17 | - Generate prompt variations dynamically
18 | - Execute and rank prompt test suites across different
19 |   engines/models/temperature/settings and compare results, bringing
20 |   the hyperparameter tuning mindset to prompt engineering
21 | - Get reports on your prompts' performance as you iterate. Answer questions
22 |   about how different prompt suites are performing against one another.
23 |   Which individual cases or categories of cases improved? Which regressed?
24 | - Minimize API calls! Only re-assess what changed as you change it
25 | - Perform human review if and where needed, introspect failed cases, and override
26 |   false negatives
27 | 
28 | In essence, promptimize provides a programmatic way to execute and fine-tune
29 | your prompts and evaluation functions in Python, allowing you to iterate
30 | quickly and with confidence.
31 | 
32 | ## Hello world - the simplest prompt examples
33 | [more examples on GitHub](https://github.com/preset-io/promptimize/tree/main/examples)
34 | ```python
35 | # Bringing some "prompt generator" classes - note that you can derive and extend those
36 | from promptimize.prompt_cases import PromptCase
37 | 
38 | # Bringing some useful eval functions that help evaluate and score responses
39 | # eval functions have a handle on the prompt object and are expected
40 | # to return a score between 0 and 1
41 | from promptimize import evals
42 | 
43 | # Promptimize will scan the target folder and find all Prompt objects
44 | # and derivatives that are in the python modules
45 | simple_prompts = [
46 | 
47 |     # Prompting "hello there" and making sure there's "hi" or "hello"
48 |     # somewhere in the answer
49 |     PromptCase("hello there!", lambda x: evals.any_word(x.response, ["hi", "hello"])),
50 |     PromptCase(
51 |         "name the top 50 guitar players!", lambda x: evals.all_words(x.response, ["frank zappa"])
52 |     ),
53 | ]
54 | ```
55 | 
56 | ### The CLI's `run` command
57 | ```
58 | $ promptimize run --help
59 | Usage: promptimize run [OPTIONS] PATH
60 | 
61 |   run some prompts
62 | 
63 | Options:
64 |   -v, --verbose             Trigger more verbose output
65 |   -f, --force               Force run, do not skip
66 |   -h, --human               Human review, allowing a human to review and force
67 |                             pass/fail each prompt case
68 |   -r, --repair              Only re-run previously failed
69 |   -x, --dry-run             DRY run, don't call the API
70 |   --shuffle                 Shuffle the prompts in a random order
71 |   -s, --style [json|yaml]   json or yaml formatting
72 |   -m, --max-tokens INTEGER  max_tokens passed to the model
73 |   -l, --limit INTEGER       limit how many prompt cases to run in a single
74 |                             batch
75 |   -t, --temperature FLOAT   the temperature passed to the model
76 |   -e, --engine TEXT         model as accepted by the openai API
77 |   -k, --key TEXT            The keys to run
78 |   -o, --output PATH
79 |   -s, --silent
80 | ```
81 | 
82 | Let's run those examples and produce a report `./report.yaml`
83 | ```
84 | $ promptimize run examples/ --output ./report.yaml
85 | ```
86 | 
87 | ```yaml
88 | 💡 ¡promptimize! 💡
89 | # ----------------------------------------
90 | # (1/2) [RUN] prompt: prompt-115868ef
91 | # ----------------------------------------
92 | key: prompt-115868ef
93 | user_input: hello there!
94 | prompt_hash: 115868ef
95 | response: Hi there! How are you doing today?
96 | execution:
97 |   api_call_duration_ms: 883.8047981262207
98 |   run_at: '2023-04-25T02:21:40.443077'
99 | score: 1.0
100 | 
101 | # ----------------------------------------
102 | # (2/2) [RUN] prompt: prompt-5c085656
103 | # ----------------------------------------
104 | key: prompt-5c085656
105 | user_input: name the top 10 guitar players!
106 | prompt_hash: 5c085656
107 | response: |-
108 |   1. Jimi Hendrix
109 |   2. Eric Clapton
110 |   {{ ... }}
111 |   11. Carlos Santana
112 | weight: 2
113 | execution:
114 |   api_call_duration_ms: 2558.135747909546
115 |   run_at: '2023-04-25T02:21:43.007529'
116 | score: 0.0
117 | 
118 | # ----------------------------------------
119 | # Suite summary
120 | # ----------------------------------------
121 | suite_score: 0.3333333333333333
122 | git_info:
123 |   sha: 2cf28498ba0f
124 |   branch: main
125 |   dirty: true
126 | ```
127 | 
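The report written with `--output` can also be reloaded and summarized after the fact. Here's a minimal sketch of doing that in Python; it mirrors what the CLI's `report` subcommand does (see `promptimize/cli.py`), with `"category"` being the default grouping column:

```python
# Minimal sketch, mirroring the `promptimize report` CLI command
from promptimize.reports import Report

report = Report.from_path("./report.yaml")
report.print_summary("category")  # aggregate scores, grouped by prompt category
```
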
128 | ## Problem + POV
129 | 
130 | Thousands of product builders are currently trying to figure out how to
131 | bring the power of AI into the products and experiences they are building.
132 | The probabilistic (often semi-random, sometimes hectic) nature of LLMs
133 | makes this a challenge.
134 | 
135 | Prompt engineering is a huge piece of the puzzle in terms of how to do this
136 | right, especially given the complexity, risks, and drawbacks around
137 | model tuning.
138 | 
139 | We believe product builders need to tame AI through proper, rigorous
140 | **prompt engineering**. This allows making the probabilistic nature of
141 | AI more deterministic, or somewhat predictable, and allows builders to apply
142 | a hyperparameter tuning-type mindset and approach to prompt engineering.
143 | 
144 | Any prompt-generator logic that's going to be let loose in the wild inside
145 | a product should be thoroughly tested and evaluated with "prompt cases" that
146 | cover the breadth of what people may do in a product.
147 | 
148 | In short, Promptimize allows you to test prompts at industrial scale,
149 | so that you can confidently use them in the products you are building.
150 | 
151 | ## Information Architecture
152 | 
153 | - **Prompt:** A Prompt instance is a certain test case, a single prompt
154 |   with an associated set of evaluation functions to rate its success.
155 | - **Evaluation:** An evaluation function that reads the response and returns
156 |   a success rate between `0` and `1`.
157 | - **Suite:** A Suite is a collection of Prompts; it's able to run things,
158 |   accumulate results, and print reports about its collection of use cases.
159 | - **Report**: a report is the compiled results of running a certain prompt
160 |   `Suite` or set of suites. Reports can be consumed, compared, and expanded.
161 | 
162 | ## Principles
163 | 
164 | - **Configuration as code:** All prompt cases, suites, and evaluations are
165 |   defined as code, which makes it easy to dynamically generate all sorts
166 |   of use cases and suites.
167 | - **Expressive**: a clean DSL that's to-the-point -> user prompt + assertions.
168 |   The actual prompt creation logic lives in the derivative class of `PromptCase`,
169 |   so that we can have clean, dense files that contain nice `Suite`s
170 | - **Support the iteration mindset:** making it easy for people to try things,
171 |   get suggestions from the AI, adapt, compare, and push forward
172 | - **Extensibility:** the toolkit is designed to be extremely hackable and
173 |   extensible. Hooks, extensions, high API surface.
174 | - **AI-powered:** the framework offers ways to expand your suites based
175 |   on the examples that exist. Use AI to generate more prompt cases!
176 | 
177 | 
178 | ## Interesting features / facts
179 | 
180 | Listing out a few features you should know about that you can start using as your
181 | suites of prompts become larger / more complex. A short sketch follows the list.
182 | 
183 | * evaluation functions are assumed to return a value between 0 and 1;
184 |   unlike unit tests, prompt cases aren't boolean
185 | * prompts can be assigned a `weight` (default 1); this enables you to define
186 |   which prompts are more important than others for reporting purposes and suite evaluation
187 | * prompts can be assigned a `category`, which can be used in the reporting.
188 |   That helps in understanding which categories are performing better than
189 |   others, or are most affected by iterations
190 | * The `Prompt` class offers `pre_run` and `post_run` hooks if you want to do
191 |   post-processing, for instance. An example of that would be a prompt
192 |   that expects GPT to generate code, where you'd like to actually run that code
193 |   and test it. In our SQL implementation, for instance, we run the SQL against the database,
194 |   get a pandas dataframe back, and allow doing assertions
195 |   on the dataframe itself
196 | 
197 | 
198 | 
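Here is a rough sketch putting the features above together. The `weight` argument mirrors the bundled `examples/readme_hello_world.py`; treating `category` as a constructor keyword and the exact `post_run` contract are assumptions based on the descriptions above, so check `promptimize/prompt_cases.py` for the real signatures:

```python
from promptimize.prompt_cases import PromptCase
from promptimize import evals


class CodeGeneratingPrompt(PromptCase):
    """Hypothetical derivative using the post_run hook for post-processing."""

    def post_run(self):
        # e.g. extract the generated code from self.response, execute it in a
        # sandbox, and stash the result on `self` so evaluators can assert on it
        self.result = None


prompts = [
    PromptCase(
        "say hello in French!",
        lambda x: evals.any_word(x.response, ["bonjour", "salut"]),
        weight=2,  # counts double in suite-level scoring
        category="greetings",  # assumed kwarg; lets reports break scores down per category
    ),
]
```
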
199 | ## Getting started
200 | 
201 | To install the Promptimize package, use the following command:
202 | ```bash
203 | pip install promptimize
204 | ```
205 | 
206 | First you'll need an openai API key; let's set it as an env var
207 | ```bash
208 | export OPENAI_API_KEY=sk-{{ REDACTED }}
209 | ```
210 | 
211 | Find the examples executed below
212 | [here](https://github.com/preset-io/promptimize/blob/main/examples/readme_examples.py)
213 | 
214 | ```bash
215 | # Clone the repo
216 | git clone git@github.com:preset-io/promptimize.git
217 | cd promptimize
218 | 
219 | # NOTE: the CLI is `promptimize`, but `p9e` is a shorter synonym; they can be used interchangeably
220 | # First let's run some of the examples
221 | p9e run ./examples
222 | 
223 | # Now the same but with verbose output
224 | p9e run ./examples --verbose --output ./report.yaml
225 | 
226 | ```
227 | ## Langchain
228 | 
229 | How does promptimize relate to `langchain`?
230 | 
231 | We think langchain is amazing. Promptimize uses langchain under the
232 | hood to interact with openai, and has integrations with langchain
233 | (see `LangchainPromptCase`, and the upcoming `LangchainChainPromptCase`
234 | and `LangchainAgntPromptCase`).
235 | That said, you don't have to use
236 | langchain: you can use promptimize on top of any Python prompt generation,
237 | whether it's another library or some homegrown solution.
238 | 
239 | 
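For instance, here's a minimal sketch of wrapping a langchain `PromptTemplate` in a `LangchainPromptCase`, following the pattern used in `examples/python_examples.py`; the template and evaluator below are made up for illustration:

```python
from langchain import PromptTemplate

from promptimize.prompt_cases import LangchainPromptCase
from promptimize import evals

lc_template = PromptTemplate(
    input_variables=["user_input"],
    template="Translate the following to French: {user_input}",
)

prompt_case = LangchainPromptCase(
    lc_template,
    user_input="good morning!",
    evaluators=[lambda x: evals.any_word(x.response, ["bonjour", "matin"])],
)
```
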
240 | ## Context
241 | 
242 | 
243 | 
244 | Where is `promptimize` coming from!? I'm (Maxime Beauchemin) a startup
245 | founder at Preset working on bringing AI to BI
246 | (data exploration
247 | and visualization). At Preset, we use `promptimize` to generate
248 | complex SQL based on natural language, and to suggest charts to users. We
249 | derive the `SimpleQuery` class to fit our specific use
250 | cases in our own prompt engineering repo. It's not my first open source project
251 | as the creator of
252 | [Apache Superset](https://github.com/apache/superset/) and
253 | [Apache Airflow](https://github.com/apache/airflow/)
254 | 
255 | 
256 | ## Contribute
257 | 
258 | This project is in its super early stages as of `0.2.0`, and contributions,
259 | contributors, and maintainers are highly encouraged. While it's a great time
260 | to onboard and influence the direction of the project, things are still
261 | evolving quickly. To get involved, open a GitHub issue
262 | or submit a pull request!
263 | 
264 | ## Links
265 | * [Blog - Mastering AI-Powered Product Development: Introducing Promptimize for Test-Driven Prompt Engineering](https://preset.io/blog/introducing-promptimize/)
266 | * [Preset Blog](https://preset.io/blog/)
267 | * [Promptimize DOCS](https://preset-io.github.io/promptimize/)
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	cp $(SOURCEDIR)/../../README.md $(SOURCEDIR)/
21 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
22 | 
--------------------------------------------------------------------------------
/docs/publish.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Clean up the build/html directory
4 | rm -rf build/html
5 | 
6 | # Build the documentation
7 | make html
8 | 
9 | # Change to the build/html directory
10 | cd build/html
11 | touch .nojekyll
12 | 
13 | # Initialize a new Git repository
14 | git init
15 | 
16 | # Add the generated files to the repository
17 | git add .
18 | 
19 | # Commit the changes
20 | git commit -a -m "Initial commit"
21 | 
22 | # Add the GitHub Pages remote repository
23 | git remote add origin https://github.com/preset-io/promptimize.git
24 | 
25 | # Push the changes to the GitHub Pages repository
26 | git push -f origin main:gh-pages
27 | 
28 | # Return to the original directory
29 | cd ../..
30 | 
--------------------------------------------------------------------------------
/docs/source/README.md:
--------------------------------------------------------------------------------
1 | # 💡 ¡promptimize! 💡
2 | 
3 | 
4 | Promptimize is a prompt engineering evaluation and testing toolkit.
5 | 
6 | It accelerates and provides structure around prompt engineering at scale
7 | with confidence, bringing some of the ideas behind test-driven
8 | development (TDD) to engineering prompts.
9 | 
10 | With promptimize, you can:
11 | 
12 | - Define your "prompt cases" (think "test cases" but specific to evaluating
13 |   prompts) as code and associate them with evaluation functions
14 | - Generate prompt variations dynamically
15 | - Execute and rank prompt test suites across different
16 |   engines/models/temperature/settings and compare results, bringing
17 |   the hyperparameter tuning mindset to prompt engineering
18 | - Get reports on your prompts' performance as you iterate. Answer questions
19 |   about how different prompt suites are performing against one another.
20 |   Which individual cases or categories of cases improved? Which regressed?
21 | - Minimize API calls! Only re-assess what changed as you change it
22 | - Perform human review if and where needed, introspect failed cases, and override
23 |   false negatives
24 | 
25 | In essence, promptimize provides a programmatic way to execute and fine-tune
26 | your prompts and evaluation functions in Python, allowing you to iterate
27 | quickly and with confidence.
28 | 
29 | ## Hello world - the simplest prompt examples
30 | [more examples on GitHub](https://github.com/preset-io/promptimize/tree/master/examples)
31 | ```python
32 | # Bringing some "prompt generator" classes - note that you can derive and extend those
33 | from promptimize.prompt_cases import PromptCase
34 | 
35 | # Bringing some useful eval functions that help evaluate and score responses
36 | # eval functions have a handle on the prompt object and are expected
37 | # to return a score between 0 and 1
38 | from promptimize import evals
39 | 
40 | # Promptimize will scan the target folder and find all Prompt objects
41 | # and derivatives that are in the python modules
42 | simple_prompts = [
43 | 
44 |     # Prompting "hello there" and making sure there's "hi" or "hello"
45 |     # somewhere in the answer
46 |     PromptCase("hello there!", lambda x: evals.any_word(x.response, ["hi", "hello"])),
47 |     PromptCase(
48 |         "name the top 50 guitar players!", lambda x: evals.all_words(x.response, ["frank zappa"])
49 |     ),
50 | ]
51 | ```
52 | 
53 | ### The CLI
54 | ```bash
55 | $ promptimize -h
56 | ```
57 | 
58 | 
59 | ## Problem + POV
60 | 
61 | Thousands of product builders are currently trying to figure out how to
62 | bring the power of AI into the products and experiences they are building.
63 | The probabilistic (often semi-random, sometimes hectic) nature of LLMs
64 | makes this a challenge.
65 | 
66 | Prompt engineering is a huge piece of the puzzle in terms of how to do this
67 | right, especially given the complexity, risks, and drawbacks around
68 | model tuning.
69 | 
70 | We believe product builders need to tame AI through proper, rigorous
71 | **prompt engineering**. This allows making the probabilistic nature of
72 | AI more deterministic, or somewhat predictable, and allows builders to apply
73 | a hyperparameter tuning-type mindset and approach to prompt engineering.
74 | 
75 | Any prompt-generator logic that's going to be let loose in the wild inside
76 | a product should be thoroughly tested and evaluated with "prompt cases" that
77 | cover the breadth of what people may do in a product.
78 | 
79 | In short, Promptimize allows you to test prompts at industrial scale,
80 | so that you can confidently use them in the products you are building.
81 | 
82 | ## Information Architecture
83 | 
84 | - **Prompt:** A Prompt instance is a certain test case, a single prompt
85 |   with an associated set of evaluation functions to rate its success.
86 | - **Evaluation:** An evaluation function that reads the response and returns
87 |   a success rate between `0` and `1`.
88 | - **Suite:** A Suite is a collection of Prompts; it's able to run things,
89 |   accumulate results, and print reports about its collection of use cases.
90 | - **Report**: a report is the compiled results of running a certain prompt
91 |   `Suite` or set of suites. Reports can be consumed, compared, and expanded.
92 | 
93 | ## Principles
94 | 
95 | - **Configuration as code:** All prompt cases, suites, and evaluations are
96 |   defined as code, which makes it easy to dynamically generate all sorts
97 |   of use cases and suites.
98 | - **Expressive**: a clean DSL that's to-the-point -> user prompt + assertions.
99 |   The actual prompt creation logic lives in the derivative class of `PromptCase`,
100 |   so that we can have clean, dense files that contain nice `Suite`s
101 | - **Support the iteration mindset:** making it easy for people to try things,
102 |   get suggestions from the AI, adapt, compare, and push forward
103 | - **Extensibility:** the toolkit is designed to be extremely hackable and
104 |   extensible. Hooks, extensions, high API surface.
105 | - **AI-powered:** the framework offers ways to expand your suites based
106 |   on the examples that exist. Use AI to generate more prompt cases!
107 | 
108 | 
109 | ## Interesting features / facts
110 | 
111 | Listing out a few features you should know about that you can start using as your
112 | suites of prompts become larger / more complex
113 | 
114 | * evaluation functions are assumed to return a value between 0 and 1;
115 |   unlike unit tests, prompt cases aren't boolean
116 | * prompts can be assigned a `weight` (default 1); this enables you to define
117 |   which prompts are more important than others for reporting purposes and suite evaluation
118 | * prompts can be assigned a `category`, which can be used in the reporting.
119 |   That helps in understanding which categories are performing better than
120 |   others, or are most affected by iterations
121 | * The `Prompt` class offers `pre_run` and `post_run` hooks if you want to do
122 |   post-processing, for instance. An example of that would be a prompt
123 |   that expects GPT to generate code, where you'd like to actually run that code
124 |   and test it. In our SQL implementation, for instance, we run the SQL against the database,
125 |   get a pandas dataframe back, and allow doing assertions
126 |   on the dataframe itself
127 | 
128 | 
129 | ## Setup
130 | 
131 | To install the Promptimize package, use the following command:
132 | ```bash
133 | pip install promptimize
134 | ```
135 | 
136 | ## Getting started
137 | 
138 | First you'll need an openai API key; let's set it as an env var
139 | ```bash
140 | export OPENAI_API_KEY=sk-{REDACTED}
141 | ```
142 | 
143 | Find the examples below [here](https://github.com/preset-io/promptimize/blob/master/examples/readme_examples.py)
144 | 
145 | ```python
146 | ```
147 | ```bash
148 | # NOTE: the CLI is `promptimize`, but `p9e` is a shorter synonym; they can be used interchangeably
149 | 
150 | # First let's run some of the examples
151 | p9e run ./examples
152 | 
153 | # Now the same but with verbose output
154 | p9e run ./examples --verbose
155 | 
156 | ```
157 | ## Langchain?
158 | 
159 | How does promptimize relate to `langchain`?
160 | 
161 | We think langchain is amazing. Promptimize uses langchain under the
162 | hood to interact with openai, and has integrations with langchain
163 | (see `LangchainPromptCase`, and the upcoming `LangchainChainPromptCase`
164 | and `LangchainAgntPromptCase`).
165 | That said, you don't have to use
166 | langchain: you can use promptimize on top of any Python prompt generation,
167 | whether it's another library or some homegrown solution.
168 | 
169 | 
170 | ## Context
171 | 
172 | 
173 | 
174 | Where is `promptimize` coming from!? I'm (Maxime Beauchemin) a startup
175 | founder at Preset working on bringing AI to BI
176 | (data exploration
177 | and visualization). At Preset, we use `promptimize` to generate
178 | complex SQL based on natural language, and to suggest charts to users. We
179 | derive the `SimpleQuery` class to fit our specific use
180 | cases in our own prompt engineering repo.
Not my first open source project 181 | as the creator of 182 | [Apache Superset](https://github.com/apache/superset/) and 183 | [Apache Airflow](https://github.com/apache/airflow/) 184 | 185 | 186 | ## Contribute 187 | 188 | This project is in its super early stages as of `0.1.0`, and contributions, 189 | contributors, and maintainers are highly encouraged. While it's a great time 190 | to onboard and influence the direction of the project, things are still 191 | evolving quickly. To get involved, open a GitHub issue 192 | or submit a pull request! 193 | 194 | ## Links 195 | * [Blog - Mastering AI-Powered Product Development: Introducing Promptimize for Test-Driven Prompt Engineering](https://preset.io/blog/) 196 | * [Preset Blog](https://preset.io/blog/) 197 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | ============= 3 | 4 | PromptCase 5 | ---------- 6 | 7 | .. automodule:: promptimize.prompt_cases 8 | :members: 9 | 10 | Suite 11 | ----- 12 | 13 | .. automodule:: promptimize.suite 14 | :members: 15 | 16 | Report 17 | ------ 18 | 19 | .. automodule:: promptimize.reports 20 | :members: 21 | 22 | Evaluators 23 | ---------- 24 | 25 | .. automodule:: promptimize.evals 26 | :members: 27 | 28 | Utils 29 | ----- 30 | 31 | .. automodule:: promptimize.utils 32 | :members: 33 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | The CLI 2 | ======= 3 | 4 | .. click:: promptimize:cli 5 | :prog: promptimize 6 | :show-nested: 7 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | from typing import List 9 | 10 | project = "promptimize" 11 | copyright = "2023, Maxime Beauchemin" 12 | author = "Maxime Beauchemin" 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions: List = [ 18 | "sphinx.ext.autodoc", 19 | "sphinx.ext.napoleon", # Optional, for Google and NumPy-style docstrings 20 | "recommonmark", # If you're using the recommonmark extension 21 | "sphinx_click.ext", 22 | ] 23 | 24 | 25 | templates_path = ["_templates"] 26 | exclude_patterns: List = [] 27 | 28 | 29 | # -- Options for HTML output ------------------------------------------------- 30 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 31 | 32 | html_theme = "alabaster" 33 | html_logo = ( 34 | "https://user-images.githubusercontent.com/" 35 | "487433/229948453-36cbc2d1-e71f-4e87-9111-ab428bc96f4c.png" 36 | ) 37 | html_static_path = ["_static"] 38 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: README.md
2 |    :parser: markdown
3 | 
4 | .. toctree::
5 |    :maxdepth: 2
6 |    :caption: Content
7 | 
8 |    api
9 |    cli
10 | 
--------------------------------------------------------------------------------
/examples/__pycache__/use_cases.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/preset-io/promptimize/9c1ffb32444ea69eae158d2cff72786ee07db1bd/examples/__pycache__/use_cases.cpython-38.pyc
--------------------------------------------------------------------------------
/examples/python_examples.py:
--------------------------------------------------------------------------------
1 | """An example of how to test Python code generating prompts"""
2 | import re
3 | 
4 | # Bringing some "prompt generator" classes
5 | from promptimize.prompt_cases import LangchainPromptCase
6 | 
7 | # Bringing some useful eval functions that help evaluate and score responses
8 | # eval functions have a handle on the prompt object and are expected
9 | # to return a score between 0 and 1
10 | from langchain import PromptTemplate
11 | from langchain.output_parsers import StructuredOutputParser, ResponseSchema
12 | import demjson
13 | 
14 | from RestrictedPython import compile_restricted, safe_globals, safe_builtins
15 | from RestrictedPython.Guards import guarded_unpack_sequence
16 | from RestrictedPython.Eval import default_guarded_getiter
17 | 
18 | 
19 | response_schemas = [
20 |     ResponseSchema(
21 |         name="python_function",
22 |         description="the python function itself",
23 |     ),
24 |     ResponseSchema(
25 |         name="function_name",
26 |         description="the name of the function",
27 |     ),
28 |     ResponseSchema(name="test_cases", description="test cases"),
29 |     ResponseSchema(
30 |         name="hints",
31 |         description="if any, any recommendations to the users about clarifying their prompt",
32 |     ),
33 | ]
34 | 
35 | output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
36 | format_instructions = output_parser.get_format_instructions().replace("\t", " ")
37 | """
38 | * you include great useful docstrings and doctests that follow the Google conventions
39 | """
40 | 
41 | template = """\
42 | System: you are an AI that writes python functions that accomplish specific tasks
43 | 
44 | Python guidelines:
45 | * you follow the PEP8 conventions
46 | * use 4 spaces indent, no tabs!
47 | * use snake case (using underscores) 48 | 49 | The output should be a VALID JSON blob with the following keys: 50 | * "python_function" as a string with the python function code 51 | * "function_name" as the name of the function 52 | * "hints": as some hints about how to use the function 53 | 54 | User: write a function that multipllies a number by 2 and returns the result 55 | 56 | System: 57 | { 58 | "python_function": "def multiply_by_two(number):\\n return number * 2\\n" 59 | "function_name": "multiply_by_two", 60 | "hints": "This function is not that helpful as you can simply mulitply by two\\ninstead of calling this function" 61 | } 62 | 63 | User: {{ user_input }} 64 | 65 | System: 66 | """ # noqa 67 | 68 | lc_template = PromptTemplate( 69 | input_variables=["user_input"], 70 | partial_variables={"format_instructions": format_instructions}, 71 | template=template, 72 | template_format="jinja2", 73 | ) 74 | 75 | 76 | def function_from_string(function_as_string, function_name): 77 | restricted_code = compile_restricted(function_as_string, "", "exec") 78 | 79 | # Define a separate environment for the code to run in 80 | execution_globals = safe_globals.copy() 81 | execution_globals.update( 82 | { 83 | "__builtins__": safe_builtins, 84 | "_unpack_sequence_": guarded_unpack_sequence, 85 | "_getiter_": default_guarded_getiter, 86 | } 87 | ) 88 | 89 | # Execute the code in the restricted environment 90 | exec(restricted_code, execution_globals) 91 | 92 | # Access the function from the restricted environment 93 | return execution_globals[function_name] 94 | 95 | 96 | def test(func, args, expected_result): 97 | if func: 98 | if not isinstance(args, (list, tuple)): 99 | args = [args] 100 | try: 101 | result = func(*args) 102 | if expected_result == result: 103 | return 1 104 | except Exception: 105 | return 0 106 | return 0 107 | 108 | 109 | def decode_shitty_json(s): 110 | json_match = re.search(r"\{[\s\S]*\}", s) 111 | 112 | if json_match: 113 | json_string = json_match.group() 114 | 115 | # Parse the JSON string using demjson 116 | json_data = demjson.decode(json_string) 117 | 118 | return json_data 119 | return None 120 | 121 | 122 | def test_is_prime(prompt_case, val, exp): 123 | return test(prompt_case.python_function, val, exp) 124 | 125 | 126 | class PythonGeneratorPrompt(LangchainPromptCase): 127 | def post_run(self): 128 | success = False 129 | self.python_function = None 130 | self.f = None 131 | try: 132 | self.response = decode_shitty_json(self.response) 133 | success = True 134 | except Exception as e: 135 | self.error = str(e) 136 | 137 | if success: 138 | # try: 139 | f = function_from_string( 140 | self.response.get("python_function"), self.response.get("function_name") 141 | ) 142 | self.python_function = f 143 | self.f = f 144 | # except Exception as e: 145 | # self.error = str(e) 146 | 147 | 148 | prompts = [ 149 | PythonGeneratorPrompt( 150 | lc_template, 151 | key="is_prime", 152 | user_input="write a function that tests if an number is a prime number, returns a boolean", 153 | evaluators=[ 154 | lambda x: test(x.f, 2, True), 155 | lambda x: test(x.f, 4, False), 156 | lambda x: test(x.f, 7, True), 157 | lambda x: test(x.f, 10, False), 158 | lambda x: test(x.f, 11, True), 159 | lambda x: test(x.f, 113, True), 160 | ], 161 | ), 162 | PythonGeneratorPrompt( 163 | lc_template, 164 | key="gcd", 165 | user_input="write a function that finds the greatest common divisor (GCD) of two numbers?", 166 | evaluators=[ 167 | lambda x: test(x.f, [14, 28], 14), 168 | lambda x: 
test(x.f, [56, 98], 14), 169 | lambda x: test(x.f, [81, 153], 9), 170 | ], 171 | ), 172 | PythonGeneratorPrompt( 173 | lc_template, 174 | key="factorial", 175 | user_input="write a function that calculates the factorial of a given number", 176 | evaluators=[ 177 | lambda x: test(x.f, 0, 1), 178 | lambda x: test(x.f, 1, 1), 179 | lambda x: test(x.f, 5, 120), 180 | lambda x: test(x.f, 7, 5040), 181 | lambda x: test(x.f, 10, 3628800), 182 | ], 183 | ), 184 | PythonGeneratorPrompt( 185 | lc_template, 186 | key="is_palindrome", 187 | user_input="write a function that determines if a given string is a palindrome", 188 | evaluators=[ 189 | lambda x: test(x.f, "racecar", True), 190 | lambda x: test(x.f, "hello", False), 191 | lambda x: test(x.f, "madam", True), 192 | lambda x: test(x.f, "python", False), 193 | lambda x: test(x.f, "Aibohphobia", True), 194 | ], 195 | ), 196 | PythonGeneratorPrompt( 197 | lc_template, 198 | key="fibonacci", 199 | user_input=( 200 | "write a function that generates the Fibonacci sequence ", 201 | "up to a specified number of terms", 202 | ), 203 | evaluators=[ 204 | lambda x: test(x.f, 1, [0]), 205 | lambda x: test(x.f, 2, [0, 1]), 206 | lambda x: test(x.f, 5, [0, 1, 1, 2, 3]), 207 | lambda x: test(x.f, 10, [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]), 208 | lambda x: test(x.f, 7, [0, 1, 1, 2, 3, 5, 8]), 209 | ], 210 | ), 211 | PythonGeneratorPrompt( 212 | lc_template, 213 | key="sum_of_multiples", 214 | user_input=( 215 | "write a function that calculates the sum of all multiples ", 216 | "of 3 and 5 below a given number", 217 | ), 218 | evaluators=[ 219 | lambda x: test(x.f, 10, 23), 220 | lambda x: test(x.f, 20, 78), 221 | lambda x: test(x.f, 30, 195), 222 | lambda x: test(x.f, 50, 543), 223 | lambda x: test(x.f, 100, 2418), 224 | ], 225 | ), 226 | PythonGeneratorPrompt( 227 | lc_template, 228 | key="is_leap_year", 229 | user_input="write a function that checks whether a given year is a leap year", 230 | evaluators=[ 231 | lambda x: test(x.f, 2000, True), 232 | lambda x: test(x.f, 1900, False), 233 | lambda x: test(x.f, 2020, True), 234 | lambda x: test(x.f, 2021, False), 235 | lambda x: test(x.f, 2400, True), 236 | ], 237 | ), 238 | PythonGeneratorPrompt( 239 | lc_template, 240 | key="longest_substring_without_repeating_chars", 241 | user_input=( 242 | "write a function that finds the longest substring of a ", 243 | "given string without repeating characters", 244 | ), 245 | evaluators=[ 246 | lambda x: test(x.f, "abcabcbb", "abc"), 247 | lambda x: test(x.f, "bbbbbb", "b"), 248 | lambda x: test(x.f, "pwwkew", "wke"), 249 | lambda x: test(x.f, "abcdefgh", "abcdefgh"), 250 | lambda x: test(x.f, "abcbdacf", "bdacf"), 251 | ], 252 | ), 253 | PythonGeneratorPrompt( 254 | lc_template, 255 | key="longest_common_prefix", 256 | user_input="write a function that finds the longest common prefix of a list of strings", 257 | evaluators=[ 258 | lambda x: test(x.f, ["flower", "flow", "flight"], "fl"), 259 | lambda x: test(x.f, ["dog", "racecar", "car"], ""), 260 | lambda x: test(x.f, ["interspecies", "interstellar", "interstate"], "inter"), 261 | lambda x: test(x.f, ["prefix", "suffix", "infix"], ""), 262 | lambda x: test(x.f, ["geeksforgeeks", "geeks", "geek"], "geek"), 263 | ], 264 | ), 265 | PythonGeneratorPrompt( 266 | lc_template, 267 | key="sum_of_digits", 268 | user_input="write a function that calculates the sum of the digits of a given number", 269 | evaluators=[ 270 | lambda x: test(x.f, 123, 6), 271 | lambda x: test(x.f, 456, 15), 272 | lambda x: test(x.f, 789, 24), 273 | 
lambda x: test(x.f, 1001, 2), 274 | lambda x: test(x.f, 54321, 15), 275 | ], 276 | ), 277 | PythonGeneratorPrompt( 278 | lc_template, 279 | key="decimal_to_binary", 280 | user_input=( 281 | "write a function that converts a given decimal number to " "its binary representation" 282 | ), 283 | evaluators=[ 284 | lambda x: test(x.f, 2, "10"), 285 | lambda x: test(x.f, 7, "111"), 286 | lambda x: test(x.f, 10, "1010"), 287 | lambda x: test(x.f, 16, "10000"), 288 | lambda x: test(x.f, 31, "11111"), 289 | ], 290 | ), 291 | ] 292 | -------------------------------------------------------------------------------- /examples/readme_examples.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some basic examples for promptimize. 3 | 4 | to run, simply execute `p9e ./examples/readme_examples.py` 5 | """ 6 | # Brining some "prompt generator" classes 7 | from promptimize.prompt_cases import PromptCase, TemplatedPromptCase 8 | 9 | # Bringing some useful eval function that help evaluating and scoring responses 10 | # eval functions have a handle on the prompt object and are expected 11 | # to return a score between 0 and 1 12 | from promptimize import evals 13 | 14 | # Promptimize will scan the target folder and find all Prompt objects 15 | # and derivatives that are in the python modules 16 | simple_prompts = [ 17 | # Prompting "hello there" and making sure there's "hi" or "hello" 18 | # somewhere in the answer 19 | PromptCase("hello there!", lambda x: evals.any_word(x.response, ["hi", "hello"])), 20 | # Making sure 3 specific guitar players are in the top 10 21 | # the score here is a percentage of the words found 22 | PromptCase( 23 | "who are the top 10 best guitar players of all time?", 24 | lambda x: evals.percentage_of_words( 25 | x.response, ["frank zappa", "david gilmore", "carlos santana"] 26 | ), 27 | ), 28 | # GPT-ing myself and making sure the response mentions Superset and Airflow 29 | PromptCase( 30 | "who is Maxime Beauchemin, (the data guy...)?", 31 | lambda x: evals.percentage_of_words( 32 | x.response, ["superset", "airflow"], case_sensitive=False 33 | ), 34 | ), 35 | ] 36 | 37 | # deriving TemplatedPromptCase to generate prompts that ask GPT to generate SQL 38 | # based on table schemas. The point here is you can derive the `Prompt` 39 | # class to create more specialized Prompt generators 40 | # For instance, the SqlPropt class defined bellow could be extended to fetch 41 | # schema definitions dynamically, acutally run the SQL, and allow 42 | # doing evals against the resultset. 43 | 44 | 45 | class SqlPrompt(TemplatedPromptCase): 46 | # the TemplatedPromptCase expects a dict of defaults that can be overriden in the constructor 47 | template_defaults = {"dialect": "Postgres"} 48 | # The actual Jinja2 template 49 | template = """\ 50 | given these SQL table schemas: 51 | CREATE TABLE world_population ( 52 | country_name STRING, 53 | year DATE, 54 | population_total INT, 55 | ); 56 | 57 | So, can you write a SQL query for {{ dialect }} that answers this user prompt: 58 | {{ input }} 59 | """ 60 | 61 | 62 | # Generating a few SQL prompts 63 | sql_prompts = [ 64 | SqlPrompt( 65 | # you can pass a unique key that can we used to reference a prompt 66 | key="sql-top-10-inc", 67 | # the user input that'll be added in place of {{ input }} in the template above 68 | user_input=( 69 | "give me the top 10 countries with the highest net increase " 70 | "of population over the past 25 years?" 
71 | ), 72 | # the dialect template parameter, overriding the default set above 73 | dialect="BigQuery", 74 | # a simple validation function making sure the SQL starts with SELECT 75 | evaluators=lambda x: 1 if x.response.strip().startswith("SELECT") else 0, 76 | ), 77 | ] 78 | -------------------------------------------------------------------------------- /examples/readme_hello_world.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some basic examples for promptimize. 3 | 4 | to run, simply execute `p9e ./examples/readme_examples.py` 5 | """ 6 | # Brining some "prompt generator" classes 7 | from promptimize.prompt_cases import PromptCase 8 | 9 | # Bringing some useful eval function that help evaluating and scoring responses 10 | # eval functions have a handle on the prompt object and are expected 11 | # to return a score between 0 and 1 12 | from promptimize import evals 13 | 14 | # Promptimize will scan the target folder and find all Prompt objects 15 | # and derivatives that are in the python modules 16 | simple_prompts = [ 17 | # Prompting "hello there" and making sure there's "hi" or "hello" 18 | # somewhere in the answer 19 | PromptCase("hello there!", lambda x: evals.any_word(x.response, ["hi", "hello"])), 20 | PromptCase( 21 | "name the top 10 guitar players!", 22 | lambda x: evals.all_words(x.response, ["zappa", "hendrix"]), 23 | weight=2, 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /promptimize/__init__.py: -------------------------------------------------------------------------------- 1 | from promptimize.cli import cli # noqa 2 | -------------------------------------------------------------------------------- /promptimize/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from promptimize.crawler import discover_objects 4 | from promptimize.prompt_cases import BasePromptCase 5 | from promptimize.reports import Report 6 | from promptimize.suite import Suite 7 | 8 | 9 | @click.group(help="💡¡promptimize!💡 CLI. 
`p9e` works too!") 10 | def cli(): 11 | pass 12 | 13 | 14 | @click.command(help="run some prompts") 15 | @click.argument( 16 | "path", 17 | required=True, 18 | type=click.Path(exists=True), 19 | ) 20 | @click.option("--verbose", "-v", is_flag=True, help="Trigger more verbose output") 21 | @click.option("--force", "-f", is_flag=True, help="Force run, do not skip") 22 | @click.option( 23 | "--human", 24 | "-h", 25 | is_flag=True, 26 | help="Human review, allowing a human to review and force pass/fail each prompt case", 27 | ) 28 | @click.option("--repair", "-r", is_flag=True, help="Only re-run previously failed") 29 | @click.option("--dry-run", "-x", is_flag=True, help="DRY run, don't call the API") 30 | @click.option("--shuffle", is_flag=True, help="Shuffle the prompts in a random order") 31 | @click.option( 32 | "--style", 33 | "-s", 34 | type=click.Choice(["json", "yaml"], case_sensitive=False), 35 | default="yaml", 36 | help="json or yaml formatting", 37 | ) 38 | @click.option( 39 | "--max-tokens", 40 | "-m", 41 | type=click.INT, 42 | default=1000, 43 | help="max_tokens passed to the model", 44 | ) 45 | @click.option( 46 | "--limit", 47 | "-l", 48 | type=click.INT, 49 | default=0, 50 | help="limit how many prompt cases to run in a single batch", 51 | ) 52 | @click.option( 53 | "--temperature", 54 | "-t", 55 | type=click.FLOAT, 56 | default=0.5, 57 | help="the temperature passed to the model", 58 | ) 59 | @click.option( 60 | "--engine", 61 | "-e", 62 | type=click.STRING, 63 | default="text-davinci-003", 64 | help="model as accepted by the openai API", 65 | ) 66 | @click.option("--key", "-k", multiple=True, help="The keys to run") 67 | @click.option( 68 | "--output", 69 | "-o", 70 | type=click.Path(), 71 | ) 72 | @click.option("--silent", "-s", is_flag=True) 73 | def run( 74 | path, 75 | verbose, 76 | force, 77 | dry_run, 78 | style, 79 | temperature, 80 | max_tokens, 81 | engine, 82 | key, 83 | output, 84 | silent, 85 | repair, 86 | human, 87 | shuffle, 88 | limit, 89 | ): 90 | """Run some prompts/suites!""" 91 | click.secho("💡 ¡promptimize! 
💡", fg="cyan") 92 | if dry_run: 93 | click.secho("# DRY RUN MODE ACTIVATED!", fg="red") 94 | uses_cases = discover_objects(path, BasePromptCase) 95 | completion_create_kwargs = { 96 | "engine": engine, 97 | "max_tokens": max_tokens, 98 | "temperature": temperature, 99 | } 100 | 101 | report = None 102 | if output: 103 | report = Report.from_path(output) 104 | 105 | suite = Suite(uses_cases, completion_create_kwargs) 106 | suite.execute( 107 | verbose=verbose, 108 | style=style, 109 | silent=silent, 110 | report=report, 111 | dry_run=dry_run, 112 | keys=key, 113 | force=force, 114 | repair=repair, 115 | human=human, 116 | shuffle=shuffle, 117 | limit=limit, 118 | ) 119 | 120 | if output: 121 | output_report = Report.from_suite(suite) 122 | if report: 123 | output_report.merge(report) 124 | click.secho(f"# Writing file output to {output}", fg="yellow") 125 | output_report.write(output, style=style) 126 | 127 | 128 | cli.add_command(run) 129 | 130 | 131 | @click.command(help="report on how your suites of prompts are performing") 132 | @click.argument( 133 | "path", 134 | required=True, 135 | type=click.Path(exists=True), 136 | ) 137 | @click.option("--groupby", "-g", help="GROUPBY", default="category") 138 | def report(path, groupby): 139 | """Get some summary of how your prompt suites are performing""" 140 | click.secho(f"# Reading report @ {path}", fg="yellow") 141 | report = Report.from_path(path) 142 | report.print_summary(groupby) 143 | 144 | 145 | cli.add_command(report) 146 | -------------------------------------------------------------------------------- /promptimize/crawler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import importlib 3 | import pkgutil 4 | from pathlib import Path 5 | from typing import List, Type, Any 6 | 7 | 8 | def is_instance_or_derivative(obj: Any, object_type: Type) -> bool: 9 | return isinstance(obj, object_type) 10 | 11 | 12 | def discover_objects(path: str, object_type: Type) -> List[Any]: # noqa 13 | objects = [] 14 | folder_path = Path(path).resolve() 15 | 16 | def process_module(module): 17 | # Iterate over the objects in the module 18 | for name, obj in module.__dict__.items(): 19 | # Check if the object is an instance or derivative of the specified type 20 | if is_instance_or_derivative(obj, object_type): 21 | objects.append(obj) 22 | # Check if the object is a list or tuple containing instances or 23 | # derivatives of the specified type 24 | elif isinstance(obj, (list, tuple)): 25 | for item in obj: 26 | if is_instance_or_derivative(item, object_type): 27 | objects.append(item) 28 | 29 | # If the path points to a file, import the module and process it directly 30 | if folder_path.is_file() and folder_path.suffix == ".py": 31 | sys.path.insert(0, str(folder_path.parent)) 32 | module_name = folder_path.stem 33 | module = importlib.import_module(module_name) 34 | process_module(module) 35 | 36 | # If the path points to a directory, proceed with the existing logic 37 | elif folder_path.is_dir(): 38 | # Add the folder to the Python path to enable importing modules from it 39 | if folder_path not in sys.path: 40 | sys.path.insert(0, str(folder_path)) 41 | 42 | # Iterate over all the modules in the folder 43 | for _, module_name, _ in pkgutil.iter_modules([str(folder_path)]): 44 | # Import the module 45 | module = importlib.import_module(module_name) 46 | process_module(module) 47 | 48 | return objects 49 | -------------------------------------------------------------------------------- 
/promptimize/evals.py: -------------------------------------------------------------------------------- 1 | """ 2 | Eval functions that be used/reused with Prompts 3 | 4 | All functions here are expected to: 5 | * receive a response string 6 | * [optional] receive arbitrary extra context 7 | * return a value from 0 to 1, 0 representing failing at the task, 1 full 8 | success, and a range in-between 9 | """ 10 | 11 | from typing import List 12 | 13 | 14 | def percentage_of_words(response: str, words: List[str], case_sensitive: bool = False) -> float: 15 | """ 16 | Calculate the percentage of words from a list that are present in the given response. 17 | 18 | Args: 19 | response (str): The string to search for words. 20 | words (List[str]): A list of words to check for their presence in the response. 21 | case_sensitive (bool, optional): If True, the search will be case-sensitive; 22 | otherwise, it will be case-insensitive. Defaults to False. 23 | 24 | Returns: 25 | float: The percentage of words found in the response (0.0 to 1.0). 26 | 27 | Examples: 28 | >>> percentage_of_words_in_response("This is an Example string.", ["example", "test"]) 29 | 0.5 30 | >>> percentage_of_words_in_response( 31 | "This is an Example string.", ["Example"], case_sensitive=True) 32 | 1.0 33 | >>> percentage_of_words_in_response( 34 | "This is an Example string.", ["example"], case_sensitive=True) 35 | 0.0 36 | >>> percentage_of_words_in_response("This is an Example string.", ["notfound"]) 37 | 0.0 38 | """ 39 | if not case_sensitive: 40 | response = response.lower() 41 | words = [w.lower() for w in words] 42 | 43 | return sum(1 if w in response else 0 for w in words) / len(words) 44 | 45 | 46 | def _common_word_search( 47 | response: str, words: List[str], case_sensitive: bool, match_type: str 48 | ) -> int: 49 | if not case_sensitive: 50 | response = response.lower() 51 | words = [w.lower() for w in words] 52 | 53 | if match_type == "any": 54 | return 1 if any(w in response for w in words) else 0 55 | elif match_type == "all": 56 | return 1 if all(w in response for w in words) else 0 57 | else: 58 | raise ValueError("Invalid match_type. Accepted values are 'any' or 'all'.") 59 | 60 | 61 | def any_word(response: str, words: List[str], case_sensitive: bool = False) -> int: 62 | """ 63 | Check if any word from a list of words is present in the given response and return 1 or 0. 64 | 65 | Args: 66 | response (str): The string to search for words. 67 | words (List[str]): A list of words to check for their presence in the response. 68 | case_sensitive (bool, optional): If True, the search will be case-sensitive; 69 | otherwise, it will be case-insensitive. Defaults to False. 70 | 71 | Returns: 72 | int: 1 if any word from the list is found in the response; otherwise, 0. 73 | 74 | Examples: 75 | >>> any_word("This is an Example string.", ["example", "test"]) 76 | 1 77 | >>> any_word("This is an Example string.", ["Example"], case_sensitive=True) 78 | 1 79 | >>> any_word("This is an Example string.", ["example"], case_sensitive=True) 80 | 0 81 | >>> any_word("This is an Example string.", ["notfound"]) 82 | 0 83 | """ 84 | return _common_word_search(response, words, case_sensitive, match_type="any") 85 | 86 | 87 | def all_words(response: str, words: List[str], case_sensitive: bool = False) -> int: 88 | """ 89 | Check if all words from a list of words are present in the given response and return 1 or 0. 90 | 91 | Args: 92 | response (str): The string to search for words. 
93 | words (List[str]): A list of words to check for their presence in the response. 94 | case_sensitive (bool, optional): If True, the search will be case-sensitive; 95 | otherwise, it will be case-insensitive. Defaults to False. 96 | 97 | Returns: 98 | int: 1 if all words from the list are found in the response; otherwise, 0. 99 | 100 | Examples: 101 | >>> all_words("This is an Example string with test.", ["example", "test"]) 102 | 1 103 | >>> all_words("This is an Example string with Test.", ["Example", "Test"], case_sensitive=True) 104 | 1 105 | >>> all_words("This is an Example string.", ["example", "test"], case_sensitive=True) 106 | 0 107 | >>> all_words("This is an Example string.", ["example", "notfound"]) 108 | 0 109 | """ 110 | return _common_word_search(response, words, case_sensitive, match_type="all") 111 | 112 | 113 | base_all = all 114 | base_any = any 115 | 116 | 117 | def all(iterable): 118 | return 1 if base_all([i == 1 for i in iterable]) else 0 119 | 120 | 121 | def any(iterable): 122 | return 1 if base_any([i == 1 for i in iterable]) else 0 123 | -------------------------------------------------------------------------------- /promptimize/prompt_cases.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any, Callable, List, Optional, Union 3 | 4 | from langchain.llms import OpenAI 5 | from langchain.callbacks import get_openai_callback 6 | 7 | from box import Box 8 | 9 | from promptimize import utils 10 | from promptimize.simple_jinja import process_template 11 | 12 | 13 | class BasePromptCase: 14 | """Abstract base prompt case""" 15 | 16 | attributes_used_for_hash = set() 17 | verbose_attrs = {"prompt"} 18 | 19 | def __init__( 20 | self, 21 | evaluators: Optional[Union[Callable, List[Callable]]] = None, 22 | key: Optional[str] = None, 23 | weight=1, 24 | category: str = None, # used for info/reporting purposes only 25 | prompt_executor: Any = None, 26 | prompt_executor_kwargs: dict = None, 27 | prompt_hash=None, 28 | *args, 29 | **kwargs, 30 | ) -> None: 31 | """ 32 | Initialize a BasePromptCase instance. 33 | 34 | Args: 35 | user_input (str): Raw user_input for the prompt. 36 | evaluators (Optional[Union[Callable, List[Callable]]]): Optional 37 | callable or list of callables used for evaluation. 38 | key (Optional[str]): Optional unique key for the prompt. 39 | weight (int, optional): Optional weight for the prompt (default: 1). 40 | category (Optional[str], optional): Optional category for 41 | the prompt (used for info/reporting purposes only). 
42 | """ 43 | self.extra_args = args 44 | self.extra_kwargs = kwargs 45 | self.response = None 46 | self.has_run = False 47 | self.was_tested = False 48 | self.test_results = None 49 | self.evaluators = evaluators or [] 50 | self.weight = weight or 1 51 | self.category = category 52 | self.pre_run_output = None 53 | self.post_run_output = None 54 | self.prompt_executor = prompt_executor or self.get_prompt_executor() 55 | self.prompt_executor_kwargs = prompt_executor_kwargs or {} 56 | 57 | self._prompt_hash = prompt_hash 58 | 59 | self.execution = Box() 60 | 61 | self.prompt = utils.literal_str(self.render()).strip() 62 | 63 | self.key = key or "prompt-" + self.prompt_hash 64 | 65 | if not utils.is_iterable(self.evaluators): 66 | self.evaluators = [self.evaluators] # type: ignore 67 | 68 | def get_prompt_executor(self): 69 | model_name = os.environ.get("OPENAI_MODEL") or "text-davinci-003" 70 | openai_api_key = os.environ.get("OPENAI_API_KEY") 71 | self.prompt_executor_kwargs = {"model_name": model_name} 72 | return OpenAI(model_name=model_name, openai_api_key=openai_api_key) 73 | 74 | def execute_prompt(self, prompt_str): 75 | with get_openai_callback() as cb: 76 | self.response = self.prompt_executor(prompt_str) 77 | self.execution.openai = Box() 78 | oai = self.execution.openai 79 | oai.total_tokens = cb.total_tokens 80 | oai.prompt_tokens = cb.prompt_tokens 81 | oai.completion_tokens = cb.completion_tokens 82 | oai.total_cost = cb.total_cost 83 | 84 | return self.response 85 | 86 | def pre_run(self): 87 | pass 88 | 89 | def post_run(self): 90 | pass 91 | 92 | @property 93 | def hash(self): 94 | return utils.short_hash(hash(self)) 95 | 96 | def __hash__(self): 97 | attrs = self.attributes_used_for_hash 98 | s = "|".join([utils.short_hash(utils.hashable_repr(getattr(self, attr))) for attr in attrs]) 99 | return utils.int_hash(s) 100 | 101 | def render(self): 102 | raise NotImplementedError() 103 | 104 | def get_unique_hash(self, extra_context=None): 105 | """Returns a unique identifier, determined by the run 106 | 107 | Generally, the actual call sent to GPT (prompt, execution params) 108 | represent something unique. 
109 | """ 110 | return utils.short_hash(str(self.extra_kwargs)) 111 | 112 | def to_dict(self, verbose=False): 113 | d = { 114 | "key": self.key, 115 | "prompt_hash": self.prompt_hash, 116 | "prompt": self.prompt, 117 | "category": self.category, 118 | "response": self.response, 119 | "weight": self.weight, 120 | "execution": self.execution.to_dict(), 121 | } 122 | if hasattr(self, "error"): 123 | d["error"] = self.error 124 | return d 125 | 126 | def print(self, verbose=False, style="yaml"): 127 | style = style or "yaml" 128 | output = self.to_dict(verbose) 129 | if not verbose: 130 | for attr in self.verbose_attrs: 131 | del output[attr] 132 | if self.weight == 1: 133 | del output["weight"] 134 | highlighted = utils.serialize_object(output, style) 135 | print(highlighted) 136 | 137 | def test(self): 138 | test_results = [] 139 | for evaluator in self.evaluators: 140 | result = evaluator(self) 141 | if not (utils.is_numeric(result) and 0 <= result <= 1): 142 | raise Exception("Value should be between 0 and 1") 143 | test_results.append(result) 144 | 145 | if len(test_results): 146 | self.execution.score = sum(test_results) / len(test_results) 147 | self.execution.results = test_results 148 | self.was_tested = True 149 | 150 | @property 151 | def prompt_hash(self): 152 | if self._prompt_hash: 153 | return self._prompt_hash 154 | return utils.short_hash(hash(self)) 155 | 156 | def _run(self, dry_run): 157 | pre_run_output = self.pre_run() 158 | if pre_run_output: 159 | self.execution.pre_run_output = pre_run_output 160 | 161 | if not dry_run: 162 | with utils.MeasureDuration() as md: 163 | self.response = self.execute_prompt(self.prompt).strip() 164 | 165 | self.execution.api_call_duration_ms = md.duration 166 | 167 | post_run_output = self.post_run() 168 | if post_run_output: 169 | self.execution.post_run_output = post_run_output 170 | self.has_run = True 171 | self.execution.run_at = utils.current_iso_timestamp() 172 | return self.response 173 | 174 | 175 | class PromptCase(BasePromptCase): 176 | """A simple prompt case""" 177 | 178 | attributes_used_for_hash = BasePromptCase.attributes_used_for_hash | {"user_input"} 179 | 180 | def __init__( 181 | self, 182 | user_input, 183 | *args, 184 | **kwargs, 185 | ) -> None: 186 | self.user_input = user_input 187 | super().__init__(*args, **kwargs) 188 | 189 | def to_dict(self, verbose=False, *args, **kwargs): 190 | d = super().to_dict(*args, **kwargs) 191 | d = utils.insert_in_dict(d, "user_input", self.user_input, after_key="key") 192 | return d 193 | 194 | def render(self): 195 | return self.user_input 196 | 197 | 198 | class TemplatedPromptCase(BasePromptCase): 199 | template_defaults: dict = {} 200 | attributes_used_for_hash = BasePromptCase.attributes_used_for_hash | { 201 | "user_input", 202 | "extra_kwargs", 203 | } 204 | template = "{{ user_input }}" 205 | 206 | def __init__( 207 | self, 208 | user_input=None, 209 | *args, 210 | **kwargs, 211 | ) -> None: 212 | self.user_input = user_input 213 | return super().__init__(*args, **kwargs) 214 | 215 | def to_dict(self, verbose=False, *args, **kwargs): 216 | d = super().to_dict(*args, **kwargs) 217 | d = utils.insert_in_dict(d, "user_input", self.user_input, after_key="key") 218 | return d 219 | 220 | def get_extra_template_context(self): 221 | """meant to be overriden in derived classes to add logic/context""" 222 | return {} 223 | 224 | @property 225 | def jinja_context(self): 226 | context_kwargs = self.template_defaults.copy() 227 | context_kwargs.update(self.get_extra_template_context()) 
228 | context_kwargs.update(self.extra_kwargs) 229 | context_kwargs.update({"user_input": self.user_input}) 230 | return context_kwargs 231 | 232 | def render(self, **kwargs): 233 | return process_template(self.template, **self.jinja_context) 234 | 235 | 236 | class LangchainPromptCase(BasePromptCase): 237 | attributes_used_for_hash = BasePromptCase.attributes_used_for_hash | { 238 | "extra_kwargs", 239 | "langchain_prompt", 240 | } 241 | 242 | def __init__( 243 | self, 244 | langchain_prompt, 245 | *args, 246 | **kwargs, 247 | ) -> None: 248 | self.langchain_prompt = langchain_prompt 249 | return super().__init__(*args, **kwargs) 250 | 251 | def to_dict(self, verbose=False, *args, **kwargs): 252 | d = super().to_dict(*args, **kwargs) 253 | d = utils.insert_in_dict(d, "prompt_kwargs", self.extra_kwargs, after_key="key") 254 | return d 255 | 256 | def render(self): 257 | return self.langchain_prompt.format(**self.extra_kwargs) 258 | -------------------------------------------------------------------------------- /promptimize/reports.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | from box import Box 3 | 4 | import pandas as pd 5 | 6 | from promptimize import utils 7 | 8 | 9 | class Report: 10 | """Report objects interacting with the filesystem / databases and data structures""" 11 | 12 | version = "0.1.0" 13 | 14 | def __init__(self, path=None, data=None): 15 | self.data = Box() 16 | if data: 17 | self.data = Box(data) 18 | self.path = path 19 | 20 | def write(self, path=None, style="yaml"): 21 | """write the report to the filesystem""" 22 | path = path or self.path 23 | with open(path, "w") as f: 24 | f.write(utils.serialize_object(self.data.to_dict(), highlighted=False, style=style)) 25 | 26 | def merge(self, report): 27 | """merge in another report into this one""" 28 | all_keys = set(report.prompts.keys()) | set(self.prompts.keys()) 29 | for k in all_keys: 30 | a = report.prompts.get(k) 31 | b = self.prompts.get(k) 32 | if a and b: 33 | if a.execution.get("run_at", "") > b.execution.get("run_at", ""): 34 | self.prompts[k] = a 35 | else: 36 | self.prompts[k] = b 37 | 38 | if not a: 39 | self.prompts[k] = b 40 | elif not b: 41 | self.prompts[k] = a 42 | 43 | @property 44 | def prompts(self): 45 | """list the prompts in this report""" 46 | if self.data: 47 | return self.data.prompts 48 | return {} 49 | 50 | @property 51 | def failed_keys(self): 52 | """return the list of prompt keys that have not suceeded""" 53 | keys = set() 54 | for p in self.prompts.values(): 55 | if p.execution.get("score", 0) < 1: 56 | keys.add(p.key) 57 | return keys 58 | 59 | @classmethod 60 | def from_path(cls, path): 61 | """load a report object from a path in the filesystem""" 62 | try: 63 | with open(path, "r") as f: 64 | report = cls(path, yaml.safe_load(f)) 65 | return report 66 | except FileNotFoundError: 67 | return None 68 | 69 | @classmethod 70 | def from_suite(cls, suite): 71 | """load a report object from a suite instance""" 72 | report = cls(data=suite.to_dict()) 73 | return report 74 | 75 | def get_prompt(self, prompt_key): 76 | """get a specific prompt data structure from the report""" 77 | return self.prompts.get(prompt_key) 78 | 79 | def prompt_df(self): 80 | """make a flat pandas dataframe out of the prompts in the reports""" 81 | prompts = [p for p in self.prompts.values() if p.execution] 82 | return pd.json_normalize(prompts) 83 | 84 | def print_summary(self, groupby="category"): 85 | """print the summary from the report""" 86 | if groupby: 
87 | self.print_summary(groupby=None) 88 | 89 | df = self.prompt_df() 90 | 91 | df["score"] = df["weight"] * df["execution.score"] 92 | 93 | if groupby: 94 | df = df[[groupby, "weight", "score"]].groupby(groupby).sum() 95 | else: 96 | df = df.agg({"weight": "sum", "score": "sum"}).to_frame().T 97 | df["perc"] = (df["score"] / df["weight"]) * 100 98 | df = df.sort_values(by="weight", ascending=False) 99 | headers = [] 100 | if groupby: 101 | headers = "keys" 102 | else: 103 | df = df.T 104 | print(utils.trabulate(df, headers=headers)) 105 | -------------------------------------------------------------------------------- /promptimize/simple_jinja.py: -------------------------------------------------------------------------------- 1 | import jinja2 2 | 3 | environment = jinja2.Environment() 4 | 5 | 6 | def process_template(template_as_string, **kwargs): 7 | template = environment.from_string(template_as_string) 8 | return template.render(**kwargs) 9 | -------------------------------------------------------------------------------- /promptimize/suite.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides a Suite class to manage and execute a collection of 3 | use cases (prompts) to be tested. It allows running the tests, displaying 4 | results, and serializing the summary of the suite. 5 | """ 6 | import random 7 | from typing import Any, Dict, List, Optional, Union 8 | 9 | import click 10 | 11 | from promptimize import utils 12 | from promptimize.prompt_cases import BasePromptCase 13 | 14 | 15 | def separator(fg=None) -> None: 16 | """Print a separator line.""" 17 | click.secho("# " + "-" * 40, fg=fg) 18 | 19 | 20 | def separated_section(s, fg=None): 21 | separator(fg) 22 | click.secho(s, fg=fg) 23 | separator(fg) 24 | 25 | 26 | class Suite: 27 | """A collection of use cases to be tested. 28 | 29 | Attributes: 30 | name (Optional[str]): The name of the suite. 31 | prompts (Dict[str, Prompt]): Dictionary of prompts to be tested, 32 | keyed by the prompt key. 33 | last_run_completion_create_kwargs (Dict[str, Any]): Keyword arguments 34 | used in the last run for completion creation. 35 | efective_prompts (List): List of prompts values that finally will be tested. 36 | """ 37 | 38 | def __init__( 39 | self, 40 | prompts: List["BasePromptCase"], 41 | name: Optional[str] = None, 42 | ) -> None: 43 | """ 44 | Args: 45 | prompts (List[Prompt]): List of prompts to be tested. 46 | name (Optional[str]): The name of the suite. Defaults to None. 47 | """ 48 | self.name = name 49 | self.prompts = {o.key: o for o in prompts} 50 | self.last_run_completion_create_kwargs: dict = {} 51 | self.effective_prompts = list(self.prompts.values()) 52 | 53 | def execute( # noqa 54 | self, 55 | verbose: bool = False, 56 | style: str = "yaml", 57 | silent: bool = False, 58 | report=None, 59 | dry_run: bool = False, 60 | keys: list = None, 61 | force: bool = False, 62 | repair: bool = False, 63 | human: bool = False, 64 | shuffle: bool = False, 65 | limit: int = 0, 66 | ) -> None: 67 | """ 68 | Execute the suite with the given settings. 69 | 70 | Args: 71 | verbose (bool): If True, print verbose output. Defaults to False. 72 | style (str): Output style for serialization. Defaults to "yaml". 73 | silent (bool): If True, suppress output. Defaults to False. 
74 | """ 75 | self.reload_effective_prompts( 76 | report=report, 77 | keys=keys, 78 | repair=repair, 79 | shuffle=shuffle, 80 | limit=limit, 81 | ) 82 | prompts = self.effective_prompts 83 | 84 | for i, prompt in enumerate(prompts): 85 | should_run = force or self.should_prompt_execute(prompt, report) 86 | progress = f"({i+1}/{len(prompts)})" 87 | if not silent: 88 | if should_run: 89 | separated_section(f"# {progress} [RUN] prompt: {prompt.key}", fg="cyan") 90 | else: 91 | separated_section(f"# {progress} [SKIP] prompt: {prompt.key}", fg="yellow") 92 | 93 | if should_run: 94 | prompt._run(dry_run) 95 | if not dry_run: 96 | prompt.test() 97 | 98 | if not silent and should_run: 99 | prompt.print(verbose=verbose, style=style) 100 | 101 | if should_run and human: 102 | v = click.prompt( 103 | 'Press Enter to continue, "Y" to force success, "N" to force fail, "X" to exit', 104 | default="", 105 | show_default=False, 106 | ) 107 | v = v.lower() 108 | if v == "": 109 | click.secho("Leaving result unaltered", fg="yellow") 110 | elif v == "y": 111 | prompt.execution.score = 1 112 | prompt.execution.human_override = True 113 | click.secho("Forcing SUCCESS", fg="green") 114 | elif v == "n": 115 | prompt.execution.score = 0 116 | prompt.execution.human_override = True 117 | click.secho("Forcing FAILURE", fg="red") 118 | elif v == "x": 119 | break 120 | 121 | # `self.last_run_completion_create_kwargs = completion_create_kwargs 122 | if not silent: 123 | separated_section("# Suite summary", fg="cyan") 124 | click.echo(utils.serialize_object(self._serialize_run_summary(), style)) 125 | 126 | def reload_effective_prompts( 127 | self, 128 | report=None, 129 | keys: list = None, 130 | repair: bool = False, 131 | shuffle: bool = False, 132 | limit: int = 0, 133 | ): 134 | self.effective_prompts = list(self.prompts.values()) 135 | if keys: 136 | self.effective_prompts = [p for p in self.effective_prompts if p.key in keys] 137 | if repair and report: 138 | failed_keys = report.failed_keys 139 | self.effective_prompts = [p for p in self.effective_prompts if p.key in failed_keys] 140 | 141 | if shuffle: 142 | random.shuffle(self.effective_prompts) 143 | 144 | if limit: 145 | self.effective_prompts = self.effective_prompts[:limit] 146 | 147 | def should_prompt_execute(self, prompt, report): 148 | if not report or not report.prompts: 149 | return True 150 | report_prompt = report.prompts.get(prompt.key) 151 | if not report_prompt: 152 | return True 153 | else: 154 | if not report_prompt.execution: 155 | return True 156 | 157 | if report_prompt.prompt_hash == prompt.prompt_hash: 158 | return False 159 | 160 | return True 161 | 162 | def _serialize_run_summary( 163 | self, verbose: bool = False 164 | ) -> Dict[str, Union[Optional[float], Dict[str, Any]]]: 165 | """ 166 | Serialize the run summary of the suite. 167 | 168 | Args: 169 | verbose (bool): If True, include verbose output. Defaults to False. 170 | 171 | Returns: 172 | Dict[str, Union[Optional[float], Dict[str, Any]]]: Serialized run summary of the suite. 
173 | """ 174 | prompts = self.prompts.values() 175 | tested = [p for p in prompts if p.was_tested and p.execution.score is not None] 176 | suite_score = None 177 | if len(tested) > 0: 178 | total_weight = sum([p.weight for p in tested]) 179 | suite_score = sum([p.execution.score * p.weight for p in tested]) / total_weight 180 | d = { 181 | "suite_score": suite_score, 182 | "git_info": utils.get_git_info(), 183 | } 184 | 185 | return d 186 | 187 | def to_dict(self) -> Dict[str, Any]: 188 | """ 189 | Convert the suite to a dictionary. 190 | 191 | Returns: 192 | Dict[str, Any]: Dictionary representation of the suite. 193 | """ 194 | return { 195 | "name": self.name, 196 | "prompts": {p.key: p.to_dict() for p in self.prompts.values()}, 197 | "run_summary": self._serialize_run_summary(), 198 | } 199 | -------------------------------------------------------------------------------- /promptimize/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Any, Dict, List, Optional 3 | import subprocess 4 | import hashlib 5 | from datetime import datetime 6 | import re 7 | import time 8 | from pygments import highlight 9 | from pygments.lexers import YamlLexer, JsonLexer 10 | from pygments.formatters import TerminalFormatter 11 | from tabulate import tabulate 12 | import yaml 13 | from yaml.representer import SafeRepresenter 14 | 15 | 16 | def is_numeric(value): 17 | """that'd be nice if we had this in the std lib...""" 18 | return isinstance(value, (int, float, complex)) 19 | 20 | 21 | def is_iterable(obj): 22 | """that'd be nice if we had this in the std lib...""" 23 | try: 24 | iter(obj) 25 | return True 26 | except TypeError: 27 | return False 28 | 29 | 30 | # Yaml-related section, hacking out of some issues 31 | 32 | 33 | class folded_str(str): 34 | pass 35 | 36 | 37 | class literal_str(str): 38 | pass 39 | 40 | 41 | def change_style(style, representer): 42 | def new_representer(dumper, data): 43 | scalar = representer(dumper, data) 44 | scalar.style = style 45 | return scalar 46 | 47 | return new_representer 48 | 49 | 50 | # represent_str does handle some corner cases, so use that 51 | # instead of calling represent_scalar directly 52 | represent_folded_str = change_style(">", SafeRepresenter.represent_str) 53 | represent_literal_str = change_style("|", SafeRepresenter.represent_str) 54 | 55 | 56 | def str_presenter(dumper, data): 57 | """ 58 | Some hack to get yaml output to use look good for multiline, 59 | which is common in this package 60 | 61 | from: https://shorturl.at/klvC1 62 | """ 63 | if len(data.splitlines()) > 1 or "\n" in data or "\\n" in data: 64 | text_list = [line.rstrip() for line in data.splitlines()] 65 | fixed_data = "\n".join(text_list) 66 | return dumper.represent_scalar("tag:yaml.org,2002:str", fixed_data, style="|") 67 | return dumper.represent_scalar("tag:yaml.org,2002:str", data) 68 | 69 | 70 | yaml.add_representer(folded_str, represent_folded_str) 71 | yaml.add_representer(literal_str, represent_literal_str) 72 | yaml.add_representer(str, str_presenter) 73 | 74 | 75 | def extract_json_objects(text: str, get_first: bool = True) -> List[Dict[str, Any]]: 76 | """ 77 | Extract JSON objects from a given string by looking for matching curly brackets. 78 | 79 | Args: 80 | text (str): The input string containing JSON objects. 81 | 82 | Returns: 83 | List[Dict[str, Any]]: A list of JSON objects found in the input string. 
84 | 85 | Example: 86 | 87 | >>> extract_json_objects('Some text: {"key1": "value1", "key2": "value2"} and more text.') 88 | [{'key1': 'value1', 'key2': 'value2'}] 89 | 90 | >>> extract_json_objects('No JSON objects in this text.') 91 | [] 92 | 93 | >>> extract_json_objects('{"a": 1, "b": 2} and {"c": 3, "d": 4}') 94 | [{'a': 1, 'b': 2}, {'c': 3, 'd': 4}] 95 | """ 96 | json_objects = [] 97 | # man that regex is something... generated by GPT4... 98 | regex = r"(\{(?:[^{}]|(?:\{(?:[^{}]|(?:\{(?:[^{}]|(?:\{[^{}]*\}))*\}))*\}))*\})" 99 | for match in re.finditer(regex, text): 100 | try: 101 | json_object = json.loads(match.group()) 102 | json_objects.append(json_object) 103 | except json.JSONDecodeError: 104 | pass 105 | return json_objects 106 | 107 | 108 | def int_hash(text): 109 | # Create a SHA-256 hash of the input string 110 | hash_object = hashlib.sha256(str(text).encode()) 111 | 112 | # Convert the hash to a hexadecimal string 113 | hex_hash = hash_object.hexdigest() 114 | return int(hex_hash, 16) 115 | 116 | 117 | def short_hash(obj, length=8): 118 | hash_object = hashlib.sha256(str(obj).encode()) 119 | hex_hash = hash_object.hexdigest() 120 | return hex_hash[:length] 121 | 122 | 123 | def to_yaml(data, highlighted=True): 124 | data = yaml.dump(data, sort_keys=False) 125 | if highlighted: 126 | data = highlight(data, YamlLexer(), TerminalFormatter()) 127 | return data 128 | 129 | 130 | def to_json(data, highlighted=True): 131 | data = json.dumps(data, indent=2) 132 | highlighted = highlight(data, JsonLexer(), TerminalFormatter()) 133 | return highlighted 134 | 135 | 136 | def serialize_object(data, style="yaml", highlighted=True): 137 | if style == "yaml": 138 | return to_yaml(data, highlighted) 139 | return to_json(data, highlighted) 140 | 141 | 142 | def transform_strings(obj, transformation): 143 | """ 144 | Recursively iterates through nested iterables (lists and tuples) and dictionaries, 145 | applying a given transformation function to all strings found. 146 | 147 | :param obj: The input object, which can be a string, dictionary, list, or tuple. 148 | Other types will be returned unchanged. 149 | :param transformation: A function that takes a single string argument and returns 150 | a transformed string. 151 | :return: A new object with the same structure as the input object, but with all 152 | strings transformed by the given transformation function. 
153 | """ 154 | if isinstance(obj, str): 155 | return transformation(obj) 156 | elif isinstance(obj, dict): 157 | return {key: transform_strings(value, transformation) for key, value in obj.items()} 158 | elif isinstance(obj, list): 159 | return [transform_strings(item, transformation) for item in obj] 160 | elif isinstance(obj, tuple): 161 | return tuple(transform_strings(item, transformation) for item in obj) 162 | else: 163 | return obj 164 | 165 | 166 | def get_git_info(sha_length: int = 12): 167 | try: 168 | sha = subprocess.check_output(["git", "rev-parse", "HEAD"]).decode("utf-8").strip() 169 | if sha_length: 170 | sha = sha[:sha_length] 171 | branch = ( 172 | subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"]) 173 | .decode("utf-8") 174 | .strip() 175 | ) 176 | dirty = subprocess.call(["git", "diff-index", "--quiet", "HEAD"]) != 0 177 | 178 | return {"sha": sha, "branch": branch, "dirty": dirty} 179 | except subprocess.CalledProcessError: 180 | return None 181 | 182 | 183 | class MeasureDuration: 184 | def __init__(self): 185 | self.duration = None 186 | 187 | def __enter__(self): 188 | self.start_time = time.time() 189 | return self 190 | 191 | def __exit__(self, exc_type, exc_val, exc_tb): 192 | end_time = time.time() 193 | self.duration = (end_time - self.start_time) * 1000 194 | 195 | 196 | def insert_in_dict( 197 | dictionary: Dict[Any, Any], 198 | key: Any, 199 | value: Any, 200 | position: Optional[int] = None, 201 | before_key: Optional[Any] = None, 202 | after_key: Optional[Any] = None, 203 | ) -> Dict[Any, Any]: 204 | """ 205 | Insert a key/value pair in a dictionary at a specific position, before a 206 | specified key, or after a specified key. 207 | 208 | Args: 209 | dictionary (Dict[Any, Any]): The original dictionary. 210 | key (Any): The key to be inserted. 211 | value (Any): The value associated with the key. 212 | position (Optional[int], optional): The position at which the key/value pair 213 | should be inserted. Defaults to None. 214 | before_key (Optional[Any], optional): The key before which the new 215 | key/value pair should be inserted. Defaults to None. 216 | after_key (Optional[Any], optional): The key after which the new 217 | key/value pair should be inserted. Defaults to None. 218 | 219 | Raises: 220 | ValueError: If more than one of 'position', 'before_key', or 'after_key' is specified. 221 | ValueError: If the specified position is out of range. 222 | KeyError: If 'before_key' or 'after_key' is not found in the dictionary. 223 | 224 | Returns: 225 | Dict[Any, Any]: A new dictionary with the inserted key/value pair. 
226 | """ 227 | if sum([bool(position is not None), bool(before_key), bool(after_key)]) > 1: 228 | raise ValueError("Only one of 'position', 'before_key', or 'after_key' can be specified") 229 | 230 | if position is not None and (position > len(dictionary) or position < 0): 231 | raise ValueError("Position is out of range") 232 | 233 | if before_key is not None and before_key not in dictionary: 234 | raise KeyError(f"'before_key': {before_key} not found in the dictionary") 235 | 236 | if after_key is not None and after_key not in dictionary: 237 | raise KeyError(f"'after_key': {after_key} not found in the dictionary") 238 | 239 | new_dict = {} 240 | inserted = False 241 | 242 | for index, (dict_key, dict_value) in enumerate(dictionary.items()): 243 | if position == index or dict_key == before_key: 244 | new_dict[key] = value 245 | inserted = True 246 | elif after_key == dict_key: 247 | new_dict[dict_key] = dict_value 248 | new_dict[key] = value 249 | inserted = True 250 | continue 251 | 252 | new_dict[dict_key] = dict_value 253 | 254 | if not inserted or position == len(dictionary): 255 | new_dict[key] = value 256 | 257 | return new_dict 258 | 259 | 260 | def current_iso_timestamp(): 261 | now = datetime.utcnow() 262 | return now.isoformat() 263 | 264 | 265 | def hashable_repr(obj): 266 | if isinstance(obj, (list, tuple, set)): 267 | return "".join(hashable_repr(item) for item in obj) 268 | elif isinstance(obj, dict): 269 | return "".join( 270 | hashable_repr(key) + hashable_repr(value) for key, value in sorted(obj.items()) 271 | ) 272 | elif callable(obj): 273 | return str(obj.__code__.co_code) 274 | else: 275 | return str(obj) 276 | 277 | 278 | def trabulate(df, showindex=True, headers="keys"): 279 | headers = headers if headers else [] 280 | for column in df.columns: 281 | if df[column].dtype == "int64": 282 | df[column] = df[column].astype(str) 283 | return tabulate(df, headers=headers, showindex=showindex, tablefmt="psql", floatfmt=".2f") 284 | -------------------------------------------------------------------------------- /publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # FIRST! 
manually update the setup.py with the version number 4 | # export PROMPTIMIZE_VERSION=0.2.3 5 | 6 | git checkout main 7 | git commit -a -m "Version $PROMPTIMIZE_VERSION" 8 | git tag $PROMPTIMIZE_VERSION 9 | git push origin main $PROMPTIMIZE_VERSION 10 | python setup.py sdist bdist_wheel 11 | twine upload dist/promptimize-$PROMPTIMIZE_VERSION* 12 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | pre-commit 3 | recommonmark 4 | setuptools 5 | sphinx 6 | sphinx-click 7 | sphinxcontrib-napoleon 8 | twine 9 | wheel 10 | -------------------------------------------------------------------------------- /requirements-examples.txt: -------------------------------------------------------------------------------- 1 | RestrictedPython 2 | demjson 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2 2 | python-box 3 | black 4 | click 5 | pygments 6 | pyyaml 7 | langchain 8 | tabulate 9 | pandas 10 | openai 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | # Read the contents of the requirements.txt file 4 | with open("requirements.txt") as f: 5 | requirements = f.read().splitlines() 6 | 7 | # Read the contents of the README.md file 8 | with open("README.md", "r", encoding="utf-8") as f: 9 | readme = f.read() 10 | 11 | setup( 12 | name="promptimize", 13 | version="0.2.3", 14 | packages=find_packages(), 15 | include_package_data=True, 16 | install_requires=requirements, 17 | entry_points={ 18 | "console_scripts": [ 19 | "promptimize=promptimize:cli", 20 | "p9e=promptimize:cli", 21 | ], 22 | }, 23 | author="Maxime Beauchemin", 24 | author_email="maximebeauchemin@gmail.com", 25 | description="A python toolkit to generate and evaluate prompts for GPT at scale", 26 | long_description=readme, 27 | long_description_content_type="text/markdown", 28 | license="Apache License, Version 2.0", 29 | license_file="LICENSE", 30 | ) 31 | --------------------------------------------------------------------------------
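For reference, a minimal sketch of how the pieces above typically fit together: a templated prompt case scored by an eval function. This is an assumption-laden illustration, not a file in the repository — the module name, class name, template text, and expected words are invented for demonstration only:

    # illustrative_translation_case.py -- hypothetical example module, not in the repo
    from promptimize import evals
    from promptimize.prompt_cases import TemplatedPromptCase


    class TranslationPromptCase(TemplatedPromptCase):
        # class-level jinja template; `template_defaults` and constructor kwargs
        # become template variables at render time
        template = "Translate the following sentence to {{ language }}: {{ user_input }}"
        template_defaults = {"language": "French"}


    prompts = [
        TranslationPromptCase(
            "where is the library?",
            evaluators=lambda x: evals.any_word(x.response, ["bibliothèque", "bibliotheque"]),
            category="translation",
        ),
    ]

Running `promptimize run ./illustrative_translation_case.py` (or the shorter `p9e` entry point declared in setup.py) would discover the `prompts` list via the crawler, call the model once per case, score each response with the attached evaluators, and print per-case results plus a suite-level summary.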