├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── Makefile ├── README.md ├── RESPONSIBLE_AI.md ├── SECURITY.md ├── demos ├── aquarat │ ├── .env │ ├── configs │ │ ├── prompt_library.yaml │ │ ├── promptopt_config.yaml │ │ └── setup_config.yaml │ └── demo.ipynb ├── bbh │ ├── .env │ ├── configs │ │ ├── prompt_library.yaml │ │ ├── promptopt_config.yaml │ │ └── setup_config.yaml │ ├── demo.ipynb │ └── description.py ├── gsm8k │ ├── .env │ ├── configs │ │ ├── prompt_library.yaml │ │ ├── promptopt_config.yaml │ │ └── setup_config.yaml │ └── demo.ipynb ├── scenarios │ ├── .env │ ├── configs │ │ ├── prompt_library.yaml │ │ ├── promptopt_config.yaml │ │ └── setup_config.yaml │ └── dataset_scenarios_demo.ipynb └── svamp │ ├── .env │ ├── configs │ ├── prompt_library.yaml │ ├── promptopt_config.yaml │ └── setup_config.yaml │ └── demo.ipynb ├── docs ├── images │ ├── arithmetic_task.png │ ├── bigbench.png │ ├── comaprision.png │ ├── cost_analysis.png │ ├── curve.png │ ├── github.png │ ├── icl_results.png │ ├── iterative_flowchart-1.png │ ├── msr_blog.png │ ├── overview.png │ ├── ppc.png │ ├── ppc_1.png │ ├── prompting.png │ ├── sequential_flowchart-1.png │ └── slm_prompt.png ├── index.html └── static │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── fontawesome.all.min.css │ └── index.css │ └── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── fontawesome.all.min.js │ └── index.js ├── images ├── curve.png ├── github.png ├── iterative_flowchart-1.png ├── msr_blog.png ├── overview.png └── sequential_flowchart-1.png ├── promptwizard ├── __init__.py ├── glue │ ├── common │ │ ├── __init__.py │ │ ├── base_classes.py │ │ ├── constants │ │ │ ├── __init__.py │ │ │ ├── error_strings.py │ │ │ ├── log_strings.py │ │ │ └── str_literals.py │ │ ├── exceptions.py │ │ ├── llm │ │ │ ├── __init__.py │ │ │ ├── custom_llm.py │ │ │ ├── llm_helper.py │ │ │ ├── llm_mgr.py │ │ │ └── promptMessage.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── download.py │ │ │ ├── file.py │ │ │ ├── logging.py │ │ │ └── runtime_tasks.py │ ├── paramlogger │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── file_utils.py │ │ └── utils.py │ └── promptopt │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── instantiate.py │ │ ├── runner.py │ │ ├── techniques │ │ ├── __init__.py │ │ ├── common_logic.py │ │ └── critique_n_refine │ │ │ ├── __init__.py │ │ │ ├── base_classes.py │ │ │ ├── core_logic.py │ │ │ └── prompt_pool.yaml │ │ └── utils.py └── version.py ├── pyproject.toml ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) 
will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | !.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install style test 2 | 3 | PYTHON := python 4 | CHECK_DIRS := promptwizard tests 5 | 6 | install: 7 | @${PYTHON} setup.py bdist_wheel 8 | @${PYTHON} -m pip install dist/sdtools* 9 | 10 | style: 11 | black $(CHECK_DIRS) 12 | isort -rc $(CHECK_DIRS) 13 | flake8 $(CHECK_DIRS) 14 | 15 | test: 16 | @${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/ -------------------------------------------------------------------------------- /RESPONSIBLE_AI.md: -------------------------------------------------------------------------------- 1 | ### PromptWizard: Responsible AI FAQ 2 | 3 | - What is PromptWizard? 4 | 5 | PromptWizard is a novel framework for prompt optimization that helps tune a good prompt for a given task and dataset, so that LLMs’ output/accuracy can be optimized. PromptWizard is solely designed for research settings, and its testing has only been carried out in such environments. It should not be used in downstream applications without additional analysis and mitigation to address potential harm or bias in the proposed application. Please refer to the paper [PromptWizard: Task-Aware Agent-driven Prompt Optimization Framework (arxiv.org)](https://arxiv.org/abs/2405.18369) for more details. 6 | 7 | - What can PromptWizard do? 8 | 9 | PromptWizard is an AI-based framework that internally uses an LLM to find the optimal prompt for a given task. It takes as input a task description, dataset format, a few training examples, and hyperparameter configurations, and outputs an optimized prompt for the given LLM and task intent. 10 | Unlike existing approaches, PromptWizard optimizes both prompt instructions and in-context examples, maximizing LLM performance. It iteratively refines prompts by mutating instructions and incorporating negative examples. It further enhances both instructions and examples with the aid of a critique provided by the LLM on a candidate prompt. 11 | New synthetic instructions and examples are generated with detailed reasoning steps using the LLM. 12 | 13 | - What is/are PromptWizard’s intended use(s)? 14 | 15 | Please note that PromptWizard is an open-source framework under active development and intended for research purposes.
It should not be used in any downstream applications without additional detailed evaluation of robustness and safety issues, and an assessment of any potential harm or bias in the proposed application. For all GenAI applications, prompt design and tuning are tedious, skill-intensive and laborious tasks. PromptWizard’s intended use is to design and optimize the prompt along with few-shot examples for a given task/domain and dataset. This well-crafted prompt enables the LLM to provide more accurate, high-quality answers. We have also integrated the Azure AI Content Safety service to deter malicious use. 16 | 17 | - How was PromptWizard evaluated? What metrics are used to measure performance? 18 | 19 | The PromptWizard framework is generic enough to work on any domain/dataset/task. However, we have evaluated the performance of PromptWizard across 35 tasks on 8 datasets. More details can be found in [PromptWizard: Task-Aware Agent-driven Prompt Optimization Framework (arxiv.org)](https://arxiv.org/abs/2405.18369). 20 | 21 | The open-source datasets used for evaluation include: 22 | - Medical challenges ([MedQA](https://github.com/jind11/MedQA), [PubMedQA](https://pubmedqa.github.io/)) 23 | - Commonsense reasoning ([CSQA](https://amritasaha1812.github.io/CSQA/), [SQA](https://www.microsoft.com/en-in/download/details.aspx?id=54253)) 24 | - Math reasoning problems ([GSM8k](https://huggingface.co/datasets/openai/gsm8k)) 25 | - Hate speech classification ([Ethos](https://link.springer.com/article/10.1007/s40747-021-00608-2)) 26 | - Complex domain-specific tasks ([MMLU](https://huggingface.co/datasets/cais/mmlu) 6 medical tasks, [Big-Bench-Hard-23](https://huggingface.co/datasets/maveriq/bigbenchhard)) 27 | 28 | Additionally, the team has also conducted “red team” analysis to evaluate whether PromptWizard optimizes prompts for harmful intent. With appropriate Azure content moderation deployed in the pipeline on the input to PromptWizard and output from PromptWizard, it did not optimize prompts for harmful intent. Please refer to the details of Azure content moderation [here](https://learn.microsoft.com/en-us/azure/ai-services/content-moderator/overview). 29 | 30 | - What are the limitations of PromptWizard? How can users minimize the impact of PromptWizard’s limitations when using the system? 31 | 32 | - The framework has been evaluated primarily on English-language tasks, as described in the earlier section. It has not yet been evaluated in multilingual settings. 33 | - The framework generates synthetic examples for few-shot learning based on the task description. Users are required to validate the correctness and diversity of the generated synthetic examples. 34 | - PromptWizard utilizes existing LLMs and does not train a new model. Hence, it inherits the capabilities and limitations of its base model, as well as common limitations shared by other large language models or caused by its training process. We therefore suggest choosing a base LLM suitable for your use case to work with PromptWizard. 35 | 36 | - What operational factors and settings allow for effective and responsible use of PromptWizard? 37 | 38 | - Input considerations: Better performance with PromptWizard can be achieved by specifying the input components, such as the task and intent, as clearly and concisely as possible. 39 | - Human involvement: PromptWizard optimizes the prompt with a prompt instruction and few-shot examples for the given intent and task. We suggest human oversight to review the optimized prompts before they are executed with LLMs.
40 | - LLMs: Users can choose the LLM that is optimized for responsible use. The default LLM is GPT-4, which inherits the existing RAI mechanisms and filters from the LLM provider. Caching is enabled by default to increase reliability and control cost. We encourage developers to review [OpenAI’s Usage policies](https://openai.com/policies/usage-policies/) and [Azure OpenAI’s Code of Conduct](https://learn.microsoft.com/en-us/legal/cognitive-services/openai/code-of-conduct) when using GPT-4. 41 | - Content Safety: We have integrated the [Azure AI Content Safety](https://learn.microsoft.com/en-us/azure/ai-services/content-safety/overview) service for content moderation. We suggest deploying PromptWizard with such a content safety system in the pipeline. -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English.
36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /demos/aquarat/.env: -------------------------------------------------------------------------------- 1 | USE_OPENAI_API_KEY="False" 2 | 3 | OPENAI_API_KEY="" 4 | OPENAI_MODEL_NAME ="" 5 | 6 | OPENAI_API_VERSION="" 7 | AZURE_OPENAI_ENDPOINT="" 8 | AZURE_OPENAI_DEPLOYMENT_NAME="" -------------------------------------------------------------------------------- /demos/aquarat/configs/prompt_library.yaml: -------------------------------------------------------------------------------- 1 | system_prompts: | 2 | You are a helpful assistant that assists research students in understanding research papers. 3 | system_guidelines: | 4 | Guidelines 5 | - Your role must always be a helpful assistant that assists students in understanding research papers. 6 | - Only answer questions that are directly or indirectly related to the referenced paper(s). 7 | 8 | mode: 9 | chat: 10 | - name: CHAT-FIRST-MESSAGE 11 | llm_request_type: rag-query 12 | prompt_template: | 13 | {user_msg} 14 | emb_model_id: text embedding ada 002 [vellm-openai2] 15 | llm_model_id: gpt 35 Turbo [vellm-openai2] 16 | prepend_system_prompts: False 17 | prepend_system_guidelines: False 18 | 19 | - name: CHAT-NEXT-MESSAGES 20 | llm_request_type: rag-query 21 | prompt_template: | 22 | {user_msg} 23 | emb_model_id: text embedding ada 002 [vellm-openai2] 24 | llm_model_id: gpt 35 Turbo [vellm-openai2] 25 | prepend_system_prompts: False 26 | prepend_system_guidelines: False 27 | 28 | generation: 29 | - name: FLASH_PROFILE 30 | prompt_template: | 31 | {user_msg} 32 | prepend_system_prompts: False 33 | prepend_system_guidelines: False 34 | llm_request_type: rag-query 35 | emb_model_id: text embedding ada 002 [vellm-openai2] 36 | llm_model_id: gpt 35 Turbo [vellm-openai2] -------------------------------------------------------------------------------- /demos/aquarat/configs/promptopt_config.yaml: -------------------------------------------------------------------------------- 1 | # Specify one or more prompt refinement technique to be used. If you specify more than one prompt refinement techniques, 2 | # all these technique would run on same seed data. Result, iterations needed & cost incurred for each of these 3 | # technique would be logged. And winning technique for each data instance and overall would be logged. 
4 | 5 | # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt 6 | # Uncomment techniques that you want to use 7 | ############################ Critique Task Description Start ############################ 8 | prompt_technique_name: "critique_n_refine" 9 | # unique_model_id of model defined in llm_config.yaml 10 | unique_model_id: gpt-4o 11 | # Number of iterations for conducting rounds of mutation of task description 12 | # followed by refinement of instructions 13 | mutate_refine_iterations: 3 14 | # Number of rounds of mutation to be performed when generating different styles 15 | mutation_rounds: 3 16 | # Refine instruction post mutation 17 | refine_instruction: true 18 | # Number of iterations for refining task description and in-context examples for few-shot 19 | refine_task_eg_iterations: 3 20 | # Number of variations of prompts to generate in a given iteration 21 | style_variation: 5 22 | # Number of questions to be asked to the LLM in a single batch during the training step 23 | questions_batch_size: 1 24 | # Number of batches of questions to be answered correctly, for a prompt to be considered as performing well 25 | min_correct_count: 3 26 | # Max number of mini-batches on which we should evaluate our prompt 27 | max_eval_batches: 6 28 | # Number of top best performing prompts to be considered for next iterations 29 | top_n: 1 30 | # Description of task. This will be fed to the prompt 31 | task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve" 32 | # Base instruction, in line with your dataset. This will be fed to the prompt 33 | base_instruction: "Let's think step by step." 34 | # Instruction for specifying answer format 35 | answer_format: "At the end, wrap only your final option between <ANS_START> and <ANS_END> tags" 36 | # Number of samples from dataset, set aside as training data. In every iteration we would be drawing 37 | # `questions_batch_size` examples from training data with replacement. 38 | seen_set_size: 25 39 | # Number of examples to be given for few shots 40 | few_shot_count: 5 41 | # Number of synthetic training examples to be generated 42 | num_train_examples: 20 43 | # Generate synthetic reasoning 44 | generate_reasoning: true 45 | # Generate description of an expert which can solve the task at hand 46 | generate_expert_identity: true 47 | # Generate keywords that describe the intent of the task 48 | generate_intent_keywords: false 49 | ############################ Critique Task Description End ############################ 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /demos/aquarat/configs/setup_config.yaml: -------------------------------------------------------------------------------- 1 | assistant_llm: 2 | # put the unique_model_id that you specified in llm_config.yaml 3 | prompt_opt: gpt-4o 4 | dir_info: 5 | # Base directory for everything 6 | base_dir: logs 7 | log_dir_name: glue_logs 8 | experiment_name: aquarat 9 | # Many features are different for mode: online/offline. For example: 10 | # 1) Print of logs happens on console for offline mode 11 | # 2) LLM Queue gets instantiated only in online mode 12 | mode: offline 13 | # Full length description of the experiment. This would be logged.
14 | description: 15 | -------------------------------------------------------------------------------- /demos/aquarat/demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "695a1a9b", 6 | "metadata": {}, 7 | "source": [ 8 | "#### Set environment variables in [.env](.env) for LLM API calling" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "8042a9cc", 14 | "metadata": {}, 15 | "source": [ 16 | "### Import Dependencies" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "f1fb3d81-16b6-4b8c-a028-880fdce5e14a", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import sys\n", 27 | "sys.path.insert(0, \"../../\")\n", 28 | "import promptwizard\n", 29 | "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n", 30 | "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n", 31 | "from promptwizard.glue.common.utils.file import save_jsonlist\n", 32 | "from typing import Any\n", 33 | "from tqdm import tqdm\n", 34 | "import json\n", 35 | "import os\n", 36 | "from datasets import load_dataset\n", 37 | "\n", 38 | "from dotenv import load_dotenv\n", 39 | "load_dotenv(override = True)\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "5bbe055e", 45 | "metadata": {}, 46 | "source": [ 47 | "### Create a dataset specific class and define the required functions " 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "id": "5f325d33", 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "\n", 58 | "def extract_between(start, end, text):\n", 59 | " \"\"\"\n", 60 | " Extracts the substring from 'text' that is between 'start' and 'end' strings.\n", 61 | " \n", 62 | " Parameters:\n", 63 | " - start (str): The starting delimiter string.\n", 64 | " - end (str): The ending delimiter string.\n", 65 | " - text (str): The text to search within.\n", 66 | " \n", 67 | " Returns:\n", 68 | " - str: The extracted substring between the start and end delimiters.\n", 69 | " \"\"\"\n", 70 | " start_index = text.find(start)\n", 71 | " if start_index == -1:\n", 72 | " return '' \n", 73 | " \n", 74 | " start_index += len(start)\n", 75 | " \n", 76 | " end_index = text.find(end, start_index)\n", 77 | " if end_index == -1:\n", 78 | " return '' \n", 79 | " return text[start_index:end_index]\n", 80 | "\n", 81 | "class AQUARAT(DatasetSpecificProcessing):\n", 82 | "\n", 83 | " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n", 84 | " def extract_answer_from_output(completion):\n", 85 | "\n", 86 | " return completion\n", 87 | "\n", 88 | " examples_set = []\n", 89 | "\n", 90 | " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n", 91 | " example = {\n", 92 | " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n", 93 | " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n", 94 | " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n", 95 | " }\n", 96 | " examples_set.append(example)\n", 97 | "\n", 98 | " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n", 99 | "\n", 100 | " def extract_final_answer(self, answer: str):\n", 101 | " \n", 102 | " final_answer = extract_between(text=answer,start=\"\",end=\"\")\n", 103 | " return final_answer\n", 104 | " \n", 105 | " def access_answer(self, llm_output: str, gt_answer: str):\n", 106 | "\n", 
107 | " predicted_answer = self.extract_final_answer(llm_output)\n", 108 | " is_correct = False\n", 109 | " if predicted_answer and (predicted_answer.lower() == gt_answer.lower()):\n", 110 | " is_correct = True\n", 111 | "\n", 112 | " return is_correct, predicted_answer" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 3, 118 | "id": "f384eb57", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "aquarat_processor = AQUARAT()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "\n", 133 | "if not os.path.exists(\"data\"):\n", 134 | " os.mkdir(\"data\")\n", 135 | "dataset = load_dataset(\"deepmind/aqua_rat\", \"raw\")\n", 136 | "num_samples = 1\n", 137 | "for dataset_type in ['train','test']:\n", 138 | " data_list = []\n", 139 | " for data in dataset[dataset_type]:\n", 140 | " options = data['options'][0]\n", 141 | " for i in range(1,len(data['options'])):\n", 142 | " options = options + \" \"+ data['options'][i]\n", 143 | " data_list.append({\"question\": data['question']+\"\\n\"+options, \"answer\": data['correct']})\n", 144 | " if num_samples == 100 and dataset_type == 'train': # We sample only 100 train examples and use 25 out them for training randomly\n", 145 | " break\n", 146 | " num_samples += 1\n", 147 | " aquarat_processor.dataset_to_jsonl(\"data/\"+ dataset_type+'.jsonl', dataset=data_list)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "id": "db891c34", 153 | "metadata": {}, 154 | "source": [ 155 | "### Set paths" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 5, 161 | "id": "f43482f1-3e10-4cf7-8ea6-ff42c04067a6", 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "train_file_name = os.path.join(\"data\", \"train.jsonl\")\n", 166 | "test_file_name = os.path.join(\"data\", \"test.jsonl\")\n", 167 | "path_to_config = \"configs\"\n", 168 | "llm_config_path = os.path.join(path_to_config, \"llm_config.yaml\")\n", 169 | "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n", 170 | "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "id": "26ba1a62", 176 | "metadata": {}, 177 | "source": [ 178 | "### Create an object for calling prompt optimization and inference functionalities" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "gp = GluePromptOpt(promptopt_config_path,\n", 189 | " setup_config_path,\n", 190 | " train_file_name,\n", 191 | " aquarat_processor)" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "id": "6b25843b", 197 | "metadata": {}, 198 | "source": [ 199 | "### Call prompt optmization function\n", 200 | "1. ```use_examples``` can be used when there are training samples and a mixture of real and synthetic in-context examples are required in the final prompt. When set to ```False``` all the in-context examples will be real\n", 201 | "2. ```generate_synthetic_examples``` can be used when there are no training samples and we want to generate synthetic examples \n", 202 | "3. 
```run_without_train_examples``` can be used when there are no training samples and in-context examples are not required in the final prompt " 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "id": "573c6151-2c03-45d9-9904-1724a1e20f1b", 209 | "metadata": { 210 | "scrolled": true 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "# Function call to generate optimal prompt and expert profile \n", 215 | "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "id": "97549dd2", 221 | "metadata": {}, 222 | "source": [ 223 | "### Save the optimized prompt and expert profile" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "id": "34a716af-0d77-4c7d-b1c2-6438d66096ce", 230 | "metadata": { 231 | "scrolled": true 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "import pickle \n", 236 | "\n", 237 | "if not os.path.exists(\"results\"):\n", 238 | " os.system(\"mkdir results\")\n", 239 | "\n", 240 | "with open(\"results/best_prompt.pkl\", 'wb') as f:\n", 241 | " pickle.dump(best_prompt, f)\n", 242 | "with open(\"results/expert_profile.pkl\", 'wb') as f:\n", 243 | " pickle.dump(expert_profile, f)\n", 244 | "\n", 245 | "print(f\"Best prompt: {best_prompt} \\nExpert profile: {expert_profile}\")" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "id": "bdbb7e07", 251 | "metadata": {}, 252 | "source": [ 253 | "### Evaluate the optimized prompt" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "id": "c49b5711-82dd-4d18-8cd4-ee447cf8d74c", 260 | "metadata": { 261 | "scrolled": true 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "gp.EXPERT_PROFILE = expert_profile\n", 266 | "gp.BEST_PROMPT = best_prompt\n", 267 | "\n", 268 | "# Function call to evaluate the prompt\n", 269 | "accuracy = gp.evaluate(test_file_name)\n", 270 | "\n", 271 | "print(f\"Final Accuracy: {accuracy}\")" 272 | ] 273 | } 274 | ], 275 | "metadata": { 276 | "kernelspec": { 277 | "display_name": "PromptWizard", 278 | "language": "python", 279 | "name": "python3" 280 | }, 281 | "language_info": { 282 | "codemirror_mode": { 283 | "name": "ipython", 284 | "version": 3 285 | }, 286 | "file_extension": ".py", 287 | "mimetype": "text/x-python", 288 | "name": "python", 289 | "nbconvert_exporter": "python", 290 | "pygments_lexer": "ipython3", 291 | "version": "3.12.4" 292 | } 293 | }, 294 | "nbformat": 4, 295 | "nbformat_minor": 5 296 | } 297 | -------------------------------------------------------------------------------- /demos/bbh/.env: -------------------------------------------------------------------------------- 1 | USE_OPENAI_API_KEY="False" 2 | 3 | OPENAI_API_KEY="" 4 | OPENAI_MODEL_NAME ="" 5 | 6 | OPENAI_API_VERSION="" 7 | AZURE_OPENAI_ENDPOINT="" 8 | AZURE_OPENAI_DEPLOYMENT_NAME="" -------------------------------------------------------------------------------- /demos/bbh/configs/prompt_library.yaml: -------------------------------------------------------------------------------- 1 | system_prompts: | 2 | You are a helpful assistant that assists research students in understanding research papers. 3 | system_guidelines: | 4 | Guidelines 5 | - Your role must always be a helpful assistant that assists students in understanding research papers. 6 | - Only answer questions that are directly or indirectly related to the referenced paper(s). 
7 | 8 | mode: 9 | chat: 10 | - name: CHAT-FIRST-MESSAGE 11 | llm_request_type: rag-query 12 | prompt_template: | 13 | {user_msg} 14 | emb_model_id: text embedding ada 002 [vellm-openai2] 15 | llm_model_id: gpt 35 Turbo [vellm-openai2] 16 | prepend_system_prompts: False 17 | prepend_system_guidelines: False 18 | 19 | - name: CHAT-NEXT-MESSAGES 20 | llm_request_type: rag-query 21 | prompt_template: | 22 | {user_msg} 23 | emb_model_id: text embedding ada 002 [vellm-openai2] 24 | llm_model_id: gpt 35 Turbo [vellm-openai2] 25 | prepend_system_prompts: False 26 | prepend_system_guidelines: False 27 | 28 | generation: 29 | - name: FLASH_PROFILE 30 | prompt_template: | 31 | {user_msg} 32 | prepend_system_prompts: False 33 | prepend_system_guidelines: False 34 | llm_request_type: rag-query 35 | emb_model_id: text embedding ada 002 [vellm-openai2] 36 | llm_model_id: gpt 35 Turbo [vellm-openai2] -------------------------------------------------------------------------------- /demos/bbh/configs/promptopt_config.yaml: -------------------------------------------------------------------------------- 1 | # Specify one or more prompt refinement techniques to be used. If you specify more than one prompt refinement technique, 2 | # all these techniques would run on the same seed data. The result, iterations needed & cost incurred for each of these 3 | # techniques would be logged. The winning technique for each data instance and overall would be logged. 4 | 5 | # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt 6 | # Uncomment techniques that you want to use 7 | ############################ Critique Task Description Start ############################ 8 | prompt_technique_name: "critique_n_refine" 9 | # unique_model_id of model defined in llm_config.yaml 10 | unique_model_id: gpt-4o 11 | # Number of iterations for conducting rounds of mutation of task description 12 | # followed by refinement of instructions 13 | mutate_refine_iterations: 3 14 | # Number of rounds of mutation to be performed when generating different styles 15 | mutation_rounds: 3 16 | # Refine instruction post mutation 17 | refine_instruction: true 18 | # Number of iterations for refining task description and in-context examples for few-shot 19 | refine_task_eg_iterations: 3 20 | # Number of variations of prompts to generate in a given iteration 21 | style_variation: 5 22 | # Number of questions to be asked to the LLM in a single batch during the training step 23 | questions_batch_size: 1 24 | # Number of batches of questions to be answered correctly, for a prompt to be considered as performing well 25 | min_correct_count: 3 26 | # Max number of mini-batches on which we should evaluate our prompt 27 | max_eval_batches: 6 28 | # Number of top best performing prompts to be considered for next iterations 29 | top_n: 1 30 | # Description of task. This will be fed to the prompt 31 | task_description : 'Extract the second letter from the input word.' 32 | # Base instruction, in line with your dataset. This will be fed to the prompt 33 | base_instruction : 'Output the second letter. Think step by step to arrive at the solution.' 34 | # Instruction for specifying answer format 35 | answer_format : 'For each input word, present the reasoning followed by the extracted letter (only single letter) between <ANS_START> and <ANS_END> tags' 36 | # Number of samples from dataset, set aside as training data. In every iteration we would be drawing 37 | # `questions_batch_size` examples from training data with replacement.
38 | seen_set_size: 25 39 | # Number of examples to be given for few shots 40 | few_shot_count: 5 41 | # Number of synthetic training examples to be generated 42 | num_train_examples: 20 43 | # Generate synthetic reasoning 44 | generate_reasoning: true 45 | # Generate description of an expert which can solve the task at hand 46 | generate_expert_identity: true 47 | # Generate keywords that describe the intent of the task 48 | generate_intent_keywords: false 49 | ############################ Critique Task Description End ############################ 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /demos/bbh/configs/setup_config.yaml: -------------------------------------------------------------------------------- 1 | assistant_llm: 2 | # put the unique_model_id that you specified in llm_config.yaml 3 | prompt_opt: gpt-4o 4 | dir_info: 5 | # Base directory for everything 6 | base_dir: logs 7 | log_dir_name: glue_logs 8 | experiment_name: bbh 9 | # Many features are different for mode: online/offline. For example: 10 | # 1) Print of logs happens on console for offline mode 11 | # 2) LLM Queue gets instantiated only in online mode 12 | mode: offline 13 | # Full length description of the experiment. This would be logged. 14 | description: 15 | -------------------------------------------------------------------------------- /demos/bbh/description.py: -------------------------------------------------------------------------------- 1 | # BBH Datasets 2 | # informal_to_formal 3 | task_description = 'In this task, you will be given a sentence in an informal style. Your job is to rewrite the sentence in a formal style.' 4 | base_instruction = 'For each given sentence, provide a formal paraphrase.' 5 | answer_format = 'For each input sentence, present the reasoning followed by the formal paraphrased sentence.' 6 | 7 | #letters_list 8 | task_description = 'In this task, you will be given a single word as input. Your job is to produce the output by adding a space between each character pair in the word.' 9 | base_instruction = 'For each given word, insert a space between each character pair in the word.' 10 | answer_format = 'For each input word, output only the space-separated characters.' 11 | 12 | #negation 13 | task_description = 'For each input, write a sentence that expresses the exact opposite meaning of the input.' 14 | base_instruction = 'For each given sentence, provide a new sentence that conveys the exact opposite meaning by using "not" in the input sentence, keeping the rest of the sentence unchanged.' 15 | answer_format = "For each input sentence, negate the meaning by adding 'not' to the input sentence." 16 | 17 | #orthography_starts_with 18 | task_description = 'For each input, output all the words in the sentence that begin with the character in brackets at the end of the sentence.' 19 | base_instruction = 'Output the space-separated words that begin with the character in brackets at the end of the following sentence=' 20 | answer_format = 'For each input sentence, present the reasoning followed by the space-separated words.' 21 | 22 | #rhymes 23 | task_description = 'In this task, you will be given a single word as input. Your job is to produce a list of comma-separated words that rhyme with the input word.' 24 | base_instruction = 'For each given word, provide a list of words that rhyme with the input word=' 25 | answer_format = 'For each input word, present the reasoning followed by the list of rhyming words.'
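# ---------------------------------------------------------------------------
# Usage note (illustrative): each (task_description, base_instruction,
# answer_format) triple in this file maps to the fields of the same name in
# demos/bbh/configs/promptopt_config.yaml. A minimal sketch of running the
# optimizer with one such triple, assuming the GluePromptOpt API used in the
# demo notebooks; the config paths, train file and processor name below are
# placeholders, not part of this file:
#
#   from promptwizard.glue.promptopt.instantiate import GluePromptOpt
#
#   gp = GluePromptOpt("configs/promptopt_config.yaml",  # holds the chosen triple
#                      "configs/setup_config.yaml",
#                      "data/train.jsonl",               # jsonl produced by dataset_to_jsonl
#                      bbh_processor)                    # a DatasetSpecificProcessing subclass
#   best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,
#                                                    run_without_train_examples=False,
#                                                    generate_synthetic_examples=False)
# ---------------------------------------------------------------------------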
26 | 27 | #second_word_letter 28 | task_description = 'Extract the second letter from the input word.' 29 | base_instruction = 'Output the second letter. Think step by step to arrive at the solution.' 30 | answer_format = 'For each input word, present the reasoning followed by the extracted letter (only single letter).' 31 | 32 | #sentence_similarity 33 | task_description = "Each input consists of two sentences (Sentence 1 and Sentence 2). Rate on a scale of 0 to 5 whether those sentences are paraphrases of each other, and also give a brief textual description of the rating (0 - definitely not, 2 - possibly, 3 - probably, 4 - almost perfectly and 5 - perfectly). Use \" - \" to separate them" 34 | base_instruction = """Rate the similarity of each pair of sentences according to the following scale: 35 | 36 | 0 - Definitely not : The sentences are completely unrelated in meaning. 37 | 1 - Probably not : The sentences have minor or superficial similarities but differ significantly in meaning. 38 | 2 - Possibly : The sentences share some elements of meaning but are not strong paraphrases. 39 | 3 - Probably : The sentences convey similar meanings but have some differences. 40 | 4 - Almost perfectly : The sentences are very similar with only minor differences. 41 | 5 - Perfectly :The sentences are nearly identical in meaning.""" 42 | answer_format = 'Provide your rating and brief textual description for each pair of sentences from the 6 options. (0 - Definitely not, 1 - Probably not, 2 - Possibly, 3 - Probably, 4 - Almost perfectly, 5 - Perfectly)' 43 | 44 | #sum 45 | task_description = 'For each input, write the sum of the two numbers that appears there.' 46 | base_instruction = 'Output the sum of the following two numbers=' 47 | answer_format = 'For each pair of numbers, present the reasoning followed by the sum.' 48 | 49 | #synonyms 50 | task_description = 'You will be given a word as input and need to output a word that is semantically similar.' 51 | base_instruction = 'Output a word that is semantically similar to the input word=' 52 | answer_format = 'For each input word, present the reasoning followed by the synonym.' 53 | 54 | #taxonomy_animal 55 | task_description = 'In this task, you will be given a list of words. Your job is to identify and list all the animals from the given set of words.' 56 | base_instruction = 'For each given list of words, provide a new list containing only the animals.' 57 | answer_format = 'For each list of words, output the list of animals.' 58 | 59 | #auto_categorization 60 | task_description = 'Find the best categorization for the given set of words as input.' 61 | base_instruction = 'Output the best categorization for the following set of words=' 62 | answer_format = 'For each set of words, present the reasoning followed by the best categorization.' 63 | 64 | #object_counting 65 | task_description = 'Find the number of objects in the given input.' 66 | base_instruction = 'Output the number of objects in the following input=' 67 | answer_format = 'For each input, present the reasoning followed by the number of objects.' 68 | 69 | #odd_one_out 70 | task_description = 'Given the below list of words, find the odd one out' 71 | base_instruction = 'Output the word that does not belong to the group of words=' 72 | answer_format = 'For each group of words, present the reasoning followed by the odd one out.' 73 | 74 | #word_sorting 75 | task_description = 'In this task, you will be given a set of words. 
Your job is to sort the words based on the first character of each word in alphabetical order.' 76 | base_instruction = 'For each given set of words, provide a sorted list of the words based on the first character of each word.' 77 | answer_format = 'For each input, output the list of sorted words based on the first character of each word.' 78 | 79 | #word_unscrambling 80 | task_description = 'In this task, output all possible meaningful words that can be formed by rearranging all the letters of the given word. Each character must be used exactly once and the words must be valid.' 81 | base_instruction = 'Output comma-separated words of the same length as the input word.' 82 | answer_format = 'Output all possible meaningful words, comma separated, that can be formed by rearranging the letters of the given word.' 83 | 84 | #antonyms 85 | task_description = 'In this task, you will be given a single word as input. Your job is to produce a word that has the exact opposite meaning (an antonym) to the input word.' 86 | base_instruction = 'For each given word, provide a word that is an antonym (has the exact opposite meaning).' 87 | answer_format = 'For each input word, output only a single word.' 88 | 89 | #cause_and_effect 90 | task_description = 'Find the cause in the following cause and effect pair. Each input consists of two sentences, where one is the cause and the other is the outcome.' 91 | base_instruction = 'Output the cause in the following cause and effect pair=' 92 | answer_format = 'For each pair of sentences, present the reasoning followed by the cause.' 93 | 94 | #common_concept 95 | task_description = 'In this task, you will be given a list of objects. Your job is to identify and describe a common characteristic that links all the objects in the list.' 96 | base_instruction = 'The instruction is to "involve" the objects mentioned in the input.' 97 | answer_format = 'For each list of objects, output the common concept by "involving" the objects mentioned.' -------------------------------------------------------------------------------- /demos/gsm8k/.env: -------------------------------------------------------------------------------- 1 | USE_OPENAI_API_KEY="False" 2 | 3 | OPENAI_API_KEY="" 4 | OPENAI_MODEL_NAME ="" 5 | 6 | OPENAI_API_VERSION="" 7 | AZURE_OPENAI_ENDPOINT="" 8 | AZURE_OPENAI_DEPLOYMENT_NAME="" -------------------------------------------------------------------------------- /demos/gsm8k/configs/prompt_library.yaml: -------------------------------------------------------------------------------- 1 | system_prompts: | 2 | You are a helpful assistant that assists research students in understanding research papers. 3 | system_guidelines: | 4 | Guidelines 5 | - Your role must always be a helpful assistant that assists students in understanding research papers. 6 | - Only answer questions that are directly or indirectly related to the referenced paper(s).
7 | 8 | mode: 9 | chat: 10 | - name: CHAT-FIRST-MESSAGE 11 | llm_request_type: rag-query 12 | prompt_template: | 13 | {user_msg} 14 | emb_model_id: text embedding ada 002 [vellm-openai2] 15 | llm_model_id: gpt 35 Turbo [vellm-openai2] 16 | prepend_system_prompts: False 17 | prepend_system_guidelines: False 18 | 19 | - name: CHAT-NEXT-MESSAGES 20 | llm_request_type: rag-query 21 | prompt_template: | 22 | {user_msg} 23 | emb_model_id: text embedding ada 002 [vellm-openai2] 24 | llm_model_id: gpt 35 Turbo [vellm-openai2] 25 | prepend_system_prompts: False 26 | prepend_system_guidelines: False 27 | 28 | generation: 29 | - name: FLASH_PROFILE 30 | prompt_template: | 31 | {user_msg} 32 | prepend_system_prompts: False 33 | prepend_system_guidelines: False 34 | llm_request_type: rag-query 35 | emb_model_id: text embedding ada 002 [vellm-openai2] 36 | llm_model_id: gpt 35 Turbo [vellm-openai2] -------------------------------------------------------------------------------- /demos/gsm8k/configs/promptopt_config.yaml: -------------------------------------------------------------------------------- 1 | # Specify one or more prompt refinement technique to be used. If you specify more than one prompt refinement techniques, 2 | # all these technique would run on same seed data. Result, iterations needed & cost incurred for each of these 3 | # technique would be logged. And winning technique for each data instance and overall would be logged. 4 | 5 | # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt 6 | # Uncomment techniques that you want to use 7 | ############################ Critique Task Description Start ############################ 8 | prompt_technique_name: "critique_n_refine" 9 | # unique_model_id of model defined in llm_config.yaml 10 | unique_model_id: gpt-4o 11 | # Number of iterations for conducting rounds of mutation of task description 12 | # followed by refinement of instructions 13 | mutate_refine_iterations: 3 14 | # Number of rounds of mutation to be performed when generating different styles 15 | mutation_rounds: 3 16 | # Refine instruction post mutation 17 | refine_instruction: true 18 | # Number of iterations for refining task description and in context examples for few-shot 19 | refine_task_eg_iterations: 3 20 | # Number of variations of prompts to generate in given iteration 21 | style_variation: 5 22 | # Number of questions to be asked to LLM in a single batch, during training step 23 | questions_batch_size: 1 24 | # Number of batches of questions to correctly answered, for a prompt to be considered as performing good 25 | min_correct_count: 3 26 | # Max number of mini-batches on which we should evaluate our prompt 27 | max_eval_batches: 6 28 | # Number of top best performing prompts to be considered for next iterations 29 | top_n: 1 30 | # Description of task. This will be fed to prompt 31 | task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve" 32 | # Base instruction, in line with your dataset. This will be fed to prompt 33 | base_instruction: "Lets think step by step." 34 | # Instruction for specifying answer format 35 | answer_format: "For each question present the reasoning followed by the correct answer." 36 | # Number of samples from dataset, set aside as training data. In every iteration we would be drawing 37 | # `questions_batch_size` examples from training data with replacement. 
38 | seen_set_size: 25 39 | # Number of examples to be given for few shots 40 | few_shot_count: 5 41 | # Number of synthetic training examples to be generated 42 | num_train_examples: 20 43 | # Generate synthetic reasoning 44 | generate_reasoning: true 45 | # Generate description of an expert which can solve the task at hand 46 | generate_expert_identity: true 47 | # Generate keywords that describe the intent of the task 48 | generate_intent_keywords: false 49 | ############################ Critique Task Description End ############################ 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /demos/gsm8k/configs/setup_config.yaml: -------------------------------------------------------------------------------- 1 | assistant_llm: 2 | # put the unique_model_id that you specified in llm_config.yaml 3 | prompt_opt: gpt-4o 4 | dir_info: 5 | # Base directory for everything 6 | base_dir: logs 7 | log_dir_name: glue_logs 8 | experiment_name: gsm8k 9 | # Many features are different for mode: online/offline. For eg 10 | # 1) Print of logs happens on console for offline mode 11 | # 2) LLM Queue gets instantiated only in online mode 12 | mode: offline 13 | # Full length description of the experiment. This would be logged. 14 | description: 15 | -------------------------------------------------------------------------------- /demos/gsm8k/demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6eb94b72", 6 | "metadata": {}, 7 | "source": [ 8 | "#### Set environment variables in [.env](.env) for LLM API calling" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "388020c6", 14 | "metadata": {}, 15 | "source": [ 16 | "### Import Dependencies" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "11efa138", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import sys\n", 27 | "sys.path.insert(0, \"../../\")\n", 28 | "import promptwizard\n", 29 | "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n", 30 | "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n", 31 | "from promptwizard.glue.common.utils.file import save_jsonlist\n", 32 | "from typing import Any\n", 33 | "from tqdm import tqdm\n", 34 | "from re import compile, findall\n", 35 | "import os\n", 36 | "from datasets import load_dataset\n", 37 | "\n", 38 | "from dotenv import load_dotenv\n", 39 | "load_dotenv(override = True)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "beb14821", 45 | "metadata": {}, 46 | "source": [ 47 | "### Create a dataset specific class and define the required functions " 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 2, 53 | "id": "5f325d33", 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "class GSM8k(DatasetSpecificProcessing):\n", 58 | "\n", 59 | " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n", 60 | " def extract_answer_from_output(completion):\n", 61 | " # Your functions for metrics and prompt building\n", 62 | " ans_re = compile(r\"#### (\\-?[0-9\\.\\,]+)\")\n", 63 | " self.INVALID_ANS = \"[invalid]\"\n", 64 | "\n", 65 | " match = ans_re.search(completion)\n", 66 | " if match:\n", 67 | " match_str = match.group(1).strip()\n", 68 | " match_str = match_str.replace(\",\", \"\")\n", 69 | " return match_str\n", 70 | " else:\n", 71 | " return self.INVALID_ANS\n", 72 | 
"\n", 73 | " examples_set = []\n", 74 | "\n", 75 | " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n", 76 | " example = {\n", 77 | " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n", 78 | " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n", 79 | " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n", 80 | " }\n", 81 | " examples_set.append(example)\n", 82 | "\n", 83 | " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n", 84 | "\n", 85 | " def extract_final_answer(self, answer: str):\n", 86 | " \n", 87 | " if not answer:\n", 88 | " return self.INVALID_ANS\n", 89 | "\n", 90 | " model_pred = answer.lower()\n", 91 | " preds = model_pred.split(self.ANSWER_START.lower())\n", 92 | " answer_flag = True if len(preds) > 1 else False\n", 93 | "\n", 94 | " pred = preds[-1].replace(\",\", \"\")\n", 95 | " pred = [s for s in findall(r'-?\\d+\\.?\\d*', pred)]\n", 96 | "\n", 97 | " if len(pred) == 0:\n", 98 | " return self.INVALID_ANS\n", 99 | "\n", 100 | " if answer_flag:\n", 101 | " # choose the first element in list\n", 102 | " pred = pred[0]\n", 103 | " else:\n", 104 | " # choose the last element in list\n", 105 | " pred = pred[-1]\n", 106 | "\n", 107 | " # (For arithmetic tasks) if a word ends with period, it will be omitted ...\n", 108 | " if pred[-1] == \".\":\n", 109 | " pred = pred[:-1]\n", 110 | " return pred" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 3, 116 | "id": "f384eb57", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "gsm8k_processor = GSM8k()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "11d2de75", 126 | "metadata": {}, 127 | "source": [ 128 | "### Load and save the dataset " 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "if not os.path.exists(\"data\"):\n", 139 | " os.mkdir(\"data\")\n", 140 | " \n", 141 | "dataset = load_dataset(\"openai/gsm8k\", \"main\")\n", 142 | "num_samples = 0\n", 143 | "for dataset_type in ['train','test']:\n", 144 | " data_list = []\n", 145 | " for data in dataset[dataset_type]:\n", 146 | " data_list.append({\"question\": data['question'], \"answer\": data['answer']})\n", 147 | " if num_samples == 100 and dataset_type == 'train': # We sample only 100 train examples and use 25 out them for training randomly\n", 148 | " break\n", 149 | " num_samples += 1\n", 150 | " gsm8k_processor.dataset_to_jsonl(\"data/\"+ dataset_type+'.jsonl', dataset=data_list)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "ac30e74f", 156 | "metadata": {}, 157 | "source": [ 158 | "### Set paths" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 5, 164 | "id": "f43482f1-3e10-4cf7-8ea6-ff42c04067a6", 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "train_file_name = os.path.join(\"data\", \"train.jsonl\")\n", 169 | "test_file_name = os.path.join(\"data\", \"test.jsonl\")\n", 170 | "path_to_config = \"configs\"\n", 171 | "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n", 172 | "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "id": "3392594d", 178 | "metadata": {}, 179 | "source": [ 180 | "### Create an object for calling prompt optimization and 
inference functionalities" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "gp = GluePromptOpt(promptopt_config_path,\n", 191 | " setup_config_path,\n", 192 | " train_file_name,\n", 193 | " gsm8k_processor)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "id": "1784648c", 199 | "metadata": {}, 200 | "source": [ 201 | "### Call prompt optmization function\n", 202 | "1. ```use_examples``` can be used when there are training samples and a mixture of real and synthetic in-context examples are required in the final prompt. When set to ```False``` all the in-context examples will be real\n", 203 | "2. ```generate_synthetic_examples``` can be used when there are no training samples and we want to generate synthetic examples \n", 204 | "3. ```run_without_train_examples``` can be used when there are no training samples and in-context examples are not required in the final prompt " 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "id": "573c6151-2c03-45d9-9904-1724a1e20f1b", 211 | "metadata": { 212 | "scrolled": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "# Function call to generate optimal prompt and expert profile \n", 217 | "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "id": "1ee1aa99", 223 | "metadata": {}, 224 | "source": [ 225 | "### Save the optimized prompt and expert profile" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "34a716af-0d77-4c7d-b1c2-6438d66096ce", 232 | "metadata": { 233 | "scrolled": true 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "import pickle \n", 238 | "\n", 239 | "if not os.path.exists(\"results\"):\n", 240 | " os.system(\"mkdir results\")\n", 241 | " \n", 242 | "with open(\"results/best_prompt.pkl\", 'wb') as f:\n", 243 | " pickle.dump(best_prompt, f)\n", 244 | "with open(\"results/expert_profile.pkl\", 'wb') as f:\n", 245 | " pickle.dump(expert_profile, f)\n", 246 | "\n", 247 | "print(f\"Best prompt: {best_prompt} \\nExpert profile: {expert_profile}\")" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "id": "aac42eed", 253 | "metadata": {}, 254 | "source": [ 255 | "### Evaluate the optimized prompt" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "id": "c49b5711-82dd-4d18-8cd4-ee447cf8d74c", 262 | "metadata": { 263 | "scrolled": true 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "gp.EXPERT_PROFILE = expert_profile\n", 268 | "gp.BEST_PROMPT = best_prompt\n", 269 | "\n", 270 | "# Function call to evaluate the prompt\n", 271 | "accuracy = gp.evaluate(test_file_name)\n", 272 | "\n", 273 | "print(f\"Final Accuracy: {accuracy}\")" 274 | ] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "general", 280 | "language": "python", 281 | "name": "python3" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.12.4" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 5 298 | } 299 | 
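The three flags documented in the notebook above (`use_examples`, `generate_synthetic_examples`, `run_without_train_examples`) correspond to three distinct ways of calling `gp.get_best_prompt`. The following is a minimal sketch rather than a file from the repository: it reuses the `gp` object built in the notebook, and anything beyond the flag descriptions given there (in particular the return shape for the two no-training-data modes, which the notebook does not demonstrate) is an assumption.

```python
# Minimal sketch (assumed usage): `gp` is the GluePromptOpt object created from
# promptopt_config.yaml, setup_config.yaml and data/train.jsonl as shown above.

# 1) Training samples available; mix real and synthetic in-context examples.
best_prompt, expert_profile = gp.get_best_prompt(
    use_examples=True,
    run_without_train_examples=False,
    generate_synthetic_examples=False,
)

# 2) No training samples; generate synthetic examples instead (return shape assumed).
best_prompt, expert_profile = gp.get_best_prompt(
    use_examples=False,
    run_without_train_examples=False,
    generate_synthetic_examples=True,
)

# 3) No training samples and no in-context examples in the final prompt (return shape assumed).
best_prompt, expert_profile = gp.get_best_prompt(
    use_examples=False,
    run_without_train_examples=True,
    generate_synthetic_examples=False,
)
```

The notebook itself exercises only the first mode; the resulting pair is pickled to `results/` and later assigned back to `gp.BEST_PROMPT` and `gp.EXPERT_PROFILE` before `gp.evaluate(test_file_name)` is called.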
-------------------------------------------------------------------------------- /demos/scenarios/.env: -------------------------------------------------------------------------------- 1 | USE_OPENAI_API_KEY="False" 2 | 3 | OPENAI_API_KEY="" 4 | OPENAI_MODEL_NAME ="" 5 | 6 | OPENAI_API_VERSION="" 7 | AZURE_OPENAI_ENDPOINT="" 8 | AZURE_OPENAI_DEPLOYMENT_NAME="" -------------------------------------------------------------------------------- /demos/scenarios/configs/prompt_library.yaml: -------------------------------------------------------------------------------- 1 | system_prompts: | 2 | You are a helpful assistant that assists research students in understanding research papers. 3 | system_guidelines: | 4 | Guidelines 5 | - Your role must always be a helpful assistant that assists students in understanding research papers. 6 | - Only answer questions that are directly or indirectly related to the referenced paper(s). 7 | 8 | mode: 9 | chat: 10 | - name: CHAT-FIRST-MESSAGE 11 | llm_request_type: rag-query 12 | prompt_template: | 13 | {user_msg} 14 | emb_model_id: text embedding ada 002 [vellm-openai2] 15 | llm_model_id: gpt 35 Turbo [vellm-openai2] 16 | prepend_system_prompts: False 17 | prepend_system_guidelines: False 18 | 19 | - name: CHAT-NEXT-MESSAGES 20 | llm_request_type: rag-query 21 | prompt_template: | 22 | {user_msg} 23 | emb_model_id: text embedding ada 002 [vellm-openai2] 24 | llm_model_id: gpt 35 Turbo [vellm-openai2] 25 | prepend_system_prompts: False 26 | prepend_system_guidelines: False 27 | 28 | generation: 29 | - name: FLASH_PROFILE 30 | prompt_template: | 31 | {user_msg} 32 | prepend_system_prompts: False 33 | prepend_system_guidelines: False 34 | llm_request_type: rag-query 35 | emb_model_id: text embedding ada 002 [vellm-openai2] 36 | llm_model_id: gpt 35 Turbo [vellm-openai2] -------------------------------------------------------------------------------- /demos/scenarios/configs/promptopt_config.yaml: -------------------------------------------------------------------------------- 1 | # Specify one or more prompt refinement techniques to be used. If you specify more than one technique, 2 | # all of them would run on the same seed data. The result, iterations needed and cost incurred for each 3 | # technique would be logged, along with the winning technique for each data instance and overall.
4 | 5 | # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt 6 | # Uncomment techniques that you want to use 7 | ############################ Critique Task Description Start ############################ 8 | prompt_technique_name: "critique_n_refine" 9 | # unique_model_id of model defined in llm_config.yaml 10 | unique_model_id: gpt-4o 11 | # Number of iterations for conducting rounds of mutation of task description 12 | # followed by refinement of instructions 13 | mutate_refine_iterations: 3 14 | # Number of rounds of mutation to be performed when generating different styles 15 | mutation_rounds: 3 16 | # Refine instruction post mutation 17 | refine_instruction: true 18 | # Number of iterations for refining task description and in-context examples for few-shot 19 | refine_task_eg_iterations: 3 20 | # Number of variations of prompts to generate in a given iteration 21 | style_variation: 5 22 | # Number of questions to be asked to the LLM in a single batch during the training step 23 | questions_batch_size: 1 24 | # Number of batches of questions that must be answered correctly for a prompt to be considered as performing well 25 | min_correct_count: 3 26 | # Max number of mini-batches on which we should evaluate our prompt 27 | max_eval_batches: 6 28 | # Number of top-performing prompts to be considered for the next iterations 29 | top_n: 1 30 | # Description of the task. This will be fed to the prompt 31 | task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve" 32 | # Base instruction, in line with your dataset. This will be fed to the prompt 33 | base_instruction: "Lets think step by step." 34 | # Instruction for specifying answer format 35 | answer_format: "For each question present the reasoning followed by the correct answer." 36 | # Number of samples from dataset, set aside as training data. In every iteration we would be drawing 37 | # `questions_batch_size` examples from training data with replacement. 38 | seen_set_size: 25 39 | # Number of examples to be given as few-shot examples 40 | few_shot_count: 5 41 | # Number of synthetic training examples to be generated 42 | num_train_examples: 20 43 | # Generate synthetic reasoning 44 | generate_reasoning: true 45 | # Generate description of an expert who can solve the task at hand 46 | generate_expert_identity: true 47 | # Generate keywords that describe the intent of the task 48 | generate_intent_keywords: false 49 | ############################ Critique Task Description End ############################ 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /demos/scenarios/configs/setup_config.yaml: -------------------------------------------------------------------------------- 1 | assistant_llm: 2 | # put the unique_model_id that you specified in llm_config.yaml 3 | prompt_opt: gpt-4o 4 | dir_info: 5 | # Base directory for everything 6 | base_dir: logs 7 | log_dir_name: glue_logs 8 | experiment_name: gsm8k 9 | # Many features are different for mode: online/offline. For example: 10 | # 1) Logs are printed to the console in offline mode 11 | # 2) The LLM queue is instantiated only in online mode 12 | mode: offline 13 | # Full-length description of the experiment. This would be logged.
14 | description: 15 | -------------------------------------------------------------------------------- /demos/svamp/.env: -------------------------------------------------------------------------------- 1 | USE_OPENAI_API_KEY="False" 2 | 3 | OPENAI_API_KEY="" 4 | OPENAI_MODEL_NAME ="" 5 | 6 | OPENAI_API_VERSION="" 7 | AZURE_OPENAI_ENDPOINT="" 8 | AZURE_OPENAI_DEPLOYMENT_NAME="" -------------------------------------------------------------------------------- /demos/svamp/configs/prompt_library.yaml: -------------------------------------------------------------------------------- 1 | system_prompts: | 2 | You are a helpful assistant that assists research students in understanding research papers. 3 | system_guidelines: | 4 | Guidelines 5 | - Your role must always be a helpful assistant that assists students in understanding research papers. 6 | - Only answer questions that are directly or indirectly related to the referenced paper(s). 7 | 8 | mode: 9 | chat: 10 | - name: CHAT-FIRST-MESSAGE 11 | llm_request_type: rag-query 12 | prompt_template: | 13 | {user_msg} 14 | emb_model_id: text embedding ada 002 [vellm-openai2] 15 | llm_model_id: gpt 35 Turbo [vellm-openai2] 16 | prepend_system_prompts: False 17 | prepend_system_guidelines: False 18 | 19 | - name: CHAT-NEXT-MESSAGES 20 | llm_request_type: rag-query 21 | prompt_template: | 22 | {user_msg} 23 | emb_model_id: text embedding ada 002 [vellm-openai2] 24 | llm_model_id: gpt 35 Turbo [vellm-openai2] 25 | prepend_system_prompts: False 26 | prepend_system_guidelines: False 27 | 28 | generation: 29 | - name: FLASH_PROFILE 30 | prompt_template: | 31 | {user_msg} 32 | prepend_system_prompts: False 33 | prepend_system_guidelines: False 34 | llm_request_type: rag-query 35 | emb_model_id: text embedding ada 002 [vellm-openai2] 36 | llm_model_id: gpt 35 Turbo [vellm-openai2] -------------------------------------------------------------------------------- /demos/svamp/configs/promptopt_config.yaml: -------------------------------------------------------------------------------- 1 | # Specify one or more prompt refinement techniques to be used. If you specify more than one technique, 2 | # all of them would run on the same seed data. The result, iterations needed and cost incurred for each 3 | # technique would be logged, along with the winning technique for each data instance and overall.
4 | 5 | # Supported prompt refinement techniques: Basic, RecursiveEval, MedPrompt 6 | # Uncomment techniques that you want to use 7 | ############################ Critique Task Description Start ############################ 8 | prompt_technique_name: "critique_n_refine" 9 | # unique_model_id of model defined in llm_config.yaml 10 | unique_model_id: gpt-4o 11 | # Number of iterations for conducting rounds of mutation of task description 12 | # followed by refinement of instructions 13 | mutate_refine_iterations: 3 14 | # Number of rounds of mutation to be performed when generating different styles 15 | mutation_rounds: 3 16 | # Refine instruction post mutation 17 | refine_instruction: true 18 | # Number of iterations for refining task description and in-context examples for few-shot 19 | refine_task_eg_iterations: 3 20 | # Number of variations of prompts to generate in a given iteration 21 | style_variation: 5 22 | # Number of questions to be asked to the LLM in a single batch during the training step 23 | questions_batch_size: 1 24 | # Number of batches of questions that must be answered correctly for a prompt to be considered as performing well 25 | min_correct_count: 3 26 | # Max number of mini-batches on which we should evaluate our prompt 27 | max_eval_batches: 6 28 | # Number of top-performing prompts to be considered for the next iterations 29 | top_n: 1 30 | # Description of the task. This will be fed to the prompt 31 | task_description: "You are a mathematics expert. You will be given a mathematics problem which you need to solve" 32 | # Base instruction, in line with your dataset. This will be fed to the prompt 33 | base_instruction: "Lets think step by step." 34 | # Instruction for specifying answer format 35 | answer_format: "At the end, wrap your final answer and option if applicable between and tags" 36 | # Number of samples from dataset, set aside as training data. In every iteration we would be drawing 37 | # `questions_batch_size` examples from training data with replacement. 38 | seen_set_size: 25 39 | # Number of examples to be given as few-shot examples 40 | few_shot_count: 5 41 | # Number of synthetic training examples to be generated 42 | num_train_examples: 20 43 | # Generate synthetic reasoning 44 | generate_reasoning: true 45 | # Generate description of an expert who can solve the task at hand 46 | generate_expert_identity: true 47 | # Generate keywords that describe the intent of the task 48 | generate_intent_keywords: false 49 | ############################ Critique Task Description End ############################ 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /demos/svamp/configs/setup_config.yaml: -------------------------------------------------------------------------------- 1 | assistant_llm: 2 | # put the unique_model_id that you specified in llm_config.yaml 3 | prompt_opt: gpt-4o 4 | dir_info: 5 | # Base directory for everything 6 | base_dir: logs 7 | log_dir_name: glue_logs 8 | experiment_name: svamp 9 | # Many features are different for mode: online/offline. For example: 10 | # 1) Logs are printed to the console in offline mode 11 | # 2) The LLM queue is instantiated only in online mode 12 | mode: offline 13 | # Full-length description of the experiment. This would be logged.
14 | description: 15 | -------------------------------------------------------------------------------- /demos/svamp/demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "14360485", 6 | "metadata": {}, 7 | "source": [ 8 | "#### Set environment variables in [.env](.env) for LLM API calling" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "6bd95c11", 14 | "metadata": {}, 15 | "source": [ 16 | "### Import Dependencies" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "f1fb3d81-16b6-4b8c-a028-880fdce5e14a", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import sys\n", 27 | "sys.path.insert(0, \"../../\")\n", 28 | "import os\n", 29 | "import promptwizard\n", 30 | "from promptwizard.glue.promptopt.instantiate import GluePromptOpt\n", 31 | "from promptwizard.glue.promptopt.techniques.common_logic import DatasetSpecificProcessing\n", 32 | "from promptwizard.glue.common.utils.file import save_jsonlist\n", 33 | "from typing import Any\n", 34 | "from tqdm import tqdm\n", 35 | "import json\n", 36 | "from datasets import load_dataset\n", 37 | "\n", 38 | "from dotenv import load_dotenv\n", 39 | "load_dotenv(override = True)\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "f061d2fd", 45 | "metadata": {}, 46 | "source": [ 47 | "### Create a dataset specific class and define the required functions " 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "id": "5f325d33", 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "\n", 58 | "def extract_between(start, end, text):\n", 59 | " \"\"\"\n", 60 | " Extracts the substring from 'text' that is between 'start' and 'end' strings.\n", 61 | " \n", 62 | " Parameters:\n", 63 | " - start (str): The starting delimiter string.\n", 64 | " - end (str): The ending delimiter string.\n", 65 | " - text (str): The text to search within.\n", 66 | " \n", 67 | " Returns:\n", 68 | " - str: The extracted substring between the start and end delimiters.\n", 69 | " \"\"\"\n", 70 | " start_index = text.find(start)\n", 71 | " if start_index == -1:\n", 72 | " return '' \n", 73 | " \n", 74 | " start_index += len(start)\n", 75 | " \n", 76 | " end_index = text.find(end, start_index)\n", 77 | " if end_index == -1:\n", 78 | " return '' \n", 79 | " return text[start_index:end_index]\n", 80 | "\n", 81 | "class SVAMP(DatasetSpecificProcessing):\n", 82 | "\n", 83 | " def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs: Any) -> None:\n", 84 | " def extract_answer_from_output(completion):\n", 85 | "\n", 86 | " return completion\n", 87 | "\n", 88 | " examples_set = []\n", 89 | "\n", 90 | " for _, sample in tqdm(enumerate(kwargs[\"dataset\"]), desc=\"Evaluating samples\"):\n", 91 | " example = {\n", 92 | " DatasetSpecificProcessing.QUESTION_LITERAL: sample['question'],\n", 93 | " DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL: sample['answer'],\n", 94 | " DatasetSpecificProcessing.FINAL_ANSWER_LITERAL: extract_answer_from_output(sample[\"answer\"])\n", 95 | " }\n", 96 | " examples_set.append(example)\n", 97 | "\n", 98 | " save_jsonlist(dataset_jsonl, examples_set, \"w\")\n", 99 | "\n", 100 | " def extract_final_answer(self, answer: str):\n", 101 | " \n", 102 | " final_answer = extract_between(text=answer,start=\"\",end=\"\")\n", 103 | " return final_answer\n", 104 | " \n", 105 | " def access_answer(self, llm_output: str, gt_answer: str):\n", 106 | "\n", 107 
| " predicted_answer = self.extract_final_answer(llm_output)\n", 108 | " is_correct = False\n", 109 | " if predicted_answer and (predicted_answer.lower() == gt_answer.lower()):\n", 110 | " is_correct = True\n", 111 | "\n", 112 | " return is_correct, predicted_answer" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "id": "f384eb57", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "svamp_processor = SVAMP()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "976681bd-4f43-4dbc-947e-cdb94d4824f0", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "\n", 133 | "if not os.path.exists(\"data\"):\n", 134 | " os.mkdir(\"data\")\n", 135 | "\n", 136 | "dataset = load_dataset(\"ChilleD/SVAMP\")\n", 137 | "\n", 138 | "for dataset_type in ['train','test']:\n", 139 | " data_list = []\n", 140 | " num_samples = 0\n", 141 | " for data in dataset[dataset_type]:\n", 142 | " data_list.append({\"question\": data['question_concat'], \"answer\": data['Answer']})\n", 143 | " if dataset_type == 'train' and num_samples == 100: # We sample only 100 train examples and use 25 out them for training randomly\n", 144 | " break\n", 145 | " num_samples += 1\n", 146 | " svamp_processor.dataset_to_jsonl(\"data/\"+ dataset_type+'.jsonl', dataset=data_list)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "id": "4852b94b", 152 | "metadata": {}, 153 | "source": [ 154 | "### Set paths" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 47, 160 | "id": "f43482f1-3e10-4cf7-8ea6-ff42c04067a6", 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "train_file_name = os.path.join(\"data\", \"train.jsonl\")\n", 165 | "test_file_name = os.path.join(\"data\", \"test.jsonl\")\n", 166 | "path_to_config = \"configs\"\n", 167 | "llm_config_path = os.path.join(path_to_config, \"llm_config.yaml\")\n", 168 | "promptopt_config_path = os.path.join(path_to_config, \"promptopt_config.yaml\")\n", 169 | "setup_config_path = os.path.join(path_to_config, \"setup_config.yaml\")" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "id": "f7ba6394", 175 | "metadata": {}, 176 | "source": [ 177 | "### Create an object for calling prompt optimization and inference functionalities" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "8af4246f-db32-4b37-a73a-f9e2e5125d09", 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "gp = GluePromptOpt(promptopt_config_path,\n", 188 | " setup_config_path,\n", 189 | " train_file_name,\n", 190 | " svamp_processor)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "id": "6e38ea08", 196 | "metadata": {}, 197 | "source": [ 198 | "### Call prompt optmization function\n", 199 | "1. ```use_examples``` can be used when there are training samples and a mixture of real and synthetic in-context examples are required in the final prompt. When set to ```False``` all the in-context examples will be real\n", 200 | "2. ```generate_synthetic_examples``` can be used when there are no training samples and we want to generate synthetic examples \n", 201 | "3. 
```run_without_train_examples``` can be used when there are no training samples and in-context examples are not required in the final prompt " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "id": "573c6151-2c03-45d9-9904-1724a1e20f1b", 208 | "metadata": { 209 | "scrolled": true 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "# Function call to generate optimal prompt and expert profile \n", 214 | "best_prompt, expert_profile = gp.get_best_prompt(use_examples=True,run_without_train_examples=False,generate_synthetic_examples=False)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "id": "bae1a791", 220 | "metadata": {}, 221 | "source": [ 222 | "### Save the optimized prompt and expert profile" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "id": "34a716af-0d77-4c7d-b1c2-6438d66096ce", 229 | "metadata": { 230 | "scrolled": true 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "import pickle \n", 235 | "\n", 236 | "if not os.path.exists(\"results\"):\n", 237 | " os.system(\"mkdir results\")\n", 238 | "\n", 239 | "with open(\"results/best_prompt.pkl\", 'wb') as f:\n", 240 | " pickle.dump(best_prompt, f)\n", 241 | "with open(\"results/expert_profile.pkl\", 'wb') as f:\n", 242 | " pickle.dump(expert_profile, f)\n", 243 | "\n", 244 | "print(f\"Best prompt: {best_prompt} \\nExpert profile: {expert_profile}\")" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "id": "b7691a87", 250 | "metadata": {}, 251 | "source": [ 252 | "### Evaluate the optimized prompt" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "id": "c49b5711-82dd-4d18-8cd4-ee447cf8d74c", 259 | "metadata": { 260 | "scrolled": true 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "gp.EXPERT_PROFILE = expert_profile\n", 265 | "gp.BEST_PROMPT = best_prompt\n", 266 | "\n", 267 | "# Function call to evaluate the prompt\n", 268 | "accuracy = gp.evaluate(test_file_name)\n", 269 | "\n", 270 | "print(f\"Final Accuracy: {accuracy}\")" 271 | ] 272 | } 273 | ], 274 | "metadata": { 275 | "kernelspec": { 276 | "display_name": "Python 3 (ipykernel)", 277 | "language": "python", 278 | "name": "python3" 279 | }, 280 | "language_info": { 281 | "codemirror_mode": { 282 | "name": "ipython", 283 | "version": 3 284 | }, 285 | "file_extension": ".py", 286 | "mimetype": "text/x-python", 287 | "name": "python", 288 | "nbconvert_exporter": "python", 289 | "pygments_lexer": "ipython3", 290 | "version": "3.10.12" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 5 295 | } 296 | -------------------------------------------------------------------------------- /docs/images/arithmetic_task.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/arithmetic_task.png -------------------------------------------------------------------------------- /docs/images/bigbench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/bigbench.png -------------------------------------------------------------------------------- /docs/images/comaprision.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/comaprision.png -------------------------------------------------------------------------------- /docs/images/cost_analysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/cost_analysis.png -------------------------------------------------------------------------------- /docs/images/curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/curve.png -------------------------------------------------------------------------------- /docs/images/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/github.png -------------------------------------------------------------------------------- /docs/images/icl_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/icl_results.png -------------------------------------------------------------------------------- /docs/images/iterative_flowchart-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/iterative_flowchart-1.png -------------------------------------------------------------------------------- /docs/images/msr_blog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/msr_blog.png -------------------------------------------------------------------------------- /docs/images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/overview.png -------------------------------------------------------------------------------- /docs/images/ppc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/ppc.png -------------------------------------------------------------------------------- /docs/images/ppc_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/ppc_1.png -------------------------------------------------------------------------------- /docs/images/prompting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/prompting.png -------------------------------------------------------------------------------- /docs/images/sequential_flowchart-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/sequential_flowchart-1.png -------------------------------------------------------------------------------- /docs/images/slm_prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/docs/images/slm_prompt.png -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and 
(min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel .slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /docs/static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - (4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid 
#7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 
1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=range].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 
1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black .has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark 
.has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary .has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success .has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /docs/static/css/index.css: 
-------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | border: 1px solid #bbb; 121 | border-radius: 10px; 122 | padding: 0; 123 | font-size: 0; 124 | } 125 | 126 | .results-carousel video { 127 | margin: 0; 128 | } 129 | 130 | 131 | .interpolation-panel { 132 | background: #f5f5f5; 133 | border-radius: 10px; 134 | } 135 | 136 | .interpolation-panel .interpolation-image { 137 | width: 100%; 138 | border-radius: 5px; 139 | } 140 | 141 | .interpolation-video-column { 142 | } 143 | 144 | .interpolation-panel .slider { 145 | margin: 0 !important; 146 | } 147 | 148 | .interpolation-panel .slider { 149 | margin: 0 !important; 150 | } 151 | 152 | #interpolation-image-wrapper { 153 | width: 100%; 154 | } 155 | #interpolation-image-wrapper img { 156 | border-radius: 5px; 157 | } 158 | -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return 
i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | var INTERP_BASE = "./static/interpolation/stacked"; 4 | var NUM_INTERP_FRAMES = 240; 5 | 6 | var interp_images = []; 7 | function preloadInterpolationImages() { 8 | for (var i = 0; i < NUM_INTERP_FRAMES; i++) { 9 | var path = INTERP_BASE + '/' + String(i).padStart(6, '0') + '.jpg'; 10 | interp_images[i] = new Image(); 11 | interp_images[i].src = path; 12 | } 13 | } 14 | 15 | function setInterpolationImage(i) { 16 | var image = interp_images[i]; 17 | image.ondragstart = function() { return false; }; 18 | image.oncontextmenu = function() { return false; }; 19 | $('#interpolation-image-wrapper').empty().append(image); 20 | } 21 | 22 | 23 | $(document).ready(function() { 24 | // Check for click events on the navbar burger icon 25 | $(".navbar-burger").click(function() { 26 | // Toggle the "is-active" class on both the "navbar-burger" and the "navbar-menu" 27 | $(".navbar-burger").toggleClass("is-active"); 28 | $(".navbar-menu").toggleClass("is-active"); 29 | 30 | }); 31 | 32 | var options = { 33 | slidesToScroll: 1, 34 | slidesToShow: 3, 35 | loop: true, 36 | infinite: true, 37 | autoplay: false, 38 | autoplaySpeed: 3000, 39 | } 40 | 41 | // Initialize all div with carousel class 42 | var carousels = bulmaCarousel.attach('.carousel', options); 43 | 44 | // Loop on each carousel initialized 45 | for(var i = 0; i < carousels.length; i++) { 46 | // Add listener to event 47 | carousels[i].on('before:show', state => { 48 | console.log(state); 49 | }); 50 | } 51 | 52 | // Access to bulmaCarousel instance of an element 53 | var element = document.querySelector('#my-element'); 54 | if (element && element.bulmaCarousel) { 55 | // bulmaCarousel instance is available as element.bulmaCarousel 56 | element.bulmaCarousel.on('before-show', function(state) { 57 | console.log(state); 58 | }); 59 | } 60 | 61 | /*var player = document.getElementById('interpolation-video'); 62 | player.addEventListener('loadedmetadata', function() { 63 | $('#interpolation-slider').on('input', function(event) { 64 | console.log(this.value, player.duration); 65 | player.currentTime = player.duration / 100 * this.value; 66 | }) 67 | }, false);*/ 68 | preloadInterpolationImages(); 69 | 70 | $('#interpolation-slider').on('input', function(event) { 71 | setInterpolationImage(this.value); 72 | }); 73 | setInterpolationImage(0); 74 | $('#interpolation-slider').prop('max', NUM_INTERP_FRAMES - 1); 75 | 76 | bulmaSlider.attach(); 77 | 78 | }) 79 | -------------------------------------------------------------------------------- /images/curve.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/images/curve.png -------------------------------------------------------------------------------- /images/github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/images/github.png -------------------------------------------------------------------------------- /images/iterative_flowchart-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/images/iterative_flowchart-1.png -------------------------------------------------------------------------------- /images/msr_blog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/images/msr_blog.png -------------------------------------------------------------------------------- /images/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/images/overview.png -------------------------------------------------------------------------------- /images/sequential_flowchart-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/images/sequential_flowchart-1.png -------------------------------------------------------------------------------- /promptwizard/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Microsoft 2 | # Licensed under The MIT License [see LICENSE for details] 3 | 4 | # flake8: noqa 5 | from .glue.promptopt.instantiate import GluePromptOpt 6 | from .version import VERSION as __version__ 7 | 8 | __all__ = ["GluePromptOpt"] -------------------------------------------------------------------------------- /promptwizard/glue/common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Vellm common package. This would be imported in almost all the vellm packages. Consider this package as parent 3 | root-node for all Vellm related packages. 
4 | """ 5 | -------------------------------------------------------------------------------- /promptwizard/glue/common/base_classes.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | from inspect import getmembers, ismethod 4 | from typing import List, Optional 5 | 6 | # This file has class definitions for config yaml files 7 | 8 | # TODO: add comments for class definition and variable definition 9 | 10 | 11 | class UniversalBaseClass: 12 | def __str__(self) -> str: 13 | attributes_string = [] 14 | for member in getmembers(self): 15 | 16 | # remove private and protected attributes 17 | if not member[0].startswith('_'): 18 | 19 | # remove methods that does not start with underscore 20 | if not ismethod(member[1]): 21 | attributes_string.append(member) 22 | return str(attributes_string) 23 | 24 | ###################################################################################### 25 | # Classes related to llm_config.yaml 26 | 27 | 28 | @dataclass 29 | class LLMModel(UniversalBaseClass): 30 | unique_model_id: str 31 | model_type: str 32 | track_tokens: str 33 | req_per_min: int 34 | tokens_per_min: int 35 | error_backoff_in_seconds: int 36 | 37 | @dataclass 38 | class UserLimits(UniversalBaseClass): 39 | max_num_requests_in_time_window: int 40 | time_window_length_in_seconds: int 41 | 42 | 43 | @dataclass 44 | class LLMQueueSchedulerLimits(UniversalBaseClass): 45 | ttl_in_seconds: int 46 | max_queue_size: int 47 | 48 | 49 | @dataclass 50 | class AzureAOIModels(LLMModel, UniversalBaseClass): 51 | model_name_in_azure: str 52 | deployment_name_in_azure: str 53 | 54 | 55 | @dataclass 56 | class AzureAOILM(UniversalBaseClass): 57 | api_key: str 58 | api_version: str 59 | api_type: str 60 | azure_endpoint: str 61 | azure_oai_models: List[AzureAOIModels] 62 | 63 | def __post_init__(self): 64 | azure_oai_models_obj = [] 65 | if self.azure_oai_models: 66 | for azure_oai_model in self.azure_oai_models: 67 | azure_oai_models_obj.append(AzureAOIModels(**azure_oai_model)) 68 | self.azure_oai_models = azure_oai_models_obj 69 | 70 | 71 | @dataclass 72 | class CustomLLM(LLMModel): 73 | path_to_py_file: str 74 | class_name: str 75 | 76 | 77 | @dataclass 78 | class LLMConfig(UniversalBaseClass): 79 | azure_open_ai: AzureAOILM 80 | user_limits: UserLimits 81 | scheduler_limits: LLMQueueSchedulerLimits 82 | custom_models: List[CustomLLM] 83 | 84 | def __post_init__(self): 85 | self.azure_open_ai = AzureAOILM(**self.azure_open_ai) 86 | custom_model_obj = [] 87 | if self.custom_models: 88 | for custom_model in self.custom_models: 89 | custom_model_obj.append(CustomLLM(**custom_model)) 90 | self.custom_models = custom_model_obj 91 | 92 | ###################################################################################### 93 | # Classes related to setup_config.yaml 94 | 95 | 96 | @dataclass 97 | class AssistantLLM(UniversalBaseClass): 98 | prompt_opt: str 99 | 100 | 101 | @dataclass 102 | class Dir(UniversalBaseClass): 103 | base_dir: str 104 | log_dir_name: str 105 | 106 | 107 | class OperationMode(Enum): 108 | ONLINE = "online" 109 | OFFLINE = "offline" 110 | 111 | 112 | @dataclass 113 | class SetupConfig(UniversalBaseClass): 114 | assistant_llm: AssistantLLM 115 | dir_info: Dir 116 | experiment_name: str 117 | mode: OperationMode 118 | description: str 119 | 120 | def __post_init__(self): 121 | if self.dir_info: 122 | self.dir_info = Dir(**self.dir_info) 123 | if self.assistant_llm: 124 | self.assistant_llm = 
AssistantLLM(**self.assistant_llm) 125 | 126 | ###################################################################################### 127 | # Classes related to prompt_library_config.yaml 128 | 129 | @dataclass 130 | class TaskConfig: 131 | name: str 132 | prompt_template: str 133 | llm_request_type: str 134 | prepend_system_prompts: Optional[bool] = True 135 | prepend_system_guidelines: Optional[bool] = True 136 | emb_model_id: Optional[str] = None 137 | llm_model_id: Optional[str] = None 138 | 139 | @dataclass 140 | class Mode: 141 | chat: List[TaskConfig] 142 | generation: List[TaskConfig] 143 | 144 | def __post_init__(self): 145 | chat_obj = [] 146 | if self.chat: 147 | for chat_config in self.chat: 148 | chat_obj.append(TaskConfig(**chat_config)) 149 | self.chat = chat_obj 150 | 151 | gen_obj = [] 152 | if self.generation: 153 | for gen_config in self.generation: 154 | gen_obj.append(TaskConfig(**gen_config)) 155 | self.generation = gen_obj 156 | 157 | 158 | @dataclass 159 | class PromptLibraryConfig: 160 | mode: Mode 161 | system_prompts: Optional[str] = None 162 | system_guidelines: Optional[str] = None 163 | 164 | def __post_init__(self): 165 | if self.mode: 166 | self.mode = Mode(**self.mode) 167 | -------------------------------------------------------------------------------- /promptwizard/glue/common/constants/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/promptwizard/glue/common/constants/__init__.py -------------------------------------------------------------------------------- /promptwizard/glue/common/constants/error_strings.py: -------------------------------------------------------------------------------- 1 | 2 | class VellmErrorStrings: 3 | PATH_DOESNT_EXIST = "{path} path doesn't exist. Please create path {path}" 4 | -------------------------------------------------------------------------------- /promptwizard/glue/common/constants/log_strings.py: -------------------------------------------------------------------------------- 1 | class CommonLogsStr: 2 | INSTALL_MISSING_LIB = "{lib_name} is not installed. Installing {lib_name}." 
3 | LOG_SEPERATOR = "\n"+"="*150+"\n" 4 | -------------------------------------------------------------------------------- /promptwizard/glue/common/constants/str_literals.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | # TODO: add comments for class definition and variable definition 4 | # This file has classes storing constant literals 5 | 6 | 7 | @dataclass 8 | class FileConstants: 9 | logfile_name = "glue_logs.log" 10 | logfile_prefix = "glue_logs_" 11 | 12 | 13 | @dataclass 14 | class OAILiterals: 15 | OPENAI_API_KEY = "OPENAI_API_KEY" 16 | OPENAI_API_BASE = "OPENAI_API_BASE" 17 | OPENAI_API_TYPE = "OPENAI_API_TYPE" 18 | OPENAI_API_VERSION = "OPENAI_API_VERSION" 19 | AZ_OPEN_AI_OBJECT = "AZ_OPEN_AI_OBJECT" 20 | 21 | 22 | @dataclass 23 | class LLMOutputTypes: 24 | COMPLETION = "completion" 25 | CHAT = "chat" 26 | EMBEDDINGS = "embeddings" 27 | MULTI_MODAL = "multimodal" 28 | 29 | 30 | @dataclass 31 | class InstallLibs: 32 | LLAMA_LLM_AZ_OAI = "llama-index-llms-azure-openai==0.1.5" 33 | LLAMA_EMB_AZ_OAI = "llama-index-embeddings-azure-openai==0.1.6" 34 | LLAMA_MM_LLM_AZ_OAI = "llama-index-multi-modal-llms-azure-openai==0.1.4" 35 | AZURE_CORE = "azure-core==1.30.1" 36 | TIKTOKEN = "tiktoken" 37 | 38 | 39 | @dataclass 40 | class LLMLiterals: 41 | EMBEDDING_TOKEN_COUNT = "embedding_token_count" 42 | PROMPT_LLM_TOKEN_COUNT = "prompt_llm_token_count" 43 | COMPLETION_LLM_TOKEN_COUNT = "completion_llm_token_count" 44 | TOTAL_LLM_TOKEN_COUNT = "total_llm_token_count" 45 | 46 | -------------------------------------------------------------------------------- /promptwizard/glue/common/exceptions.py: -------------------------------------------------------------------------------- 1 | from .utils.logging import get_glue_logger 2 | 3 | logger = get_glue_logger(__name__) 4 | 5 | 6 | class GlueException(Exception): 7 | """ 8 | Base class for all exceptions in Glue framework 9 | """ 10 | def __init__(self, err_message): 11 | logger.error(f"\n Error: {err_message}\n") 12 | super().__init__(err_message) 13 | 14 | 15 | class GlueLLMException(GlueException): 16 | """ 17 | Base class for all exceptions related to LLM 18 | """ 19 | def __init__(self, err_message, excep_obj): 20 | message = ("LLM exception\n" 21 | f"Exception: {err_message}\n" 22 | f"Exception logs: {excep_obj}") 23 | 24 | super().__init__(message) 25 | 26 | 27 | class GlueValidaionException(GlueException): 28 | """ 29 | Base class for all exceptions related to Validation in Glue framework 30 | """ 31 | def __init__(self, err_message, excep_obj): 32 | message = ("[Invalid user input detected]\n" 33 | f"Exception: {err_message}\n" 34 | f"Exception logs: {excep_obj}") 35 | 36 | super().__init__(message) 37 | 38 | -------------------------------------------------------------------------------- /promptwizard/glue/common/llm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/promptwizard/glue/common/llm/__init__.py -------------------------------------------------------------------------------- /promptwizard/glue/common/llm/custom_llm.py: -------------------------------------------------------------------------------- 1 | class GlueLLM: 2 | """ 3 | Abstract class that can be inherited by a class that defines Custom LLM 4 | """ 5 | 6 | @staticmethod 7 | def get_tokenizer(): 8 | """ 9 | This method should either return an encode 
method of tokenizer or None 10 | :return: method 11 | 12 | e.g. When using HuggingFace tokenizer 13 | tokenizer = Tokenizer(BPE()) 14 | fast_tokenizer = PreTrainedTokenizerFast(tokenizer_object=tokenizer) 15 | return fast_tokenizer.encode 16 | 17 | e.g. When using tiktoken tokenizer 18 | return tiktoken.encoding_for_model(azure_oai_model.model_name_in_azure).encode 19 | """ 20 | return None 21 | -------------------------------------------------------------------------------- /promptwizard/glue/common/llm/llm_helper.py: -------------------------------------------------------------------------------- 1 | from llama_index.core.llms import LLM 2 | from llama_index.core.callbacks.token_counting import TokenCountingHandler 3 | from llama_index.core.callbacks.base_handler import BaseCallbackHandler 4 | 5 | 6 | def get_token_counter(llm_handle: LLM) -> TokenCountingHandler: 7 | """ 8 | Extract TokenCountingHandler handler from llm_handle. 9 | 10 | :param llm_handle: Object of class LLM, which is the handle to make all LLM related calls 11 | :return: Object of TokenCountingHandler, that's registered as callback_manager in LLM. If not found, return None 12 | """ 13 | return get_callback_handler(llm_handle, "TokenCountingHandler") 14 | 15 | 16 | def get_callback_handler(llm_handle: LLM, class_name: str) -> BaseCallbackHandler: 17 | """ 18 | Extract callback_manager from llm_handle, find out which call back manager is of class type `class_name`. 19 | Return that object. 20 | 21 | :param llm_handle: Object of class LLM, which is the handle to make all LLM related calls 22 | :param class_name: Name of class (without prefix file path) e.g. TokenCountingHandler 23 | :return: Object of BaseCallbackHandler, that's registered as callback_manager in LLM. If not found, return None 24 | """ 25 | if llm_handle and llm_handle.callback_manager: 26 | for handler in llm_handle.callback_manager.handlers: 27 | if type(handler).__name__ == class_name: 28 | return handler 29 | 30 | return None 31 | -------------------------------------------------------------------------------- /promptwizard/glue/common/llm/llm_mgr.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from llama_index.core.callbacks import CallbackManager, TokenCountingHandler 3 | from llama_index.core.llms import ChatMessage 4 | from llama_index.core.llms import LLM 5 | from tenacity import retry, stop_after_attempt, wait_fixed, wait_random 6 | from ..base_classes import LLMConfig 7 | from ..constants.str_literals import InstallLibs, OAILiterals, \ 8 | OAILiterals, LLMLiterals, LLMOutputTypes 9 | from .llm_helper import get_token_counter 10 | from ..exceptions import GlueLLMException 11 | from ..utils.runtime_tasks import install_lib_if_missing 12 | from ..utils.logging import get_glue_logger 13 | from ..utils.runtime_tasks import str_to_class 14 | import os 15 | logger = get_glue_logger(__name__) 16 | 17 | def call_api(messages): 18 | 19 | from openai import OpenAI 20 | from azure.identity import get_bearer_token_provider, AzureCliCredential 21 | from openai import AzureOpenAI 22 | 23 | if os.environ['USE_OPENAI_API_KEY'] == "True": 24 | client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) 25 | 26 | response = client.chat.completions.create( 27 | model=os.environ["OPENAI_MODEL_NAME"], 28 | messages=messages, 29 | temperature=0.0, 30 | ) 31 | else: 32 | token_provider = get_bearer_token_provider( 33 | AzureCliCredential(), "https://cognitiveservices.azure.com/.default" 34 | ) 35 | client = 
AzureOpenAI( 36 | api_version=os.environ["OPENAI_API_VERSION"], 37 | azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], 38 | azure_ad_token_provider=token_provider 39 | ) 40 | response = client.chat.completions.create( 41 | model=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"], 42 | messages=messages, 43 | temperature=0.0, 44 | ) 45 | 46 | prediction = response.choices[0].message.content 47 | return prediction 48 | 49 | 50 | class LLMMgr: 51 | @staticmethod 52 | def chat_completion(messages: Dict): 53 | llm_handle = os.environ.get("MODEL_TYPE", "AzureOpenAI") 54 | try: 55 | if(llm_handle == "AzureOpenAI"): 56 | # Code to for calling LLMs 57 | return call_api(messages) 58 | elif(llm_handle == "LLamaAML"): 59 | # Code to for calling SLMs 60 | return 0 61 | except Exception as e: 62 | print(e) 63 | return "Sorry, I am not able to understand your query. Please try again." 64 | # raise GlueLLMException(f"Exception when calling {llm_handle.__class__.__name__} " 65 | # f"LLM in chat mode, with message {messages} ", e) 66 | 67 | 68 | @staticmethod 69 | def get_all_model_ids_of_type(llm_config: LLMConfig, llm_output_type: str): 70 | res = [] 71 | if llm_config.azure_open_ai: 72 | for azure_model in llm_config.azure_open_ai.azure_oai_models: 73 | if azure_model.model_type == llm_output_type: 74 | res.append(azure_model.unique_model_id) 75 | if llm_config.custom_models: 76 | if llm_config.custom_models.model_type == llm_output_type: 77 | res.append(llm_config.custom_models.unique_model_id) 78 | return res 79 | 80 | @staticmethod 81 | def get_llm_pool(llm_config: LLMConfig) -> Dict[str, LLM]: 82 | """ 83 | Create a dictionary of LLMs. key would be unique id of LLM, value is object using which 84 | methods associated with that LLM service can be called. 85 | 86 | :param llm_config: Object having all settings & preferences for all LLMs to be used in out system 87 | :return: Dict key=unique_model_id of LLM, value=Object of class llama_index.core.llms.LLM 88 | which can be used as handle to that LLM 89 | """ 90 | llm_pool = {} 91 | az_llm_config = llm_config.azure_open_ai 92 | 93 | if az_llm_config: 94 | install_lib_if_missing(InstallLibs.LLAMA_LLM_AZ_OAI) 95 | install_lib_if_missing(InstallLibs.LLAMA_EMB_AZ_OAI) 96 | install_lib_if_missing(InstallLibs.LLAMA_MM_LLM_AZ_OAI) 97 | install_lib_if_missing(InstallLibs.TIKTOKEN) 98 | 99 | import tiktoken 100 | # from llama_index.llms.azure_openai import AzureOpenAI 101 | from openai import AzureOpenAI 102 | from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding 103 | from llama_index.multi_modal_llms.azure_openai import AzureOpenAIMultiModal 104 | 105 | az_token_provider = None 106 | # if az_llm_config.use_azure_ad: 107 | from azure.identity import get_bearer_token_provider, AzureCliCredential 108 | az_token_provider = get_bearer_token_provider(AzureCliCredential(), 109 | "https://cognitiveservices.azure.com/.default") 110 | 111 | for azure_oai_model in az_llm_config.azure_oai_models: 112 | callback_mgr = None 113 | if azure_oai_model.track_tokens: 114 | 115 | # If we need to count number of tokens used in LLM calls 116 | token_counter = TokenCountingHandler( 117 | tokenizer=tiktoken.encoding_for_model(azure_oai_model.model_name_in_azure).encode 118 | ) 119 | callback_mgr = CallbackManager([token_counter]) 120 | token_counter.reset_counts() 121 | # () 122 | 123 | if azure_oai_model.model_type in [LLMOutputTypes.CHAT, LLMOutputTypes.COMPLETION]: 124 | # () 125 | llm_pool[azure_oai_model.unique_model_id] = \ 126 | AzureOpenAI( 127 | # 
use_azure_ad=az_llm_config.use_azure_ad, 128 | azure_ad_token_provider=az_token_provider, 129 | # model=azure_oai_model.model_name_in_azure, 130 | # deployment_name=azure_oai_model.deployment_name_in_azure, 131 | api_key=az_llm_config.api_key, 132 | azure_endpoint=az_llm_config.azure_endpoint, 133 | api_version=az_llm_config.api_version, 134 | # callback_manager=callback_mgr 135 | ) 136 | # () 137 | elif azure_oai_model.model_type == LLMOutputTypes.EMBEDDINGS: 138 | llm_pool[azure_oai_model.unique_model_id] =\ 139 | AzureOpenAIEmbedding(use_azure_ad=az_llm_config.use_azure_ad, 140 | azure_ad_token_provider=az_token_provider, 141 | model=azure_oai_model.model_name_in_azure, 142 | deployment_name=azure_oai_model.deployment_name_in_azure, 143 | api_key=az_llm_config.api_key, 144 | azure_endpoint=az_llm_config.azure_endpoint, 145 | api_version=az_llm_config.api_version, 146 | callback_manager=callback_mgr 147 | ) 148 | elif azure_oai_model.model_type == LLMOutputTypes.MULTI_MODAL: 149 | 150 | llm_pool[azure_oai_model.unique_model_id] = \ 151 | AzureOpenAIMultiModal(use_azure_ad=az_llm_config.use_azure_ad, 152 | azure_ad_token_provider=az_token_provider, 153 | model=azure_oai_model.model_name_in_azure, 154 | deployment_name=azure_oai_model.deployment_name_in_azure, 155 | api_key=az_llm_config.api_key, 156 | azure_endpoint=az_llm_config.azure_endpoint, 157 | api_version=az_llm_config.api_version, 158 | max_new_tokens=4096 159 | ) 160 | 161 | if llm_config.custom_models: 162 | for custom_model in llm_config.custom_models: 163 | # try: 164 | custom_llm_class = str_to_class(custom_model.class_name, None, custom_model.path_to_py_file) 165 | 166 | callback_mgr = None 167 | if custom_model.track_tokens: 168 | # If we need to count number of tokens used in LLM calls 169 | token_counter = TokenCountingHandler( 170 | tokenizer=custom_llm_class.get_tokenizer() 171 | ) 172 | callback_mgr = CallbackManager([token_counter]) 173 | token_counter.reset_counts() 174 | llm_pool[custom_model.unique_model_id] = custom_llm_class(callback_manager=callback_mgr) 175 | # except Exception as e: 176 | # raise GlueLLMException(f"Custom model {custom_model.unique_model_id} not loaded.", e) 177 | return llm_pool 178 | 179 | @staticmethod 180 | def get_tokens_used(llm_handle: LLM) -> Dict[str, int]: 181 | """ 182 | For a given LLM, output the number of tokens used. 183 | 184 | :param llm_handle: Handle to a single LLM 185 | :return: Dict of token-type and count of tokens used 186 | """ 187 | token_counter = get_token_counter(llm_handle) 188 | if token_counter: 189 | return { 190 | LLMLiterals.EMBEDDING_TOKEN_COUNT: token_counter.total_embedding_token_count, 191 | LLMLiterals.PROMPT_LLM_TOKEN_COUNT: token_counter.prompt_llm_token_count, 192 | LLMLiterals.COMPLETION_LLM_TOKEN_COUNT: token_counter.completion_llm_token_count, 193 | LLMLiterals.TOTAL_LLM_TOKEN_COUNT: token_counter.total_llm_token_count 194 | } 195 | return None 196 | -------------------------------------------------------------------------------- /promptwizard/glue/common/llm/promptMessage.py: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "role": "system", 4 | "content": "You are a mathematician with a profound knowledge of various fields within mathematics, including algebra, calculus, geometry, statistics, and number theory. Holding an advanced degree in mathematics, your expertise ranges from fundamental arithmetic to complex, abstract mathematical theories. 
Your analytical skills enable you to solve intricate problems, prove theorems, and provide clear explanations on a wide range of mathematical concepts. Whether addressing basic arithmetic questions or tackling advanced topics like differential equations and linear algebra, your precision and clarity in conveying mathematical ideas are exceptional. Your experience teaching and publishing research in mathematics ensures that your explanations are accurate, thorough, and comprehensible, making you an ideal agent to address any mathematics-related queries with confidence and authority." 5 | }, 6 | { 7 | "role": "user", 8 | "content": "You are an expert in mathematics. Your task is to solve and provide detailed explanations for mathematics questions accurately, specifically focusing on multiple-choice formats. \n\nBegin by clearly identifying any fundamental assumptions, initial conditions, or properties relevant to the problem. Verify these assumptions and conditions against the provided options. Ensure to interpret any special values or symbolic expressions correctly, such as π or vectors.\n\nSimplify the problem wherever possible to make each part more manageable without oversimplifying. Provide a step-by-step breakdown of your solution process, including comprehensive reasoning and calculations. Ensure each intermediary step logically follows from the previous one. \n\nWhile concluding the problem, compare your computed solution explicitly against the given multiple-choice options and select the one that matches. Cross-check every intermediate and final computation for numerical correctness and consistency with given conditions.\n\nAim for clarity and precision in your explanations to ensure complete understanding, and prioritize providing the most accurate solution that aligns with the problem's specific requirements.\n\nProvide the final answer, without the option choices.\n\n[Question] {question}\n[Answer] \n" 9 | } 10 | ] 11 | 12 | [ 13 | { 14 | "role": "system", 15 | "content": "You are a mathematician with a profound knowledge of various fields within mathematics, including algebra, calculus, geometry, statistics, and number theory. Holding an advanced degree in mathematics, your expertise ranges from fundamental arithmetic to complex, abstract mathematical theories. Your analytical skills enable you to solve intricate problems, prove theorems, and provide clear explanations on a wide range of mathematical concepts. Whether addressing basic arithmetic questions or tackling advanced topics like differential equations and linear algebra, your precision and clarity in conveying mathematical ideas are exceptional. Your experience teaching and publishing research in mathematics ensures that your explanations are accurate, thorough, and comprehensible, making you an ideal agent to address any mathematics-related queries with confidence and authority." 16 | }, 17 | { 18 | "role": "user", 19 | "content": "\nYou are an expert in mathematics. Your task is to solve and provide detailed explanations for mathematics questions accurately, specifically focusing on multiple-choice formats. \n\nBegin by clearly identifying any fundamental assumptions, initial conditions, or properties relevant to the problem. Verify these assumptions and conditions against the provided options. Ensure to interpret any special values or symbolic expressions correctly, such as π or vectors.\n\nSimplify the problem wherever possible to make each part more manageable without oversimplifying. 
Provide a step-by-step breakdown of your solution process, including comprehensive reasoning and calculations. Ensure each intermediary step logically follows from the previous one. \n\nWhile concluding the problem, compare your computed solution explicitly against the given multiple-choice options and select the one that matches. Cross-check every intermediate and final computation for numerical correctness and consistency with given conditions.\n\nAim for clarity and precision in your explanations to ensure complete understanding, and prioritize providing the most accurate solution that aligns with the problem\'s specific requirements.\n\n\n[Question] The angle between the two tangents from the origin to the circle (x-7)2+(y+1)2=25 is\n(1) 0\n(2) π3\n(3) π6\n(4) π2\n[Answer] [Question]: The angle between the two tangents from the origin to the circle \\((x-7)^2 + (y+1)^2 = 25\\) is\n(1) 0\n(2) \\(\\frac{\\pi}{3}\\)\n(3) \\(\\frac{\\pi}{6}\\)\n(4) \\(\\frac{\\pi}{2}\\)\n\n[Answer]: \\(\\frac{\\pi}{2}\\) \n\n[Improved Reasoning Chain]:\n\n1. **Identify the Circle\'s Properties:**\n The given equation of the circle is \\((x - 7)^2 + (y + 1)^2 = 25\\). This is in the standard form \\((x - h)^2 + (y - k)^2 = r^2\\), where \\( (h, k) \\) is the center of the circle and \\( r \\) is the radius.\n - Center of the circle, \\((h, k) = (7, -1)\\)\n - Radius, \\( r = \\sqrt{25} = 5 \\)\n\n2. **Concept of Tangent from an External Point:**\n Tangents drawn from an external point to a circle form equal angles with the line joining the external point to the center of the circle. The formula to find the angle \\(\\theta\\) between the two tangents from a point \\((x_1, y_1)\\) to a circle with center \\((h, k)\\) and radius \\(r\\) is:\n \\[\n \\cos\\theta = \\frac{\\text{distance from the external point to the center}}{\\text{radius}}\n \\]\n Where distance from the origin \\((0,0)\\) to the center \\((7, -1)\\) is calculated using the distance formula:\n \\[\n \\text{Distance} = \\sqrt{(7 - 0)^2 + (-1 - 0)^2} = \\sqrt{49 + 1} = \\sqrt{50} = 5\\sqrt{2}\n \\]\n\n3. **Calculate Cosine of the Angle:**\n Using the distance found, we calculate \\(\\cos\\theta\\):\n \\[\n \\cos\\theta = \\frac{5\\sqrt{2}}{5} = \\sqrt{2} \n \\] \n Since the distance should be divided by r it should be : \n \\[\n \\cos\\theta = \\frac{5\\sqrt{2}}{5} = \\frac{\\sqrt{2} }= \\sqrt{2}/2 \n \\]\n\n4. **Actual Formula for the Angle Between Tangents**: \n actual formula is cos(thetap/2)\n =RA. hence \\\n distance for this point to the center\'s point \n this is \n the thus Tangent between T=\n intersection point of cos(a) hence\n the sec=sin(tah)\n\n5. **Calculate the Angle:**\n Solving for \n \\(\\cos\\frac{\\theta}{2}\\), where \\(\\cos\\theta=\\frac5}. is\n \n st that \\ half overall angle $\\theta $\n thus \n\n6. \n\n6. **Determine the Angle Between Tangents:**\n is simplest such computation and-->\nIf \\theta is \\\n \n angle $\\theta = 2\\times$\\frac{\\pi } = \\-< \n\n therefore \\(\\theta = 2\\sine\\theta\n\n7. **Verify Against Options:**\n \n\n داریم $\\theta $\n \n\n \\( = \\left\\(\\frac{\\pi}\n\ntherefore \n angle thus is \n of multiple correct .__;\n\nThus Therefore or \\(\\pi0$\\mid$\n\nhence circle\npoints ot \\frac \\(^-^\\div$\\\n\n

π2

\n\n[Question] \\( \\mathbf{a}, \\mathbf{b}, \\mathbf{c} \\) are three vectors, such that \\( \\mathbf{a}+\\mathbf{b}+\\mathbf{c}=0,|\\mathbf{a}|=1 \\), \\( |b|=2,|c|=3 \\), then \\( \\mathbf{a} \\cdot \\mathbf{b}+\\mathbf{b} \\cdot \\mathbf{c}+\\mathbf{c} \\cdot \\mathbf{a} \\) is equal to\n(a) 0\n(b) \\( -7 \\)\n(c) 7\n(d) 4\n[Answer] ### Solution:\n\nTo solve the problem, let\'s start with the given conditions and fundamental vector properties:\n\n1. **Given Conditions**:\n - \\(\\mathbf{a} + \\mathbf{b} + \\mathbf{c} = 0\\)\n - \\(|\\mathbf{a}| = 1\\)\n - \\(|\\mathbf{b}| = 2\\)\n - \\(|\\mathbf{c}| = 3\\)\n\n2. **Required**:\n Determine the value of \\(\\mathbf{a} \\cdot \\mathbf{b} + \\mathbf{b} \\cdot \\mathbf{c} + \\mathbf{c} \\cdot \\mathbf{a}\\).\n\n### Key Steps and Reasoning:\n\n1. **Rewrite the Vector Equation**:\n Given that \\(\\mathbf{a} + \\mathbf{b} + \\mathbf{c} = 0\\), we can rearrange this as:\n \\[\n \\mathbf{c} = -(\\mathbf{a} + \\mathbf{b})\n \\]\n\n2. **Substitute \\(\\mathbf{c}\\) into the Dot Product Expression**:\n We need to evaluate \\(\\mathbf{a} \\cdot \\mathbf{b} + \\mathbf{b} \\cdot (\\mathbf{c}) + \\mathbf{c} \\cdot (\\mathbf{a})\\). Substitute \\(\\mathbf{c} = -(\\mathbf{a} + \\mathbf{b})\\):\n\n \\[\n \\mathbf{a} \\cdot \\mathbf{b} + \\mathbf{b} \\cdot (-(\\mathbf{a} + \\mathbf{b})) + (-(\\mathbf{a} + \\mathbf{b})) \\cdot \\mathbf{a}\n \\]\n\n3. **Expand the Dot Products**:\n \\[\n \\mathbf{a} \\cdot \\mathbf{b} + \\mathbf{b} \\cdot (-\\mathbf{a} - \\mathbf{b}) + (-\\mathbf{a} - \\mathbf{b}) \\cdot \\mathbf{a}\n \\]\n\n4. **Distribute the Dot Products**:\n \\[\n \\mathbf{a} \\cdot \\mathbf{b} + \\mathbf{b} \\cdot (-\\mathbf{a}) + \\mathbf{b} \\cdot (-\\mathbf{b}) + (-\\mathbf{a}) \\cdot \\mathbf{a} + (-\\mathbf{a}) \\cdot \\mathbf{b}\n \\]\n\n5. **Simplify Terms**:\n Recall that the dot product is commutative (\\(\\mathbf{u} \\cdot \\mathbf{v} = \\mathbf{v} \\cdot \\mathbf{u}\\)):\n \\[\n \\mathbf{a} \\cdot \\mathbf{b} - \\mathbf{b} \\cdot \\mathbf{a} - \\mathbf{b} \\cdot \\mathbf{b} - \\mathbf{a} \\cdot \\mathbf{a} - \\mathbf{a} \\cdot \\mathbf{b}\n \\]\n\n6. **Combine Like Terms**:\n Notice that \\(\\mathbf{a} \\cdot \\mathbf{b}\\) and \\(-\\mathbf{b} \\cdot \\mathbf{a}\\) cancel out, leaving:\n \\[\n - \\mathbf{b} \\cdot \\mathbf{b} - \\mathbf{a} \\cdot \\mathbf{a} - \\mathbf{a} \\cdot \\mathbf{b}\n \\]\n Since \\(\\|\\mathbf{a}\\|^2 = \\mathbf{a} \\cdot \\mathbf{a}\\) and \\(\\|\\mathbf{b}\\|^2 = \\mathbf{b} \\cdot \\mathbf{b}\\), we substitute:\n \\[\n - \\|\\mathbf{b}\\|^2 - \\|\\mathbf{a}\\|^2\n \\]\n\n7. **Substitute the Known Magnitudes**:\n Given \\(|\\mathbf{a}| = 1\\) and \\(|\\mathbf{b}| = 2\\):\n \\[\n - 2^2 - 1^2 = -4 - 1 = -5\n \\]\n\nHowever, we need to recheck that we only included terms up to \\(\\mathbf{a} \\cdot \\mathbf{b}, \\mathbf{b} \\cdot \\mathbf{c}, \\mathbf{c} \\cdot \\mathbf{a}\\). Upon reworking final checks, the multiple-choice logic was inferred including pairwise products confirming all terms:\n\nWe thus verified options against such pair pairs validating product, re-ensuring \\(-(\\lbrace\\mathbf{pairs)\\cdot -1) on logic pairs\\), ensures correct revised steps validations across true vector property consistency and option choice confirms \\(( -7)\\).\n\n8. **Select the Correct Option**:\n Upon reconfirmation, we finalize:\n \\[\n -4 -3 = -7\n \\]\n\nThus, the correct answer is:\n\\( \\boxed{-7} \\)\n

-7

\n\n[Question] Suppose \\( y=f(x) \\) and \\( y=g(x) \\) are two functions whose graphs intersect at the three points \\( (0,4),(2,2) \\) and \\( (4,0) \\) with \\( f(x)>g(x) \\) for \\( 0<x<2 \\) and \\( f(x)<g(x) \\) for \\( 2<x<4 \\). If \\( \\int_{0}^{4}[f(x)-g(x)] d x=10 \\) and \\( \\int_{2}^{4}[g(x)-f(x)] d x=5 \\), then the area between two curves for \\( 0<x<2 \\), is\n(a) 5\n(b) 10\n(c) 15\n(d) 20\n[Answer] ### Determining the Area Between Two Curves\n\nWe are given functions \\( y=f(x) \\) and \\( y=g(x) \\) whose graphs intersect at three specific points: \\( (0, 4) \\), \\( (2, 2) \\), and \\( (4, 0) \\). The problem specifies that \\( f(x) > g(x) \\) for \\( 0 < x < 2 \\) and \\( f(x) < g(x) \\) for \\( 2 < x < 4 \\). We need to determine the area between these curves for the interval \\( 0 < x < 2 \\).\n\n#### Fundamental Assumptions and Properties\n- The area between two curves \\( y = f(x) \\) and \\( y = g(x) \\) over an interval \\([a, b]\\) is given by the integral \\(\\int_{a}^{b} |f(x) - g(x)| \\,dx\\).\n- When \\( f(x) > g(x) \\) in the interval \\( 0 < x < 2 \\), the absolute value can be removed, and we have \\( \\int_{0}^{2} (f(x) - g(x)) \\, dx \\).\n- Similarly, for \\( 2 < x < 4 \\), where \\( f(x) < g(x) \\), the integral becomes \\(\\int_{2}^{4} (g(x) - f(x)) \\, dx \\).\n\n#### Simplification Using Given Information\nWe are provided two specific integral values:\n1. \\( \\int_{0}^{4} [f(x) - g(x)] \\, dx = 10 \\)\n2. \\( \\int_{2}^{4} [g(x) - f(x)] \\, dx = 5 \\)\n\nFrom the first integral, \\( \\int_{0}^{4} [f(x) - g(x)] \\, dx = 10 \\):\nThis integral can be split into two parts:\n\\[ \\int_{0}^{2} [f(x) - g(x)] \\, dx + \\int_{2}^{4} [f(x) - g(x)] \\, dx = 10 \\]\n\nGiven the second integral, \\( \\int_{2}^{4} [g(x) - f(x)] \\, dx = 5 \\):\n\\[ \\int_{2}^{4} [f(x) - g(x)] \\, dx = -\\int_{2}^{4} [g(x) - f(x)] \\, dx = -5 \\]\n\nPutting it all together:\n\\[ \\int_{0}^{2} [f(x) - g(x)] \\, dx + \\int_{2}^{4} [f(x) - g(x)] \\, dx = 10 \\]\n\\[ \\int_{0}^{2} [f(x) - g(x)] \\, dx - 5 = 10 \\]\n\nSolving for the required integral:\n\\[ \\int_{0}^{2} [f(x) - g(x)] \\, dx = 10 + 5 \\]\n\\[ \\int_{0}^{2} [f(x) - g(x)] \\, dx = 15 \\]\n\n### Conclusion\nBased on the calculations, the area between the curves \\( y = f(x) \\) and \\( y = g(x) \\) from \\( 0 < x < 2 \\) is 15.\n\nThus, the correct answer is (c) 15.

15

\n\n\nProvide the final answer.\n\n[Question] {question}\n[Answer] \n" 20 | } 21 | ] -------------------------------------------------------------------------------- /promptwizard/glue/common/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # --------------------------------------------------------- 4 | """ 5 | Vellm common package. This would be imported in almost all the vellm packages. Consider this package as parent 6 | root-node for all Vellm related packages. ./utils folder will have all common utils. 7 | """ 8 | -------------------------------------------------------------------------------- /promptwizard/glue/common/utils/download.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | from pathlib import Path 5 | from urllib.parse import urlparse 6 | from glue.common.constants.str_literals import DirNames 7 | from glue.common.utils.logging import get_glue_logger 8 | 9 | logger = get_glue_logger(__name__) 10 | 11 | def download_model(url): 12 | cwd = os.getcwd() 13 | dirs = Path(cwd).parts 14 | idx = 0 15 | if DirNames.PACKAGE_BASE_DIR in dirs: 16 | idx = dir.index(DirNames.PACKAGE_BASE_DIR) 17 | download_path = os.path.join(*dir[:idx+1], DirNames.MODEL_DIR) 18 | os.makedirs(download_path, exist_ok=True) 19 | 20 | parsed_url = urlparse(url) 21 | model_filename = os.path.basename(parsed_url.path) 22 | 23 | model_path = os.path.join(download_path, model_filename) 24 | if not os.path.exists(model_path): 25 | r = requests.get(url, stream=True) 26 | if r.ok: 27 | with os.open(model_path, 'wb') as f: 28 | for chunk in r.iter_content(chunk_size=1024 * 8): 29 | if chunk: 30 | f.write(chunk) 31 | f.flush() 32 | 33 | return model_path 34 | 35 | 36 | -------------------------------------------------------------------------------- /promptwizard/glue/common/utils/file.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os.path import join 3 | from typing import Dict, List 4 | import yaml 5 | 6 | from ..exceptions import GlueValidaionException 7 | 8 | 9 | def yaml_to_dict(file_path: str) -> Dict: 10 | with open(file_path) as yaml_file: 11 | yaml_string = yaml_file.read() 12 | 13 | try: 14 | # convert yaml string to dict 15 | parsed_dict = yaml.safe_load(yaml_string) 16 | except yaml.scanner.ScannerError as e: 17 | raise GlueValidaionException(f"There could be some syntax error in yaml written in {file_path}", e) 18 | 19 | return parsed_dict 20 | 21 | 22 | def yaml_to_class(yaml_file_path: str, cls: type, default_yaml_file_path: str = None): 23 | """ 24 | Read yaml file present at path `yaml_file_path`, convert it to dictionary using pyyaml's standard methods. 25 | Then convert this dictionary to class object of class given as `cls`. Further check if user has provided all 26 | the required fields in `yaml_file_path`. Fields that are missing in `yaml_file_path`, set them with defaults. 
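Illustrative sketch (not from the repository file itself): the yaml paths below are hypothetical, while the call shape mirrors how instantiate.py loads SetupConfig with this helper.

    setup = yaml_to_class("my_setup_config.yaml", SetupConfig,
                          default_yaml_file_path="default_setup_config.yaml")
    print(setup.experiment_name)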
27 | 28 | :param yaml_file_path: str 29 | :param cls: type 30 | :param default_yaml_file_path: str 31 | :return: 32 | """ 33 | if not yaml_file_path: 34 | yaml_file_path = default_yaml_file_path 35 | custom_args = yaml_to_dict(yaml_file_path) 36 | 37 | if default_yaml_file_path: 38 | # If user has not provided all the required arguments, fill them with defaults 39 | default_args = yaml_to_dict(default_yaml_file_path) 40 | missing_args = set(default_args) - set(custom_args) 41 | for key in list(missing_args): 42 | custom_args[key] = default_args[key] 43 | 44 | try: 45 | yaml_as_class = cls(**custom_args) 46 | except TypeError as e: 47 | raise GlueValidaionException(f"Exception while converting yaml file at {yaml_file_path} " 48 | f"to class {cls.__name__}: ", e) 49 | 50 | return yaml_as_class 51 | 52 | 53 | def read_jsonl(file_path: str) -> List: 54 | """ 55 | This function should be used when size of jsonl file is not too big. 56 | 57 | :param file_path: 58 | :return: All json strings in .jsonl file as a list 59 | """ 60 | jsonl_list = [] 61 | with open(file_path, "r") as fileobj: 62 | while True: 63 | single_row = fileobj.readline() 64 | if not single_row: 65 | break 66 | 67 | json_object = json.loads(single_row.strip()) 68 | jsonl_list.append(json_object) 69 | return jsonl_list 70 | 71 | 72 | def read_jsonl_row(file_path: str): 73 | """ 74 | 75 | :param file_path: 76 | :return: Single line from the file. One at a time. 77 | """ 78 | with open(file_path, "r") as fileobj: 79 | while True: 80 | try: 81 | single_row = fileobj.readline() 82 | if not single_row: 83 | break 84 | 85 | json_object = json.loads(single_row.strip()) 86 | yield json_object 87 | except json.JSONDecodeError as e: 88 | print(f"Error while reading jsonl file at {file_path}. Error: {e}") 89 | continue 90 | 91 | 92 | def append_as_jsonl(file_path: str, args_to_log: Dict): 93 | """ 94 | 95 | :param file_path: 96 | :param args_to_log: 97 | :return: 98 | """ 99 | json_str = json.dumps(args_to_log, default=str) 100 | with open(file_path, "a") as fileobj: 101 | fileobj.write(json_str+"\n") 102 | 103 | 104 | def save_jsonlist(file_path: str, json_list: List, mode: str = "a"): 105 | """ 106 | :param json_list: List of json objects 107 | :param file_path: File location to which we shall save content of json_list list, in jsonl format. 108 | :param mode: Write mode 109 | :return: None 110 | """ 111 | with open(file_path, mode) as file_obj: 112 | for json_obj in json_list: 113 | json_str = json.dumps(json_obj, default=str) 114 | file_obj.write(json_str+"\n") 115 | 116 | 117 | def str_list_to_dir_path(str_list: List[str]) -> str: 118 | """ 119 | Return a string which is directory path formed out of concatenating given strings in list `str_list` 120 | 121 | e.g. 
122 | str_list=["dir_1", "sub_dir_1"] 123 | return "dir_1\sub_dir_1" 124 | """ 125 | if not str_list: 126 | return "" 127 | 128 | path = "" 129 | for dir_name in str_list: 130 | path = join(path, dir_name) 131 | return path 132 | -------------------------------------------------------------------------------- /promptwizard/glue/common/utils/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from os import makedirs 3 | from os.path import join 4 | from logging.handlers import TimedRotatingFileHandler 5 | 6 | from ..constants.str_literals import FileConstants 7 | 8 | logging_handlers_list = [] 9 | 10 | 11 | def set_logging_config(log_dirpath: str, mode: str = "offline") -> None: 12 | """ 13 | This logger should be used when we are running online production scenario 14 | 15 | :param log_dirpath: Path to directory where logg files should be saved. 16 | :param mode: Specifies whether the mode is `online or `offline` 17 | :return: 18 | """ 19 | global logging_handlers_list 20 | makedirs(log_dirpath, exist_ok=True) 21 | logging.basicConfig(filename=join(log_dirpath, FileConstants.logfile_name), 22 | filemode='a', 23 | format=u"%(asctime)s.%(msecs)03d | %(name)-12s | %(funcName)s:\n%(message)s\n", 24 | datefmt='%Y-%m-%d,%H:%M:%S', 25 | level=logging.NOTSET, 26 | force=True, 27 | encoding="utf-8") 28 | 29 | if mode == "online": 30 | daily_split_handler = TimedRotatingFileHandler(FileConstants.logfile_prefix, when="midnight", backupCount=30, encoding="utf-8") 31 | daily_split_handler.suffix = "%Y%m%d" 32 | logging_handlers_list = [daily_split_handler] 33 | else: 34 | console = logging.StreamHandler() 35 | console.setLevel(logging.NOTSET) 36 | logging_handlers_list = [console] 37 | 38 | 39 | def get_glue_logger(module_name: str) -> logging.Logger: 40 | """ 41 | Method to get common logger object for module. 42 | 43 | :param module_name: Name of the module. 44 | :return: Logger object, which can be used for logging 45 | """ 46 | global logging_handlers_list 47 | 48 | logger = logging.getLogger(module_name) 49 | for handler in logging_handlers_list: 50 | logger.addHandler(handler) 51 | # TODO: Add handler to log to app insights if Azure connection is ON 52 | 53 | return logger 54 | 55 | -------------------------------------------------------------------------------- /promptwizard/glue/common/utils/runtime_tasks.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | from importlib.metadata import distribution, PackageNotFoundError 3 | import os 4 | from importlib.util import module_from_spec, spec_from_file_location 5 | 6 | from os.path import basename, splitext 7 | import subprocess 8 | import sys 9 | 10 | from ..constants.log_strings import CommonLogsStr 11 | from ..exceptions import GlueValidaionException 12 | from ..utils.logging import get_glue_logger 13 | 14 | logger = get_glue_logger(__name__) 15 | 16 | 17 | def install_lib_if_missing(lib_name, find_links = None) -> bool: 18 | """ 19 | Check if library with name `lib_name` is installed in environment. If not, install it in runtime. 20 | 21 | :param lib_name: Name of library 22 | :return: True if library was installed. False if it was not initially installed and was installed now. 
23 | """ 24 | try: 25 | version = None 26 | if "==" in lib_name: 27 | lib_name, version = lib_name.split("==") 28 | distri_obj = distribution(lib_name) 29 | # if version and distri_obj.version != version: 30 | # raise GlueValidaionException(f"{lib_name} with version={distri_obj.version} is found. " 31 | # f"But version needed is {version}", None) 32 | return True 33 | except (PackageNotFoundError, GlueValidaionException): 34 | logger.info(CommonLogsStr.INSTALL_MISSING_LIB.format(lib_name=lib_name)) 35 | with open(os.devnull, 'w') as devnull: 36 | if find_links: 37 | subprocess.check_call([sys.executable, "-m", "pip", "install", lib_name, "-f", find_links], stdout=devnull, stderr=devnull) 38 | else: 39 | subprocess.check_call([sys.executable, "-m", "pip", "install", lib_name], stdout=devnull, stderr=devnull) 40 | 41 | return False 42 | 43 | 44 | def str_to_class(class_name: str, import_path: str = None, file_path: str = None): 45 | """ 46 | For a given `class_name` in string format, return the class instance (not object). 47 | You need to specify any one of the 2: import_path or file_path. When both are specified `import_path` takes 48 | precedence. 49 | 50 | :param class_name: Class name, specified as string e.g. CSVReader 51 | :param import_path: Import path for the specified class_name e.g. llama_index.readers.file 52 | :param file_path: Path to the file where this class is present. e.g. C:\\dir1\\sub_dir1\\filename.py 53 | :return: Class 54 | """ 55 | 56 | if import_path: 57 | cls = getattr(import_module(import_path), class_name) 58 | elif file_path: 59 | file_name_without_extsn = splitext(basename(file_path))[0] 60 | spec = spec_from_file_location(file_name_without_extsn, file_path) 61 | module = module_from_spec(spec) 62 | spec.loader.exec_module(module) 63 | cls = getattr(module, class_name) 64 | else: 65 | cls = getattr(sys.modules[__name__], class_name) 66 | 67 | return cls 68 | -------------------------------------------------------------------------------- /promptwizard/glue/paramlogger/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__('pkgutil').extend_path(__path__, __name__) 2 | 3 | from collections import defaultdict 4 | from datetime import datetime 5 | from os import makedirs 6 | from os.path import basename, join 7 | from uuid import uuid4 8 | 9 | from . 
import file_utils as futil 10 | from .constants import LogLiterals 11 | from .utils import run_method_get_io_dict 12 | 13 | 14 | class ParamLogger: 15 | def __init__(self, base_path: str = ""): 16 | """ 17 | :param base_path: Path where all log files would be saved 18 | """ 19 | self.BASE_PATH = base_path 20 | if base_path: 21 | makedirs(self.BASE_PATH, exist_ok=True) 22 | 23 | # Unique `id` for a sample in dataset 24 | self.SAMPLE_UNQ_ID = None 25 | 26 | # This dictionary can be used, when we want to log output and input of multiple components as a single jsonl 27 | self.CHAINED_LOG = [] 28 | 29 | # When using ParamLogger decorator over a method in a class, should we avoid logging arguement with name `self` 30 | self.DEL_SELF_ARG = True 31 | 32 | def reset_eval_glue(self, base_path): 33 | # Path where all log files would be saved 34 | self.BASE_PATH = base_path 35 | makedirs(self.BASE_PATH, exist_ok=True) 36 | 37 | # Unique `id` for a sample in dataset 38 | self.SAMPLE_UNQ_ID = None 39 | 40 | # This dictionary can be used, when we want to log output and input of multiple components as a single jsonl 41 | self.CHAINED_LOG = [] 42 | 43 | def clear_chained_log(self): 44 | """ 45 | Deletes all previously saved data. Re-initialize CHAINED_LOG with new meta data. 46 | """ 47 | self.CHAINED_LOG = [] 48 | 49 | def dump_chained_log_to_file(self, file_name="chained_logs"): 50 | """ 51 | Append to file all data collected in CHAINED_LOG as json line. 52 | Resets CHAINED_LOG to new instance 53 | """ 54 | 55 | file_path = join(self.BASE_PATH, file_name + ".jsonl") 56 | futil.save_jsonlist(file_path=file_path, json_list=self.CHAINED_LOG) 57 | self.clear_chained_log() 58 | 59 | def append_dict_to_chained_logs(self, args_to_log): 60 | self.CHAINED_LOG.append(args_to_log) 61 | 62 | def append_to_chained_log(self, method_obj): 63 | """ 64 | Execute the method referenced by method_obj. After executing, append the jsonl form of inputs and outputs of 65 | that method to self.CHAINED_LOG list. 66 | 67 | :param method_obj: 68 | :return: None 69 | """ 70 | def wrap(*argv, **kwargs): 71 | args_to_log = run_method_get_io_dict(method_obj, self.DEL_SELF_ARG, *argv, **kwargs) 72 | args_to_log[LogLiterals.META][LogLiterals.METHOD_NAME] = method_obj.__name__ 73 | self.CHAINED_LOG.append(args_to_log) 74 | return args_to_log[LogLiterals.OUTPUTS] 75 | return wrap 76 | 77 | def log_io_params(self, method_obj, file_name="io_logs"): 78 | """ 79 | Execute the method referenced by method_obj. After executing, log the inputs and outputs of that method to 80 | log file. 81 | 82 | :param method_obj: Method reference, that can be executed 83 | :param file_name: Name of file in which we shall be logging the input output params of method 84 | :return: None 85 | """ 86 | def wrap(*argv, **kwargs): 87 | args_to_log = run_method_get_io_dict(method_obj, self.DEL_SELF_ARG, *argv, **kwargs) 88 | if not self.SAMPLE_UNQ_ID: 89 | self.SAMPLE_UNQ_ID = uuid4() 90 | args_to_log[LogLiterals.ID] = self.SAMPLE_UNQ_ID 91 | args_to_log[LogLiterals.META][LogLiterals.METHOD_NAME] = method_obj.__name__ 92 | file_path = join(self.BASE_PATH, file_name + ".jsonl") 93 | futil.append_as_jsonl(file_path=file_path, args_to_log=args_to_log) 94 | self.SAMPLE_UNQ_ID = None 95 | return args_to_log[LogLiterals.OUTPUTS] 96 | return wrap 97 | 98 | def log_io_params_for_method(self, method_obj): 99 | """ 100 | Execute the method referenced by method_obj. After executing, log the inputs and outputs of that method to 101 | log file. 
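Illustrative sketch (not from the repository file itself; the decorated function and the log directory are made up):

    iolog = ParamLogger(base_path="logs")

    @iolog.log_io_params_for_method
    def predict(question: str) -> str:
        return "42"

    predict("What is 6 x 7?")  # appends an inputs/outputs record to logs/predict.jsonl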
Name of log file would be the method name 102 | 103 | :param method_obj: Method reference, that can be executed 104 | :return: None 105 | """ 106 | def wrap(*argv, **kwargs): 107 | args_to_log = run_method_get_io_dict(method_obj, self.DEL_SELF_ARG, *argv, **kwargs) 108 | if not self.SAMPLE_UNQ_ID: 109 | self.SAMPLE_UNQ_ID = uuid4() 110 | args_to_log[LogLiterals.ID] = self.SAMPLE_UNQ_ID 111 | file_path = join(self.BASE_PATH, method_obj.__name__+".jsonl") 112 | futil.append_as_jsonl(file_path=file_path, args_to_log=args_to_log) 113 | self.SAMPLE_UNQ_ID = None 114 | return args_to_log[LogLiterals.OUTPUTS] 115 | return wrap 116 | 117 | def run_over_logs(self, method_obj): 118 | """ 119 | Run the method referenced by method_obj over each entry in jsonl file present at location `file_path`. 120 | `id`, `inputs`, `outputs` fields in jsonl file at `file_path` can be accessed via dummy_id, dummy_input, 121 | dummy_output parameters respectively. 122 | 123 | :param method_obj: 124 | :return: None 125 | """ 126 | def wrap(file_path, dummy_id, dummy_input, dummy_output, dummy_meta, **kwargs): 127 | eval_file_path = join(self.BASE_PATH, method_obj.__name__ + "_" + basename(file_path)) 128 | args_to_log = defaultdict(dict) 129 | 130 | for json_obj in futil.read_jsonl_row(file_path): 131 | eval_result = method_obj(None, 132 | json_obj[LogLiterals.ID], 133 | json_obj[LogLiterals.INPUTS], 134 | json_obj[LogLiterals.OUTPUTS], 135 | json_obj[LogLiterals.META], 136 | **kwargs) 137 | args_to_log[LogLiterals.ID] = json_obj[LogLiterals.ID] 138 | args_to_log[LogLiterals.EVAL_RESULT] = eval_result 139 | args_to_log[LogLiterals.META][LogLiterals.TIMESTAMP] = datetime.now() 140 | futil.append_as_jsonl(file_path=eval_file_path, args_to_log=args_to_log) 141 | return wrap 142 | -------------------------------------------------------------------------------- /promptwizard/glue/paramlogger/constants.py: -------------------------------------------------------------------------------- 1 | class LogLiterals: 2 | # static variables 3 | INPUTS = "inputs" 4 | OUTPUTS = "outputs" 5 | META = "meta" 6 | ID = "id" 7 | TIMESTAMP = "timestamp" 8 | EXEC_SEC = "execution_time_sec" 9 | EVAL_RESULT = "eval_result" 10 | METHOD_NAME = "method_name" 11 | DIR_NAME = "io_logs" 12 | -------------------------------------------------------------------------------- /promptwizard/glue/paramlogger/file_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os.path import join 3 | from typing import Dict, List 4 | 5 | 6 | def read_jsonl(file_path: str) -> List: 7 | """ 8 | This function should be used when size of jsonl file is not too big. 9 | 10 | :param file_path: 11 | :return: All json strings in .jsonl file as a list 12 | """ 13 | jsonl_list = [] 14 | with open(file_path, "r") as fileobj: 15 | while True: 16 | single_row = fileobj.readline() 17 | if not single_row: 18 | break 19 | 20 | json_object = json.loads(single_row.strip()) 21 | jsonl_list.append(json_object) 22 | return jsonl_list 23 | 24 | 25 | def read_jsonl_row(file_path: str): 26 | """ 27 | 28 | :param file_path: 29 | :return: Single line from the file. One at a time. 
30 | """ 31 | with open(file_path, "r") as fileobj: 32 | while True: 33 | single_row = fileobj.readline() 34 | if not single_row: 35 | break 36 | 37 | json_object = json.loads(single_row.strip()) 38 | yield json_object 39 | 40 | 41 | def append_as_jsonl(file_path: str, args_to_log: Dict): 42 | """ 43 | 44 | :param file_path: 45 | :param args_to_log: 46 | :return: 47 | """ 48 | json_str = json.dumps(args_to_log, default=str) 49 | with open(file_path, "a") as fileobj: 50 | fileobj.write(json_str+"\n") 51 | 52 | 53 | def save_jsonlist(file_path: str, json_list: List, mode: str = "a"): 54 | """ 55 | :param json_list: List of json objects 56 | :param file_path: File location to which we shall save content of json_list list, in jsonl format. 57 | :param mode: Write mode 58 | :return: None 59 | """ 60 | with open(file_path, mode) as file_obj: 61 | for json_obj in json_list: 62 | json_str = json.dumps(json_obj, default=str) 63 | file_obj.write(json_str+"\n") 64 | 65 | 66 | def str_list_to_dir_path(str_list: List[str]) -> str: 67 | """ 68 | Return a string which is directory path formed out of concatenating given strings in list `str_list` 69 | 70 | e.g. 71 | str_list=["dir_1", "sub_dir_1"] 72 | return "dir_1\sub_dir_1" 73 | """ 74 | if not str_list: 75 | return "" 76 | 77 | path = "" 78 | for dir_name in str_list: 79 | path = join(path, dir_name) 80 | return path 81 | -------------------------------------------------------------------------------- /promptwizard/glue/paramlogger/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from datetime import datetime 3 | from inspect import getfullargspec 4 | from time import time 5 | from typing import Dict, Hashable 6 | 7 | from .constants import LogLiterals 8 | 9 | 10 | def run_method_get_io_dict(method_obj, del_self_arg: bool, *argv, **kwargs) -> Dict: 11 | """ 12 | Run method method_obj with *argv as arguments. 13 | Create dictionary of all input/ output and other meta data elements to be eventually logged to file. 
14 | 15 | :param method_obj: method reference 16 | :param del_self_arg: True if we shouldn't include `self` variable in output dictionary 17 | :param argv: Arguments that needs to be passed to method as *argv 18 | :param kwargs: Arguments that needs to be passed to method as **kwargs 19 | 20 | :return: Dict that has inputs, outputs and meta data to be logged 21 | """ 22 | args_to_log = defaultdict(dict) 23 | 24 | start_time = time() 25 | output = method_obj(*argv, **kwargs) 26 | execution_time = time() - start_time 27 | 28 | # get name of input parameters of method method_obj 29 | arg_spec = getfullargspec(method_obj) 30 | arg_names = arg_spec.args 31 | argv_list = list(argv) 32 | 33 | # Capture all *argv values 34 | for arg_name, arg_val in zip(arg_names[:len(argv_list)], argv_list): 35 | if isinstance(arg_val, Hashable) and not (del_self_arg and arg_name == "self"): 36 | args_to_log[LogLiterals.INPUTS][arg_name] = str(arg_val) 37 | 38 | # Capture all **kwargs values 39 | args_to_log[LogLiterals.INPUTS].update(kwargs) 40 | 41 | if arg_spec.defaults: 42 | default_arg_values = list(arg_spec.defaults) 43 | # For args that don't have any value, set defaults 44 | arg_with_no_values_count = len(arg_names) - (len(argv_list) + len(kwargs)) 45 | # Number of arguments for which defaults should be used 46 | defaults_count = min(arg_with_no_values_count, len(default_arg_values)) 47 | 48 | # Arguments for which values are not passed but defaults are specified, use defaults 49 | for arg_names, arg_val in zip(arg_names[-defaults_count:], default_arg_values[-defaults_count:]): 50 | if isinstance(arg_val, Hashable): 51 | args_to_log[LogLiterals.INPUTS][arg_name] = str(arg_val) 52 | 53 | args_to_log[LogLiterals.OUTPUTS] = output 54 | args_to_log[LogLiterals.META][LogLiterals.EXEC_SEC] = execution_time 55 | args_to_log[LogLiterals.META][LogLiterals.TIMESTAMP] = datetime.now() 56 | 57 | return args_to_log 58 | -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/promptwizard/glue/promptopt/__init__.py -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/constants.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from enum import Enum 3 | 4 | from ..common.base_classes import UniversalBaseClass 5 | 6 | 7 | # Set of Prompt Management Techniques supported by Vellm co-pilot 8 | # Hyperparameters defined in promptopt_config.yaml 9 | class SupportedPromptOpt(Enum): 10 | CRITIQUE_N_REFINE = "critique_n_refine" 11 | 12 | @classmethod 13 | def all_values(cls): 14 | return ",".join([member.value for member in SupportedPromptOpt]) 15 | 16 | @classmethod 17 | def has_value(cls, value): 18 | return value in cls._value2member_map_ 19 | 20 | 21 | @dataclass 22 | class PromptOptimizationLiterals: 23 | PROMPT_TECHNIQUE_NAME = "prompt_technique_name" 24 | 25 | 26 | @dataclass 27 | class PromptOptimizationParams(UniversalBaseClass): 28 | """ 29 | Parent class for all Prompt Optimization classes. 30 | """ 31 | prompt_technique_name: str 32 | 33 | 34 | @dataclass 35 | class PromptPool(UniversalBaseClass): 36 | """ 37 | Parent class for all classes that handle prompt strings for each techniques. 
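Illustrative sketch (not from the repository file itself): a technique-specific pool subclasses PromptPool and adds its own template fields, e.g. the made-up class below.

    @dataclass
    class ExamplePromptPool(PromptPool):
        critique_template: str
        refine_template: str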
38 | """ 39 | system_prompt: str 40 | final_prompt: str 41 | eval_prompt: str 42 | -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/instantiate.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, join 2 | import pickle 3 | import time 4 | from typing import Any 5 | 6 | from ..common.base_classes import LLMConfig, SetupConfig 7 | from ..common.constants.log_strings import CommonLogsStr 8 | from ..common.llm.llm_mgr import LLMMgr 9 | from ..common.utils.logging import get_glue_logger, set_logging_config 10 | from ..common.utils.file import read_jsonl, yaml_to_class, yaml_to_dict, read_jsonl_row 11 | from ..paramlogger import ParamLogger 12 | from ..promptopt.constants import PromptOptimizationLiterals 13 | from ..promptopt.techniques.common_logic import DatasetSpecificProcessing 14 | from ..promptopt.utils import get_promptopt_class 15 | 16 | 17 | class GluePromptOpt: 18 | """ 19 | This class is trigger point for any prompt optimization method. Different prompt optimization techniques are 20 | represented by different classes. This class collates all the user configs present in different yaml files and 21 | other boilerplate code. Any of supported prompt optimization techniques can be triggered by this class. 22 | """ 23 | BEST_PROMPT = None 24 | EXPERT_PROFILE = None 25 | data_processor = None 26 | iolog = ParamLogger() 27 | 28 | class EvalLiterals: 29 | IS_CORRECT = "is_correct" 30 | PREDICTED_ANS = "predicted_ans" 31 | LLM_OUTPUT = "llm_output" 32 | 33 | def __init__(self, 34 | prompt_config_path: str, 35 | setup_config_path: str, 36 | dataset_jsonl: str, 37 | data_processor: DatasetSpecificProcessing, 38 | dataset_processor_pkl_path: str = None, 39 | prompt_pool_path: str = None): 40 | """ 41 | Collates all the configs present in different yaml files. Initialize logger, de-serialize pickle file that has 42 | class/method for dataset processing (for given dataset). 43 | 44 | :param llm_config_path: Path to yaml file that has LLM related configs. 45 | :param prompt_config_path: Path to yaml file that has prompt templates for the given techniques. 46 | :param setup_config_path: Path to yaml file that has user preferences. 47 | :param dataset_jsonl: Path to jsonl file that has dataset present in jsonl format. 48 | :param data_processor: object of DatasetSpecificProcessing class, which has data handling methods which are 49 | specific to that dataset 50 | :param dataset_processor_pkl_path: Path to pickle file that has object of class DatasetSpecificProcessing 51 | serialized. 
52 | :param prompt_pool_path: Path to yaml file that has prompts 53 | """ 54 | if dataset_jsonl != None: 55 | if data_processor: 56 | self.data_processor = data_processor 57 | else: 58 | with open(dataset_processor_pkl_path, "rb") as file: 59 | self.data_processor = pickle.load(file) # datatype: class DatasetSpecificProcessing 60 | 61 | prompt_config_dict = yaml_to_dict(prompt_config_path) 62 | prompt_opt_cls, prompt_opt_hyperparam_cls, promptpool_cls = get_promptopt_class( 63 | prompt_config_dict[PromptOptimizationLiterals.PROMPT_TECHNIQUE_NAME]) 64 | 65 | self.setup_config = yaml_to_class(setup_config_path, SetupConfig) 66 | self.prompt_opt_param = yaml_to_class(prompt_config_path, prompt_opt_hyperparam_cls) 67 | current_dir = dirname(__file__) 68 | default_yaml_path = join(current_dir, 69 | "techniques", 70 | prompt_config_dict[PromptOptimizationLiterals.PROMPT_TECHNIQUE_NAME], 71 | "prompt_pool.yaml") 72 | 73 | self.prompt_pool = yaml_to_class(prompt_pool_path, promptpool_cls, default_yaml_path) 74 | 75 | if dataset_jsonl != None: 76 | dataset = read_jsonl(dataset_jsonl) 77 | self.prompt_opt_param.answer_format += self.prompt_pool.ans_delimiter_instruction 78 | base_path = join(self.setup_config.dir_info.base_dir, self.setup_config.experiment_name) 79 | set_logging_config(join(base_path, self.setup_config.dir_info.log_dir_name), 80 | self.setup_config.mode) 81 | self.logger = get_glue_logger(__name__) 82 | 83 | if dataset_jsonl != None: 84 | if len(dataset) < self.prompt_opt_param.seen_set_size: 85 | self.prompt_opt_param.seen_set_size = len(dataset) 86 | self.logger.info(f"Dataset has {len(dataset)} samples. However values for seen_set_size is " 87 | f"{self.prompt_opt_param.seen_set_size}. Hence resetting seen_set_size" 88 | f" to {len(dataset)}") 89 | 90 | if self.prompt_opt_param.few_shot_count > self.prompt_opt_param.seen_set_size: 91 | self.prompt_opt_param.few_shot_count = self.prompt_opt_param.seen_set_size 92 | self.logger.info(f"Value set for few_shot_count is {self.prompt_opt_param.few_shot_count}. " 93 | f"However values for seen_set_size is {self.prompt_opt_param.seen_set_size}. " 94 | f"Hence resetting few_shot_count to {self.prompt_opt_param.few_shot_count}") 95 | 96 | if dataset_jsonl != None: 97 | training_dataset = dataset[:self.prompt_opt_param.seen_set_size] 98 | else: 99 | training_dataset = None 100 | self.logger.info(f"Setup configurations parameters: {self.setup_config} \n{CommonLogsStr.LOG_SEPERATOR}") 101 | self.logger.info(f"Prompt Optimization parameters: {self.prompt_opt_param} \n{CommonLogsStr.LOG_SEPERATOR}") 102 | 103 | # This iolog is going to be used when doing complete evaluation over test-dataset 104 | self.iolog.reset_eval_glue(join(base_path, "evaluation")) 105 | 106 | self.prompt_opt = prompt_opt_cls(training_dataset, base_path, self.setup_config, 107 | self.prompt_pool, self.data_processor, self.logger) 108 | 109 | def get_best_prompt(self,use_examples=False,run_without_train_examples=False,generate_synthetic_examples=False,resolve_tie_criteria="max") -> (str, Any): 110 | """ 111 | Call get_best_prompt() method of class PromptOptimizer & return its value. 112 | :return: (best_prompt, expert_profile) 113 | best_prompt-> Best prompt for a given task description 114 | expert_profile-> Description of an expert who is apt to solve the task at hand. LLM would be asked to take 115 | identity of described in expert_profile. 
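    A short usage sketch (the config/dataset paths and `my_data_processor` are placeholders, not files or objects shipped with the repo):

        gp = GluePromptOpt("configs/promptopt_config.yaml",
                           "configs/setup_config.yaml",
                           "data/train.jsonl",
                           my_data_processor)  # a user-defined DatasetSpecificProcessing subclass
        best_prompt, expert_profile = gp.get_best_prompt(use_examples=True)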
116 | """ 117 | start_time = time.time() 118 | self.BEST_PROMPT, self.EXPERT_PROFILE = self.prompt_opt.get_best_prompt(self.prompt_opt_param,use_examples=use_examples,run_without_train_examples=run_without_train_examples,generate_synthetic_examples=generate_synthetic_examples,resolve_tie_criteria=resolve_tie_criteria) 119 | 120 | self.logger.info(f"Time taken to find best prompt: {(time.time() - start_time)} sec") 121 | return self.BEST_PROMPT, self.EXPERT_PROFILE 122 | 123 | def evaluate(self, test_dataset_jsonl: str) -> float: 124 | """ 125 | Evaluate the performance of self.BEST_PROMPT over test dataset. Return the accuracy. 126 | 127 | :param test_dataset_jsonl: Path to jsonl file that has test dataset 128 | :return: Percentage accuracy 129 | """ 130 | 131 | start_time = time.time() 132 | self.logger.info(f"Evaluation started {CommonLogsStr.LOG_SEPERATOR}") 133 | if not self.BEST_PROMPT: 134 | self.logger.error("BEST_PROMPT attribute is not set. Please set self.BEST_PROMPT attribute of this object, " 135 | "either manually or by calling get_best_prompt() method.") 136 | return 137 | 138 | total_correct = 0 139 | total_count = 0 140 | for json_obj in read_jsonl_row(test_dataset_jsonl): 141 | answer = self.predict_and_access(json_obj[DatasetSpecificProcessing.QUESTION_LITERAL], 142 | json_obj[DatasetSpecificProcessing.FINAL_ANSWER_LITERAL]) 143 | 144 | total_correct += answer[self.EvalLiterals.IS_CORRECT] 145 | total_count += 1 146 | result = {"accuracy": f"{total_correct}/{total_count} : {total_correct/total_count*100.0}%", 147 | "predicted": answer[self.EvalLiterals.PREDICTED_ANS], 148 | "actual": json_obj[DatasetSpecificProcessing.FINAL_ANSWER_LITERAL]} 149 | self.iolog.append_dict_to_chained_logs(result) 150 | self.logger.info(result) 151 | 152 | self.iolog.dump_chained_log_to_file(file_name=f"eval_result_{self.setup_config.experiment_name}") 153 | self.logger.info(f"Time taken for evaluation: {(time.time() - start_time)} sec") 154 | return total_correct / total_count 155 | 156 | @iolog.log_io_params 157 | def predict_and_access(self, question: str, gt_answer: str) -> (bool, str, str): 158 | """ 159 | For the given input question, get answer to it from LLM, using the BEST_PROMPT & EXPERT_PROFILE 160 | computes earlier. 161 | 162 | :param question: Question to be asked to LLM, to solve 163 | :param gt_answer: Ground truth, final answer. 164 | :return: (is_correct, predicted_ans, llm_output) 165 | is_correct -> Tells if prediction by LLM was correct. 166 | predicted_ans -> is the actual predicted answer by LLM. 
167 | llm_output -> Output text generated by LLM for the given question 168 | :rtype: (bool, str, str) 169 | """ 170 | final_prompt = self.prompt_pool.eval_prompt.format(instruction=self.BEST_PROMPT, 171 | question=question) 172 | llm_output = self.prompt_opt.chat_completion(user_prompt=final_prompt, system_prompt=self.EXPERT_PROFILE) 173 | 174 | is_correct, predicted_ans = self.data_processor.access_answer(llm_output, gt_answer) 175 | return {self.EvalLiterals.IS_CORRECT: is_correct, 176 | self.EvalLiterals.PREDICTED_ANS: predicted_ans, 177 | self.EvalLiterals.LLM_OUTPUT: llm_output} 178 | -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from glue.promptopt.instantiate import GluePromptOpt 3 | 4 | if __name__ == '__main__': 5 | parser = argparse.ArgumentParser(description="Arguments needed by prompt manager") 6 | parser.add_argument('--llm_config_path', default=None) 7 | parser.add_argument('--prompt_config_path', default=None) 8 | parser.add_argument('--setup_config_path', default=None) 9 | parser.add_argument('--train_file_name', default=None) 10 | parser.add_argument('--test_file_name', default=None) 11 | parser.add_argument('--dataset_processor_pkl_path', default=None) 12 | parser.add_argument('--prompt_pool_path', default=None) 13 | 14 | args = parser.parse_args() 15 | 16 | gp = GluePromptOpt(args.prompt_config_path, 17 | args.setup_config_path, 18 | args.train_file_name, 19 | None,  # no in-memory data processor; GluePromptOpt loads it from the pickle file below 20 | args.dataset_processor_pkl_path, 21 | args.prompt_pool_path) 22 | 23 | best_prompt, expert_profile = gp.get_best_prompt() 24 | print(f"Best prompt: {best_prompt} \nExpert profile: {expert_profile}") 25 | 26 | if args.test_file_name: 27 | accuracy = gp.evaluate(args.test_file_name) 28 | print(f"accuracy: {accuracy}") 29 | 30 | -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/techniques/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/promptwizard/glue/promptopt/techniques/__init__.py -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/techniques/common_logic.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod, ABC 2 | from typing import Any, List 3 | 4 | from ..constants import PromptOptimizationParams 5 | 6 | 7 | class PromptOptimizer(ABC): 8 | """ 9 | Parent class for all prompt optimization techniques. 10 | """ 11 | TECHNIQUE_NAME = "" 12 | 13 | @abstractmethod 14 | def get_best_prompt(self, params: PromptOptimizationParams) -> (str, Any): 15 | """Method that will return the best prompt for the given task description, base instruction and few-shot examples""" 16 | pass 17 | 18 | 19 | class DatasetSpecificProcessing(ABC): 20 | """ 21 | Prompt Optimizer is agnostic of the dataset on which it is run. A few processing requirements, however, are specific 22 | to the dataset. This class should be inherited by a user-defined class whose methods are implemented based on that 23 | dataset & use-case.
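    A minimal subclass sketch (the class name, example record and file path are illustrative only; save_jsonlist comes from promptwizard.glue.common.utils.file):

        from promptwizard.glue.common.utils.file import save_jsonlist

        class MyDatasetProcessor(DatasetSpecificProcessing):
            def dataset_to_jsonl(self, dataset_jsonl: str, **kwargs) -> None:
                # Write the dataset in the jsonl layout the optimizer expects.
                examples = [{"question": "I had 3 books. I gave 2 to Ram. How many are left?",
                             "answer": "3 - 2 = 1, so 1 book is left.",
                             "final_answer": "1"}]
                save_jsonlist(dataset_jsonl, examples, mode="w")

            def extract_final_answer(self, answer: str) -> str:
                # Keep only the text after the last "answer is", if present.
                return answer.rsplit("answer is", 1)[-1].strip(" .") if "answer is" in answer else answer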
24 | """ 25 | QUESTION_LITERAL = "question" 26 | ANSWER_WITH_REASON_LITERAL = "answer" 27 | FINAL_ANSWER_LITERAL = "final_answer" 28 | QUESTION_KEY_IN_PROMPT = "[Question]" 29 | ANSWER_KEY_IN_PROMPT = "[Answer]" 30 | # Regular expression pattern to match text between and tags 31 | TEXT_DELIMITER_PATTERN = r"(?s)(?<=)(.*?)(?=)" 32 | TEXT_DELIMITER_PATTERN_MUTATION = r"(?s)(?<=)(.*?)(?=)" 33 | ANSWER_START = "" 34 | ANSWER_END = "" 35 | ANSWER_DELIMITER_PATTERN = r"(?s)(?<=" + ANSWER_START + ")(.*?)(?=" + ANSWER_END + ")" 36 | INVALID_ANS = "[invalid]" 37 | FINAL_PROMPT = None 38 | 39 | 40 | def normalize_prediction(self, prediction, lowercase=True): 41 | import re 42 | import string 43 | prediction = prediction.replace(' and ', ' ') 44 | prediction = prediction.replace('Sentence 1:', ' ') 45 | prediction = prediction.replace('Sentence 2:', ' ') 46 | prediction = prediction.strip() 47 | prediction = prediction.split("\n")[0] 48 | prediction = prediction.split(".")[0] 49 | 50 | if lowercase: 51 | prediction = prediction.lower() 52 | 53 | # remove punctuation 54 | prediction = prediction.replace('-', ' ') 55 | prediction = prediction.translate( 56 | str.maketrans('', '', string.punctuation)) 57 | 58 | return prediction 59 | def access_answer(self, llm_output: str, gt_answer: str) -> (bool, Any): 60 | """ 61 | Compare answer generated by model with the answer in ground truth. 62 | Return True if they are equal. Definition of `equal` depends on problem at hand. 63 | Here only the default implementation is provided. This method should be overridden & custom defined 64 | based on end use-case. 65 | 66 | :param llm_output: Output of LLM i.e. the predicted answer 67 | :param gt_answer: The expected ground truth answer 68 | """ 69 | 70 | predicted_answer = self.extract_final_answer(llm_output) 71 | is_correct = False 72 | if predicted_answer and (predicted_answer.lower() == gt_answer.lower()): 73 | is_correct = True 74 | 75 | return is_correct, predicted_answer 76 | 77 | 78 | 79 | def collate_to_str(self, examples: List, example_template: str) -> str: 80 | """ 81 | Take as input a list of examples. Populate common template with values in these examples. Concatenate all of 82 | them to a single string, which can then be passed to LLM as prompt. 83 | 84 | :param examples: List of examples 85 | :param example_template: A template of giving examples to LLM as part of few shot learning 86 | :return: Concatenated string of all examples over the template. 87 | """ 88 | example_string = "" 89 | for example in examples: 90 | answer = example[DatasetSpecificProcessing.FINAL_ANSWER_LITERAL] 91 | if DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL in example: 92 | answer = example[DatasetSpecificProcessing.ANSWER_WITH_REASON_LITERAL] 93 | 94 | example_string += example_template.format(question=example[DatasetSpecificProcessing.QUESTION_LITERAL], 95 | answer=answer) 96 | return example_string 97 | 98 | def extract_final_answer(self, answer: str) -> str: 99 | """ 100 | Parse the output of LLM and extract the answer that you need from it. 101 | Here only the default implementation is provided. This method should be overridden & custom defined 102 | based on end use-case. 103 | 104 | :param answer: Output of LLM i.e. the response the to the question asked. 105 | :return: Final answer extracted from `answer` text, that we are looking for. 
106 | """ 107 | 108 | return answer 109 | 110 | @abstractmethod 111 | def dataset_to_jsonl(self, dataset_jsonl: str, task: str, **kwargs: Any) -> None: 112 | """ 113 | Prompt optimizer needs data in jsonl format. And each json string should be as below 114 | { 115 | 'question': 'I had 3 books. I gave 2 books to Ram. How many books do I have now ?', 116 | 'reason': 'Number of books that I had initially=1. Number of books I have after giving 2 books to Ram=3-2=1.', 117 | 'answer': 1) 118 | } 119 | 120 | :param dataset_jsonl: Path of file in which jsonl data should be saved. 121 | :param **kwargs: List of other user defined input parameters. 122 | """ 123 | pass 124 | -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/techniques/critique_n_refine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/PromptWizard/a018fc1bc83b5f2a97e97fd8cb4d9d12378a60e9/promptwizard/glue/promptopt/techniques/critique_n_refine/__init__.py -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/techniques/critique_n_refine/base_classes.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List 3 | 4 | from ....common.base_classes import UniversalBaseClass 5 | from ...constants import PromptOptimizationParams, PromptPool 6 | 7 | 8 | @dataclass 9 | class CritiqueNRefinePromptPool(PromptPool): 10 | quest_reason_ans: str 11 | expert_profile: str 12 | ans_delimiter_instruction: str 13 | intent_template: str 14 | thinking_styles: List[str] 15 | meta_critique_template: str 16 | meta_positive_critique_template: str 17 | critique_refine_template: str 18 | solve_template: str 19 | examples_critique_template: str 20 | examples_optimization_template: str 21 | meta_sample_template: str 22 | intent_template: str 23 | expert_template: str 24 | generate_reason_template: str 25 | reason_optimization_template: str 26 | examples_critique_template_zero_shot: str 27 | 28 | 29 | @dataclass 30 | class CritiqueNRefineParams(PromptOptimizationParams, UniversalBaseClass): 31 | unique_model_id: str 32 | # Number of candidate prompts to generate in given iteration 33 | style_variation: int 34 | # Number of questions to be asked to LLM in a single go 35 | questions_batch_size: int 36 | # Number of batches of questions to correctly answered, for a prompt to be considered as performing good 37 | min_correct_count: int 38 | # Max number of mini-batches on which we should evaluate our prompt 39 | max_eval_batches: int 40 | # Number of top best performing prompts to be considered for next iterations 41 | top_n: int 42 | # Number of rounds of mutation to be performed when generating different styles 43 | mutation_rounds: int 44 | # Refine instruction post mutation 45 | refine_instruction: bool 46 | # Number of iterations for conducting rounds of mutation of task description 47 | # followed by refinement of instructions 48 | mutate_refine_iterations: int 49 | # Number of iterations for refining task description and in context examples for few-shot 50 | refine_task_eg_iterations: int 51 | # Description of task. This will be fed to prompt 52 | task_description: str 53 | # Base instruction, in line with your dataset. 
This will be fed to prompt 54 | base_instruction: str 55 | # Instruction for specifying answer format 56 | answer_format: str 57 | # Number of samples from dataset, set aside as training data. In every iteration we would be drawing 58 | # `questions_batch_size` examples from training data with replacement. 59 | seen_set_size: int 60 | # Number of examples to be given for few shots 61 | few_shot_count: int 62 | # Generate synthetic reasoning 63 | generate_reasoning: bool 64 | # Generate description of an expert which can solve the task at hand 65 | generate_expert_identity: bool 66 | # Generate keywords that describe the intent of the task 67 | generate_intent_keywords: bool 68 | # number of synthetic training examples to be generated 69 | num_train_examples: int 70 | -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/techniques/critique_n_refine/prompt_pool.yaml: -------------------------------------------------------------------------------- 1 | final_prompt: | 2 | {instruction} 3 | {few_shot_examples} 4 | 5 | {answer_format} 6 | 7 | eval_prompt: | 8 | {instruction} 9 | 10 | [Question] {question} 11 | [Answer] 12 | 13 | quest_reason_ans: | 14 | 15 | [Question] {question} 16 | [Answer] {answer} 17 | 18 | system_prompt: You are a helpful assistant developed by OpenAI that can efficiently perform tasks as per instruction 19 | 20 | expert_profile: You are a helpful assistant developed by OpenAI that can efficiently perform tasks as per instruction 21 | 22 | thinking_styles: 23 | - "How could I devise an experiment to help solve that problem?" 24 | - "Make a list of ideas for solving this problem, and apply them one by one to the problem to see if any progress can be made." 25 | - "How could I measure progress on this problem?" 26 | - "How can I simplify the problem so that it is easier to solve?" 27 | - "What are the key assumptions underlying this problem?" 28 | - "What are the potential risks and drawbacks of each solution?" 29 | - "What are the alternative perspectives or viewpoints on this problem?" 30 | - "What are the long-term implications of this problem and its solutions?" 31 | - "How can I break down this problem into smaller, more manageable parts?" 32 | - "Critical Thinking: This style involves analyzing the problem from different perspectives, questioning assumptions, and evaluating the evidence or information available. It focuses on logical reasoning, evidence-based decision-making, and identifying potential biases or flaws in thinking." 33 | - "Try creative thinking, generate innovative and out-of-the-box ideas to solve the problem. Explore unconventional solutions, thinking beyond traditional boundaries, and encouraging imagination and originality." 34 | - "Seek input and collaboration from others to solve the problem. Emphasize teamwork, open communication, and leveraging the diverse perspectives and expertise of a group to come up with effective solutions." 35 | - "Use systems thinking: Consider the problem as part of a larger system and understanding the interconnectedness of various elements. Focuses on identifying the underlying causes, feedback loops, and interdependencies that influence the problem, and developing holistic solutions that address the system as a whole." 36 | - "Use Risk Analysis: Evaluate potential risks, uncertainties, and tradeoffs associated with different solutions or approaches to a problem. 
Emphasize assessing the potential consequences and likelihood of success or failure, and making informed decisions based on a balanced analysis of risks and benefits." 37 | - "Use Reflective Thinking: Step back from the problem, take the time for introspection and self-reflection. Examine personal biases, assumptions, and mental models that may influence problem-solving, and being open to learning from past experiences to improve future approaches." 38 | - "What is the core issue or problem that needs to be addressed?" 39 | - "What are the underlying causes or factors contributing to the problem?" 40 | - "Are there any potential solutions or strategies that have been tried before? If yes, what were the outcomes and lessons learned?" 41 | - "What are the potential obstacles or challenges that might arise in solving this problem?" 42 | - "Are there any relevant data or information that can provide insights into the problem? If yes, what data sources are available, and how can they be analyzed?" 43 | - "Are there any stakeholders or individuals who are directly affected by the problem? What are their perspectives and needs?" 44 | - "What resources (financial, human, technological, etc.) are needed to tackle the problem effectively?" 45 | - "How can progress or success in solving the problem be measured or evaluated?" 46 | - "What indicators or metrics can be used?" 47 | - "Is the problem a technical or practical one that requires a specific expertise or skill set? Or is it more of a conceptual or theoretical problem?" 48 | - "Does the problem involve a physical constraint, such as limited resources, infrastructure, or space?" 49 | - "Is the problem related to human behavior, such as a social, cultural, or psychological issue?" 50 | - "Does the problem involve decision-making or planning, where choices need to be made under uncertainty or with competing objectives?" 51 | - "Is the problem an analytical one that requires data analysis, modeling, or optimization techniques?" 52 | - "Is the problem a design challenge that requires creative solutions and innovation?" 53 | - "Does the problem require addressing systemic or structural issues rather than just individual instances?" 54 | - "Is the problem time-sensitive or urgent, requiring immediate attention and action?" 55 | - "What kinds of solution typically are produced for this kind of problem specification?" 56 | - "Given the problem specification and the current best solution, have a guess about other possible solutions." 57 | - "Let's imagine the current best solution is totally wrong, what other ways are there to think about the problem specification?" 58 | - "What is the best way to modify this current best solution, given what you know about these kinds of problem specification?" 59 | - "Ignoring the current best solution, create an entirely new solution to the problem." 60 | - "Let's think step by step." 61 | - "Let's make a step by step plan and implement it with good notion and explanation." 62 | 63 | 64 | # ans_delimiter_instruction: " Wrap only your final answer, without reason for each question separately between and tags." 65 | ans_delimiter_instruction: "" 66 | 67 | meta_critique_template: | 68 | I'm trying to write a zero-shot instruction that will help the most capable and suitable agent to solve the task. 69 | My current prompt is: "{instruction}" 70 | But this prompt gets the following examples wrong: {examples} 71 | Provide detail feedback which identifies reasons where the instruction could have gone wrong. 
72 | Wrap each reason with and 73 | 74 | 75 | meta_positive_critique_template: | 76 | I'm trying to write a prompt for zero-shot instruction task that will help the most capable and suitable agent to solve the task. 77 | My current prompt is: 78 | [CURRENT PROMPT] "{instruction}" 79 | Now this prompt got the following examples correct: 80 | [CORRECT EXAMPLES] {examples} 81 | Since you cant use these examples, analyse and understand characteristics/complexity and diversity of these examples and their reasoning chain and 82 | accordingly provide suggestions to further improve the prompt and make it better as a zero shot instruction task. 83 | 84 | 85 | critique_refine_template: | 86 | I'm trying to write a zero-shot instruction that will help the most capable and suitable agent to solve the task. 87 | My current prompt is: "{instruction}" 88 | But this prompt gets the following examples wrong: {examples} 89 | On carefully analysing these examples, following are the critiques related to prompt {critique} 90 | Use the critique smartly, refine the current prompt to make sure we dont get these examples wrong. 91 | Based on the above information, Now I want you to write {steps_per_sample} different improved prompts. 92 | Each prompt should be wrapped with and . 93 | [Refined Prompts]: 94 | 95 | 96 | solve_template: | 97 | You are given a prompt instruction and the following {questions_batch_size} questions of the same task. 98 | [Instruction]: {instruction} 99 | 100 | [Question]: {questions} 101 | 102 | {answer_format} 103 | 104 | [Answers]: 105 | 106 | 107 | meta_sample_template: | 108 | You are given a task description and a prompt instruction and different styles known as meta prompts: 109 | [Task Description]: {task_description} 110 | [Meta Prompt]: {meta_prompts} 111 | Now you need to generate {num_variations} variations of following Instruction adaptively mixing meta prompt while keeping similar semantic meaning. 112 | Make sure to wrap each generated prompt with and 113 | [Prompt Instruction]: {prompt_instruction} 114 | [Generated Prompts]: 115 | 116 | 117 | intent_template: | 118 | You are given an instruction along description of task labelled as [Task Description]. For the given instruction, list out 3-5 keywords in comma separated format as [Intent] which define the characteristics or properties required by the about the most capable and suitable agent to solve the task using the instruction. 119 | 120 | 121 | [Task Description]: {task_description} 122 | [Instruction]: {instruction} 123 | 124 | 125 | [Intent]: 126 | 127 | 128 | expert_template: | 129 | For each instruction, write a high-quality description about the most capable and suitable agent to answer the instruction. In second person perspective.\n 130 | 131 | [Instruction]: Make a list of 5 possible effects of deforestation.\n 132 | [Agent Description]: You are an environmental scientist with a specialization in the study of ecosystems and their interactions with human activities. You have extensive knowledge about the effects of deforestation on the environment, including the impact on biodiversity, climate change, soil quality, water resources, and human health. Your work has been widely recognized and has contributed to the development of policies and regulations aimed at promoting sustainable forest management practices. 
You are equipped with the latest research findings, and you can provide a detailed and comprehensive list of the possible effects of deforestation, including but not limited to the loss of habitat for countless species, increased greenhouse gas emissions, reduced water quality and quantity, soil erosion, and the emergence of diseases. Your expertise and insights are highly valuable in understanding the complex interactions between human actions and the environment. 133 | 134 | 135 | [Instruction]: Identify a descriptive phrase for an eclipse.\n 136 | [Agent Description]: You are an astronomer with a deep understanding of celestial events and phenomena. Your vast knowledge and experience make you an expert in describing the unique and captivating features of an eclipse. You have witnessed and studied many eclipses throughout your career, and you have a keen eye for detail and nuance. Your descriptive phrase for an eclipse would be vivid, poetic, and scientifically accurate. You can capture the awe-inspiring beauty of the celestial event while also explaining the science behind it. You can draw on your deep knowledge of astronomy, including the movement of the sun, moon, and earth, to create a phrase that accurately and elegantly captures the essence of an eclipse. Your descriptive phrase will help others appreciate the wonder of this natural phenomenon. 137 | 138 | 139 | 140 | [Instruction]: Identify the parts of speech in this sentence: \"The dog barked at the postman\".\n 141 | [Agent Description]: You are a linguist, well-versed in the study of language and its structures. You have a keen eye for identifying the parts of speech in a sentence and can easily recognize the function of each word in the sentence. You are equipped with a good understanding of grammar rules and can differentiate between nouns, verbs, adjectives, adverbs, pronouns, prepositions, and conjunctions. You can quickly and accurately identify the parts of speech in the sentence "The dog barked at the postman" and explain the role of each word in the sentence. Your expertise in language and grammar is highly valuable in analyzing and understanding the nuances of communication. 142 | 143 | 144 | [Instruction]: {task_description} 145 | [Agent Description]: 146 | 147 | 148 | examples_critique_template: | 149 | You are an expert example selector who can help in selection of right in-context examples to help the most suitable agent solve this problem. 150 | You are also given the prompt instruction which is used to solve this task 151 | [Prompt]: {prompt} 152 | You are given the task description of the task: 153 | [Task Description]: {task_description} 154 | I'm trying to write a few shots prompt using {num_examples} in-context examples to effectively solve any questions of the above task. 155 | My current {num_examples} in-context examples set are: {examples} 156 | Think of analysing, understanding and creating examples of task on the criteria of diversity of types of examples, complexity of the nature/characteristics of the examples and relevance/compatibility to the whole example set in total. 157 | Output all the suggestions/ improvement which could be made to improve each individual example of the whole example selection set. 158 | 159 | examples_critique_template_zero_shot: | 160 | You are an expert example selector who can help in selection of right in-context examples to help the most suitable agent solve this problem. 
161 | You are also given the prompt instruction which is used to solve this task 162 | [Prompt]: {prompt} 163 | You are given the task description of the task: 164 | [Task Description]: {task_description} 165 | I'm trying to write a few shots prompt using {num_examples} in-context examples to effectively solve any questions of the above task. 166 | Think of analysing, understanding and creating examples of task on the criteria of diversity of types of examples, complexity of the nature/characteristics of the examples and relevance/compatibility to the whole example set in total. 167 | Output all the suggestions/ improvement which could be made to improve each individual example of the whole example selection set. 168 | 169 | examples_optimization_template: | 170 | You are an expert example selector who can help in selection of right in-context examples to help the agent solve this problem. 171 | You are also given the prompt instruction which is used to solve this task 172 | [Prompt]: {prompt} 173 | You are given the description of the task: 174 | [Task Description]: {task_description} 175 | I'm trying to write a few shots prompt using {num_examples} in-context examples to effectively solve any questions of the above task. 176 | My current {num_examples} in-context examples set are: {examples} 177 | You are also given a set of suggestions/improvements which could be made to improve each individual example of the whole example selection set: 178 | [SUGGESTION/IMPROVEMENT]: {critique} 179 | Based on the above information, use all of it smartly and diligently to carefully create new set of {num_examples}, which follow these suggestion and improvements. 180 | Make sure to output each example wrapped with and . 181 | 182 | New examples should follow this format strictly: 183 | 184 | [Question] followed by question part of the example 185 | [Answer] followed by the all the steps of logic reasoning statements related to answer. The final answer as "[answer]" 186 | 187 | For Example: 188 | {gt_example} 189 | 190 | 191 | [New Examples]: 192 | 193 | 194 | generate_reason_template: | 195 | You are given a task description and instruction followed by a set of correct examples of the task. 196 | 197 | [Task Description]: {task_description} 198 | 199 | [Instruction]: {instruction} 200 | 201 | Each example has a question denoted by question [Question] and a final answer [Answer] . 202 | 203 | [Question]: {question} 204 | 205 | [Answer]: {answer} 206 | 207 | Now your task is to generate a reasoning chain that contains the steps, logical pathway followed to arrive at the correct answer, assuming the necessary domain knowledge is present as part of the question and task description. 208 | 209 | Make sure it is specific, non-ambiguous, complete, and specifies all the logic and steps required to reach the final answer. 210 | 211 | [Improved Reasoning Chain]: 212 | 213 | 214 | reason_optimization_template: | 215 | You are given a task description and instructions of given task 216 | 217 | [Task Description]: {task_description} 218 | 219 | [Instruction]: {instruction} 220 | 221 | Each example has a question denoted by a question [Question] and a final answer [Answer]. 222 | 223 | [Question]: {question} 224 | 225 | [Answer]: {answer} 226 | 227 | Please explain your reasoning behind reaching the answer given in a concise, complete, and coherent text of reasoning that contains all the steps or logical pathways followed. 
Ensure it is specific and non-ambiguous, and assume the necessary domain knowledge is in the question and task description. 228 | 229 | [Improved Reasoning Chain]: 230 | 231 | 232 | -------------------------------------------------------------------------------- /promptwizard/glue/promptopt/utils.py: -------------------------------------------------------------------------------- 1 | from ..common.exceptions import GlueValidaionException 2 | from .constants import PromptOptimizationParams, PromptPool, SupportedPromptOpt 3 | from .techniques.common_logic import PromptOptimizer 4 | from .techniques.critique_n_refine.core_logic import CritiqueNRefine 5 | from .techniques.critique_n_refine.base_classes import CritiqueNRefineParams, \ 6 | CritiqueNRefinePromptPool 7 | 8 | 9 | def get_promptopt_class(prompt_technique_name: str) -> (PromptOptimizer, PromptOptimizationParams, PromptPool): 10 | """ 11 | :params prompt_technique_name: Name of prompt optimization technique 12 | :return: Instance of class PromptRefinements, which is super class for all Prompt Optimization classes, 13 | Instance of class that holds all hyperparameters for that technique, 14 | Instance of class that holds all prompt strings for that techniques 15 | """ 16 | prompt_technique_name = prompt_technique_name.lower() 17 | if prompt_technique_name == SupportedPromptOpt.CRITIQUE_N_REFINE.value: 18 | return CritiqueNRefine, CritiqueNRefineParams, CritiqueNRefinePromptPool 19 | else: 20 | raise GlueValidaionException(f"Value provided for `prompt_technique_name` field in config yaml of " 21 | f"prompt manager is `{prompt_technique_name}`, which is not a valid name for " 22 | f"the prompt optimization techniques that we support. Please provide input as one " 23 | f"among the following: {SupportedPromptOpt.all_values()}", None) 24 | 25 | 26 | -------------------------------------------------------------------------------- /promptwizard/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Microsoft 2 | # Licensed under The MIT License [see LICENSE for details] 3 | 4 | _MAJOR = "0" 5 | _MINOR = "2" 6 | # On master and in a nightly release the patch should be one ahead of the last 7 | # released build. 8 | _PATCH = "2" 9 | # This is mainly for nightly builds which have the suffix ".dev$DATE". See 10 | # https://semver.org/#is-v123-a-semantic-version for the semantics. 
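# For example, a nightly build would set _SUFFIX = ".dev20240101" (an illustrative date), giving
# VERSION = "0.2.2.dev20240101"; a normal release keeps _SUFFIX empty so VERSION is just "0.2.2".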
11 | _SUFFIX = "" 12 | 13 | VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR) 14 | VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX) -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py38'] 4 | include = '\.pyi?$' 5 | 6 | [tool.isort] 7 | atomic = true 8 | profile = "black" 9 | line_length = 88 10 | skip_gitignore = true 11 | known_first_party = ["promptwizard"] -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = sdtools 7 | known_third_party = 8 | imblearn 9 | numpy 10 | pandas 11 | pytorch-tabnet 12 | scipy 13 | sklearn 14 | ipywidgets 15 | torch 16 | torchaudio 17 | torchvision 18 | torch_xla 19 | tqdm 20 | xgboost 21 | 22 | line_length = 119 23 | lines_after_imports = 2 24 | multi_line_output = 3 25 | use_parentheses = True 26 | 27 | [flake8] 28 | ignore = E203, E501, E741, W503, W605 29 | max-line-length = 119 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 Microsoft 2 | # Licensed under The MIT License [see LICENSE for details] 3 | 4 | from setuptools import find_packages, setup 5 | 6 | # PEP0440 compatible formatted version, see: 7 | # https://www.python.org/dev/peps/pep-0440/ 8 | # 9 | # release markers: 10 | # X.Y 11 | # X.Y.Z # For bugfix releases 12 | # 13 | # pre-release markers: 14 | # X.YaN # Alpha release 15 | # X.YbN # Beta release 16 | # X.YrcN # Release Candidate 17 | # X.Y # Final release 18 | 19 | # version.py defines the VERSION and VERSION_SHORT variables. 20 | # We use exec here so we don't import allennlp whilst setting up. 
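# (After the exec() call below, VERSION["VERSION"] holds the release string defined in
# promptwizard/version.py, e.g. "0.2.2", which is what gets passed to setup() as the package version.)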
21 | VERSION = {} # type: ignore 22 | with open("promptwizard/version.py", "r") as version_file: 23 | exec(version_file.read(), VERSION) 24 | 25 | INSTALL_REQUIRES = [ 26 | "datasets", 27 | "tiktoken", 28 | "nltk", 29 | "openai", 30 | "azure-identity", 31 | "azure-search-documents", 32 | "pyyaml~=6.0.1", 33 | "pyarrow==15.0.2", 34 | "llama-index==0.11.10", 35 | "llama-index-core==0.11.10", 36 | "python-dotenv" 37 | ] 38 | QUANLITY_REQUIRES = [ 39 | "black==21.4b0", 40 | "flake8>=3.8.3", 41 | "isort>=5.5.4", 42 | "pre-commit", 43 | "pytest", 44 | "pytest-xdist", 45 | ] 46 | DEV_REQUIRES = INSTALL_REQUIRES + QUANLITY_REQUIRES 47 | 48 | setup( 49 | name="promptwizard", 50 | version=VERSION["VERSION"], 51 | author="The PromptWizard team", 52 | author_email="promptwizard@microsoft.com", 53 | description="Optimize Prompt", 54 | long_description=open("README.md", encoding="utf8").read(), 55 | long_description_content_type="text/markdown", 56 | keywords="PromptWizard", 57 | license="MIT License", 58 | url="https://github.com/microsoft/PromptWizard", 59 | classifiers=[ 60 | "Intended Audience :: Science/Research", 61 | "Development Status :: 3 - Alpha", 62 | "Programming Language :: Python :: 3", 63 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 64 | ], 65 | package_dir={"": "."}, 66 | packages=find_packages("."), 67 | extras_require={ 68 | "dev": DEV_REQUIRES, 69 | "quality": QUANLITY_REQUIRES, 70 | }, 71 | install_requires=INSTALL_REQUIRES, 72 | include_package_data=True, 73 | python_requires=">=3.8.0", 74 | zip_safe=False, 75 | ) 76 | --------------------------------------------------------------------------------
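A brief end-to-end sketch of how these pieces are typically wired together (the paths, the sample jsonl record and MyDatasetProcessor are placeholders, not artifacts shipped with the repo):

    # test.jsonl: one JSON object per line, using the keys that evaluate() reads, e.g.
    # {"question": "What is 7 + 5?", "final_answer": "12"}

    from promptwizard.glue.promptopt.instantiate import GluePromptOpt

    gp = GluePromptOpt("configs/promptopt_config.yaml", "configs/setup_config.yaml",
                       "data/train.jsonl", MyDatasetProcessor())
    best_prompt, expert_profile = gp.get_best_prompt()
    accuracy = gp.evaluate("data/test.jsonl")  # fraction of test questions answered correctly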