├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── demos-and-products ├── README.md ├── arxiv-assistant │ └── arxiv_assistant.py ├── basic-chatbot │ ├── demo.py │ └── templates │ │ └── index.html ├── chaining-workshop │ ├── README.md │ ├── apps.py │ ├── demo.py │ ├── templates │ │ ├── app.html │ │ └── applist.html │ └── tests.py ├── cot-analytics-frontend │ ├── README.md │ ├── cot-scr-1.png │ ├── cot-scr-2.png │ ├── frontend.py │ ├── incomes.csv │ ├── researchllm.py │ ├── static │ │ ├── data.js │ │ ├── interface01.css │ │ ├── output.json │ │ └── results.js │ └── templates │ │ └── interface01.html ├── cot-analytics │ ├── README.md │ ├── cot.py │ ├── requirements.txt │ └── sample_output.md ├── eval_platform │ ├── env-template.txt │ ├── eval_platform │ │ ├── __init__.py │ │ ├── asgi.py │ │ ├── settings.py │ │ ├── urls.py │ │ └── wsgi.py │ ├── llmevaluator │ │ ├── __init__.py │ │ ├── admin.py │ │ ├── apps.py │ │ ├── management │ │ │ ├── __init__.py │ │ │ └── commands │ │ │ │ ├── __init__.py │ │ │ │ └── runjobs.py │ │ ├── migrations │ │ │ ├── 0001_initial.py │ │ │ ├── 0002_batchllmjob.py │ │ │ ├── 0003_chatbotmessagearray_source_batch_job_id_and_more.py │ │ │ ├── 0004_alter_chatbotmessagearray_message_array.py │ │ │ ├── 0005_alter_chatbotmessagearray_source_batch_job_id_and_more.py │ │ │ ├── 0006_batchllmjob_tags_chatbotmessagearray_tags_and_more.py │ │ │ ├── 0007_chatbotmessagearray_title.py │ │ │ ├── 0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more.py │ │ │ ├── 0009_batchllmjob_new_system_prompt_and_more.py │ │ │ ├── 0010_batchllmjob_resend_last_user_message.py │ │ │ ├── 0011_batchllmjob_description.py │ │ │ ├── 0012_batchllmjob_message_collection_ref.py │ │ │ ├── 0013_batchllmjob_results_array_and_more.py │ │ │ ├── 0014_messagecollection_chats.py │ │ │ └── __init__.py │ │ ├── models.py │ │ ├── tests.py │ │ └── views.py │ ├── manage.py │ ├── readme.md │ ├── requirements.txt │ ├── screenshot.png │ ├── static │ │ └── main.css │ └── templates │ │ ├── aboutus.html │ │ ├── base-navigation-two-cols.html │ │ ├── base-navigation-two-rows.html │ │ ├── base-navigation.html │ │ ├── base.html │ │ ├── batch.html │ │ ├── batch_review.html │ │ ├── chats.html │ │ ├── create-group.html │ │ ├── create.html │ │ └── view-chat.html ├── newsbot │ ├── README.md │ ├── news_articles.json │ ├── newsbot.py │ ├── newsbot_create.py │ ├── newsbot_evaluate.py │ └── notes.md ├── researchllm │ ├── README.md │ ├── frontend.py │ ├── incomes.csv │ ├── requirements.txt │ ├── researchllm.py │ ├── screenshot.png │ └── templates │ │ └── index.html └── web-search-chatbot │ ├── demo.py │ └── templates │ └── index.html ├── docs ├── Makefile ├── README.md ├── make.bat └── source │ ├── conf.py │ └── index.md ├── phasellm ├── __init__.py ├── agents.py ├── configurations.py ├── configurations_utils.py ├── eval.py ├── exceptions.py ├── html.py ├── llms.py ├── llms_utils.py ├── logging.py └── types.py ├── project_metadata.py ├── readthedocs.yaml ├── release_checklist.md ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── tests-non-deterministic ├── README.md ├── __init__.py └── llms │ ├── __init__.py │ └── test_llms.py └── tests ├── README.MD ├── __init__.py ├── e2e ├── __init__.py ├── agents │ ├── __init__.py │ └── test_e2e_agents.py ├── llms │ ├── __init__.py │ ├── test_e2e_llms.py │ └── utils.py └── sse │ ├── __init__.py │ └── test_e2e_sse.py ├── release_checklist_code.py ├── unit ├── __init__.py ├── agents │ ├── __init__.py │ └── test_agents.py └── llms │ ├── __init__.py │ └── test_llms.py └── 
utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # MacOS 10 | .DS_Store 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Jetbrains IDEs 117 | .idea 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # Workspaces 138 | /workspace 139 | 140 | # Scratch directories 141 | .tmp -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Phase AI Technologies Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PhaseLLM 2 | 3 | Large language model evaluation and workflow framework from [Phase AI](https://phaseai.com/). 4 | 5 | - [Follow us on Twitter](https://twitter.com/phasellm) for updates. 6 | - [Star us on GitHub](https://github.com/wgryc/phasellm). 7 | - [Read the Docs](https://phasellm.readthedocs.io/en/latest/autoapi/phasellm/index.html) -- Module reference. Tutorials and code examples are below. 8 | 9 | ## Installation 10 | 11 | You can install PhaseLLM via pip: 12 | 13 | ``` 14 | pip install phasellm 15 | ``` 16 | 17 | Installing from PyPI does not include libraries for running LLMs locally. Please run `pip install phasellm[complete]` if you plan on using LLMs locally (e.g., our `DollyWrapper`). 18 | 19 | Sample demos and products are in the `demos-and-products` folder. Clone this repository and follow the instructions in the `README.md` file in each product folder to run those. 20 | 21 | ## Introduction 22 | 23 | The coming months and years will bring thousands of new products and experiences powered by large language models (LLMs) like ChatGPT or its increasing number of variants. Whether you're using OpenAI's ChatGPT, Anthropic's Claude, or something else altogether, you'll want to test how well your models and prompts perform against user needs. As more models are launched, you'll also have a bigger range of options. 24 | 25 | PhaseLLM is a framework designed to help manage and test LLM-driven experiences -- products, content, or other experiences that product and brand managers might be driving for their users. 26 | 27 | Here's what PhaseLLM does: 28 | 1. We standardize API calls so you can plug and play models from OpenAI, Cohere, Anthropic, or other providers. 29 | 2. We've built evaluation frameworks so you can compare outputs and decide which ones are driving the best experiences for users. 30 | 3. We're adding automations so you can use advanced models (e.g., GPT-4) to evaluate simpler models (e.g., GPT-3) and determine which combination of prompts yields the best experiences, especially when taking into account the cost and speed of model execution. 31 | 32 | PhaseLLM is open source and we envision building more features to help with model understanding. We want to help developers, data scientists, and others launch new, robust products as easily as possible. 33 | 34 | If you're working on an LLM product, please reach out. We'd love to help out. 35 | 36 | ## Example: Evaluating Travel Chatbot Prompts with GPT-3.5, Claude, and more 37 | 38 | PhaseLLM makes it incredibly easy to plug and play LLMs and evaluate them, in some cases with *other* LLMs. Suppose you're building a travel chatbot and you want to test Claude and Cohere against each other, using GPT-3.5 as the evaluator.
39 | 40 | What's awesome with this approach is that (1) you can plug and play models and prompts as needed, and (2) the entire workflow takes a small amount of code. This simple example can easily be scaled to much more complex workflows. 41 | 42 | So, time for the code... First, load your API keys. 43 | 44 | ```python 45 | import os 46 | from dotenv import load_dotenv 47 | 48 | load_dotenv() 49 | openai_api_key = os.getenv("OPENAI_API_KEY") 50 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") 51 | cohere_api_key = os.getenv("COHERE_API_KEY") 52 | ``` 53 | 54 | We're going to set up the *Evaluator*, which takes two LLM model outputs and decides which one is better for the objective at hand. 55 | 56 | ```python 57 | from phasellm.eval import GPTEvaluator 58 | 59 | # We'll use GPT-3.5 as the evaluator (default for GPTEvaluator). 60 | e = GPTEvaluator(openai_api_key) 61 | ``` 62 | 63 | Now it's time to set up the experiment. In this case, we'll set up an `objective` which describes what we're trying to achieve with our chatbot. We'll also provide 5 examples of starting chats that we've seen with our users. 64 | 65 | ```python 66 | # Our objective. 67 | objective = "We're building a chatbot to discuss a user's travel preferences and provide advice." 68 | 69 | # Chats that have been launched by users. 70 | travel_chat_starts = [ 71 | "I'm planning to visit Poland in spring.", 72 | "I'm looking for the cheapest flight to Europe next week.", 73 | "I am trying to decide between Prague and Paris for a 5-day trip", 74 | "I want to visit Europe but can't decide if spring, summer, or fall would be better.", 75 | "I'm unsure I should visit Spain by flying via the UK or via France." 76 | ] 77 | ``` 78 | 79 | Now we set up our Cohere and Claude models. 80 | 81 | ```python 82 | from phasellm.llms import CohereWrapper, ClaudeWrapper 83 | cohere_model = CohereWrapper(cohere_api_key) 84 | claude_model = ClaudeWrapper(anthropic_api_key) 85 | ``` 86 | 87 | Finally, we launch our test. We run an experiment where both models generate a chat response and then we have GPT-3.5 evaluate the responses. 88 | 89 | ```python 90 | print("Running test. 1 = Cohere, and 2 = Claude.") 91 | for tcs in travel_chat_starts: 92 | 93 | messages = [{"role":"system", "content":objective}, 94 | {"role":"user", "content":tcs}] 95 | 96 | response_cohere = cohere_model.complete_chat(messages, "assistant") 97 | response_claude = claude_model.complete_chat(messages, "assistant") 98 | 99 | pref = e.choose(objective, tcs, response_cohere, response_claude) 100 | print(f"{pref}") 101 | ``` 102 | 103 | In this case, we simply print which of the two models was preferred. 104 | 105 | Voila! You've got a suite to test your models and can plug-and-play three major LLMs. 106 | 107 | ## Contact Us 108 | 109 | If you have questions, requests, ideas, etc., please reach out at w (at) phaseai (dot) com. 110 | -------------------------------------------------------------------------------- /demos-and-products/README.md: -------------------------------------------------------------------------------- 1 | # Demos and Products (/demos-and-products) 2 | 3 | This folder contains various products and demos built using PhaseLLM. 4 | 5 | Every folder contains a self-contained product or demo. Each one also contains a README.md file that includes installation instructions. 6 | 7 | All products require the `phasellm` package to be installed.
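Before launching any of the demos, it can be worth confirming that `phasellm` and your API key are wired up correctly. Below is a minimal sketch, assuming an `OPENAI_API_KEY` entry in a local `.env` file (the same convention the demos use):

```python
import os

from dotenv import load_dotenv

from phasellm.llms import OpenAIGPTWrapper, ChatBot

load_dotenv()  # Assumes OPENAI_API_KEY lives in a local .env file.

llm = OpenAIGPTWrapper(os.getenv("OPENAI_API_KEY"), "gpt-3.5-turbo")
chatbot = ChatBot(llm)

# A short round-trip; any non-empty reply means the key and package are working.
print(chatbot.chat("Say hello in five words or fewer."))
```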
8 | -------------------------------------------------------------------------------- /demos-and-products/arxiv-assistant/arxiv_assistant.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from dotenv import load_dotenv 5 | 6 | from feedparser import FeedParserDict 7 | 8 | from phasellm.llms import ClaudeWrapper 9 | 10 | from phasellm.agents import EmailSenderAgent, RSSAgent 11 | 12 | load_dotenv() 13 | 14 | # Load the Anthropic API key. 15 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") 16 | 17 | # Load Gmail credentials. 18 | gmail_email = os.getenv("GMAIL_EMAIL") 19 | gmail_password = os.getenv("GMAIL_PASSWORD") # https://myaccount.google.com/u/1/apppasswords 20 | 21 | # Set up the LLM. 22 | llm = ClaudeWrapper(anthropic_api_key) 23 | 24 | 25 | def interest_analysis(title: str, abstract: str, interests: str): 26 | interest_analysis_prompt = \ 27 | f""" 28 | I want to determine if an academic paper is relevant to my interests. I am interested in: {interests}. The paper 29 | is titled: {title}. It has the following abstract: {abstract}. Is this paper relevant to my interests? Respond 30 | with either 'yes' or 'no'. Do not explain your reasoning. 31 | 32 | Example responses are given between the ### ### symbols. Respond exactly as shown in the examples. 33 | 34 | ###yes### 35 | or 36 | ###no### 37 | """ 38 | return llm.text_completion(prompt=interest_analysis_prompt) 39 | 40 | 41 | def summarize(title: str, abstract: str, interests: str): 42 | """ 43 | This function summarizes why the paper might be relevant to the user's interests. 44 | Args: 45 | title: The title of the paper. 46 | abstract: The abstract of the paper. 47 | interests: The user's interests. 48 | 49 | Returns: The summary of why the paper might be relevant to the user's interests. 50 | 51 | """ 52 | # Summarize why the paper might be relevant to the user's interests. 53 | summary_prompt = \ 54 | f""" 55 | Summarize why the following paper is relevant to my interests. My interests are: {interests}. The paper is 56 | titled: {title}. It has the following abstract: {abstract}. 57 | """ 58 | return llm.text_completion(prompt=summary_prompt) 59 | 60 | 61 | def send_email(title: str, abstract: str, link: str, summary: str) -> None: 62 | """ 63 | This function sends an email to the user with the title of the paper and the summary. 64 | Args: 65 | title: The title of the paper. 66 | abstract: The abstract of the paper. 67 | link: The link to the paper. 68 | summary: The summary of the paper. 69 | 70 | Returns: 71 | 72 | """ 73 | # Send email 74 | print('Sending email...') 75 | 76 | content = f"Title: {title}\n\nSummary:\n{summary}\n\nAbstract:\n{abstract}\n\nLink: {link}" 77 | 78 | email_agent = EmailSenderAgent( 79 | sender_name='arXiv Assistant', 80 | smtp='smtp.gmail.com', 81 | sender_address=gmail_email, 82 | password=gmail_password, 83 | port=587 84 | ) 85 | email_agent.send_plain_email(recipient_email=gmail_email, subject=title, content=content) 86 | 87 | 88 | def analyze_and_email(paper: FeedParserDict, interests: str, retries: int = 0) -> None: 89 | """ 90 | This function analyzes the latest papers from arXiv and emails the user if any of them are relevant to their 91 | interests. 92 | Args: 93 | paper: The paper to analyze. 94 | interests: The user's interests. 95 | retries: The number of retry attempts made so far. 96 | Returns: 97 | 98 | """ 99 | # Allow for a maximum of 1 retry.
100 | max_retries = 1 101 | 102 | title = paper['title'] 103 | abstract = paper['summary'] 104 | link = paper['link'] 105 | interested = interest_analysis(title=title, abstract=abstract, interests=interests) 106 | 107 | # Find the answer within the response. 108 | answer = re.search(r'###(yes|no)###', interested) 109 | if not answer: 110 | if retries < max_retries: 111 | return analyze_and_email(paper=paper, interests=interests, retries=retries + 1) # The retry call takes over this paper; return so it isn't processed twice. 112 | else: 113 | interested = answer.group(0) 114 | 115 | # Send email if the user is interested. 116 | if interested == '###yes###': 117 | summary = summarize(title=title, abstract=abstract, interests=interests) 118 | send_email(title=title, abstract=abstract, link=link, summary=summary) 119 | elif interested == '###no###': 120 | pass 121 | else: 122 | print(f'LLM did not respond in the expected format after {max_retries} retries. Skipping paper:\n{title}') 123 | 124 | 125 | def main(): 126 | """ 127 | Entry point for the arXiv assistant. 128 | Returns: 129 | 130 | """ 131 | # Ask the user what they want to read about. 132 | interests = input("What kinds of papers do you want to be notified about? ") 133 | 134 | papers_processed = 0 135 | 136 | rss_agent = RSSAgent(url='https://arxiv.org/rss/cs') 137 | with rss_agent.poll(60) as poller: 138 | for papers in poller(): 139 | print(f'Found {len(papers)} new paper(s).') 140 | for paper in papers: 141 | analyze_and_email( 142 | paper=paper, 143 | interests=interests 144 | ) 145 | papers_processed += 1 146 | print(f'Processed {papers_processed} paper(s).') 147 | 148 | 149 | if __name__ == '__main__': 150 | main() 151 | -------------------------------------------------------------------------------- /demos-and-products/basic-chatbot/demo.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from dotenv import load_dotenv 4 | 5 | from phasellm.llms import OpenAIGPTWrapper, ChatBot 6 | 7 | load_dotenv() 8 | MODEL_LLM = OpenAIGPTWrapper 9 | MODEL_STRING = "gpt-4" 10 | MODEL_API_KEY = os.getenv("OPENAI_API_KEY") 11 | llm = MODEL_LLM(MODEL_API_KEY, MODEL_STRING) 12 | 13 | CHATBOT = None 14 | 15 | from flask import Flask, request, render_template, jsonify 16 | 17 | APP = Flask(__name__) 18 | 19 | # We have a function because we'll eventually add other things, like system prompts, variables, etc. 20 | # Returns True if successful, False otherwise. 21 | def resetChatBot(): 22 | global CHATBOT 23 | CHATBOT = ChatBot(llm) 24 | return True 25 | 26 | resetChatBot() 27 | 28 | @APP.route('/submit_chat_message', methods = ['POST']) 29 | def sendchat(): 30 | global CHATBOT 31 | message = request.json["input"] 32 | response = CHATBOT.chat(message) 33 | return {"status":"ok", "content":response} 34 | 35 | @APP.route('/resetchatbot') 36 | def resetchatbot(): 37 | if resetChatBot(): 38 | return jsonify({"status":"ok", "message":"ChatBot has been restarted."}) 39 | else: 40 | return jsonify({"status":"error", "message":"ChatBot could not be restarted."}) 41 | 42 | @APP.route('/') 43 | def index(): 44 | 45 | # Loop and print all args... 46 | #for key, value in request.args.items(): 47 | # print(f"{key} :: {value}") 48 | #print(request.args) 49 | 50 | if "reset" in request.args: 51 | if request.args['reset'] == 'true': 52 | resetChatBot() 53 | 54 | return render_template('index.html') 55 | 56 | def run(host="127.0.0.1", port=5000): 57 | """ 58 | Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only.
59 | """ 60 | APP.run(host=host, port=port) 61 | 62 | MAIN_HOST = "127.0.0.1" 63 | MAIN_PORT = 8000 64 | if __name__ == '__main__': 65 | run(MAIN_HOST, MAIN_PORT) -------------------------------------------------------------------------------- /demos-and-products/basic-chatbot/templates/index.html: -------------------------------------------------------------------------------- [Markup and inline script stripped during extraction; only the page title "WorkshopLLM" survives.]
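The template markup above was lost, but the contract it has to satisfy is fully visible in `demo.py`: POST JSON of the form `{"input": ...}` to `/submit_chat_message`, read back `{"status", "content"}`, and hit `/resetchatbot` to restart the conversation. A minimal sketch of that exchange from outside the browser (assuming the server is running on 127.0.0.1:8000 as in `demo.py`, and that the third-party `requests` package is installed):

```python
import requests

BASE = "http://127.0.0.1:8000"  # Matches MAIN_HOST / MAIN_PORT in demo.py.

# Send one chat message, exactly as the page's JavaScript would.
reply = requests.post(f"{BASE}/submit_chat_message", json={"input": "Hello!"}).json()
print(reply["content"])

# Reset the conversation, as the page's reset control would.
print(requests.get(f"{BASE}/resetchatbot").json()["message"])
```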
-------------------------------------------------------------------------------- /demos-and-products/chaining-workshop/README.md: -------------------------------------------------------------------------------- 1 | # Chaining Workshop 2 | 3 | This provides a front-end and a set of prompt templates so you can begin chaining and structuring "apps" in various ways. 4 | 5 | ## Example Prompt Types 6 | 7 | - System Message: show a message without any logic around what is shown. 8 | - Linear Order: show a message at a specific time (similar to 'system message' but with order). 9 | - Logic: choose the next prompt based on parsed model output. 10 | 11 | ## Sample Apps 12 | 13 | - AmpUp.ai with a "yes/no" from the LLM 14 | - AmpUp.ai with a confidence score 15 | - Newsbot with review of outputs 16 | - Character-focused chatbot 17 | - Travel agent workflow 18 | 19 | ## Data Structure 20 | 21 | { prompt_id, prompt} 22 | fallback prompt (i.e., error) 23 | 24 | { pid_1 -> pid_2, conditions} 25 | 26 | 27 | ## Characters 28 | 29 | ### Socrates 30 | 31 | { "prompt_id": 1, "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 2} 32 | 33 | { "prompt_id": 2, "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 2} 34 | 35 | 36 | 37 | variables = user/app provided, LLM-provided 38 | 39 | ## How to Add Conditional Flows 40 | 41 | - Output Parser: need to take the output of a model and parse it in some way. This should parse the outputs into specific variables. 42 | - Pass a function to the next prompt? This will be limited, though -- you still need to write functions. Is that bad? 43 | - Prebuilt template functions + custom functions. 44 | 45 | Output Parser -> Environment Variable -> Function 46 | 47 | 48 | OUTPUT PARSER 49 | 50 | 51 | 52 | For all of your responses, please provide them in the following format: 53 | ---MESSAGE 54 | This is where your actual message will go. 55 | ---SENTIMENT-SCORE 56 | A score between 0 and 100 that shows how positive or negative the person's response was when describing their product. 57 | ---END 58 | Include 'yes' or 'no' here. 'Yes' means we've asked 2 follow-up questions or the sentiment score has gotten close to 0 and you think it's safer to end the conversation. 'No' will continue the conversation. 59 | 60 | ## Conditional Flows v2 61 | 62 | Right now, this is all hard-coded via " ---VAR", which is a poor way of doing things. -------------------------------------------------------------------------------- /demos-and-products/chaining-workshop/apps.py: -------------------------------------------------------------------------------- 1 | app_socrates = { 2 | 3 | "code":"socrates", 4 | "name":"Chat with Socrates", 5 | 6 | "prompts": { 7 | 8 | 0 : { 9 | "type":"system_message", "message": "You are chatting with Socrates. Enjoy!", "next_prompt": 1 10 | }, 11 | 12 | 1 : { 13 | "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method.
Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 1 14 | } 15 | 16 | } 17 | 18 | } 19 | 20 | app_yoyo = { 21 | 22 | "code":"yoyo", 23 | "name":"Chat with 'Yo Yo'", 24 | 25 | "prompts": { 26 | 27 | 0 : { 28 | "type":"system_message", "message": "You are chatting with someone that uses 'yo' too much. Enjoy!", "next_prompt": 1 29 | }, 30 | 31 | 1 : { 32 | "prompt": "REMINDER: you are a chatbot that starts every message with 'Yo, yo, yo!' and also includes 'yo' throughout responses. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 1 33 | } 34 | 35 | } 36 | 37 | } 38 | 39 | app_act = { 40 | 41 | "code":"act", 42 | "name":"Acceptance and Commitment Therapy", 43 | 44 | "prompts": { 45 | 46 | 0 : { 47 | "type":"system_message", "message": "This is an 'Acceptance and Commitment Therapy' (ACT) coach. The responses in this chat model will always focus on different follow-up questions or advice around how you should move forward with your day based on this style of positive psychology.", "next_prompt": 1 48 | }, 49 | 50 | 1 : { 51 | "prompt": "REMINDER: you are an 'Acceptance and Commitment Therapy' (ACT) coach and every message needs to follow the perspective of an ACT therapist that is also steeped in positive and humanistic psychology with a strong focus on ACT.\nMESSAGE:{message}", "next_prompt": 1 52 | } 53 | 54 | } 55 | 56 | } 57 | 58 | app_random_end = { 59 | 60 | "code": "random", 61 | "name": "Random End", 62 | "prompts": { 63 | 64 | 0 : { 65 | "type":"system_message", "message": "This is a demo bot that always follows up with ONE question and also randomly ends the conversation. It's being used to show how conditional app flows could work.", "next_prompt": 1 66 | }, 67 | 68 | 1 : { 69 | "prompt": "REMINDER: you are only allowed to respond with ONE SHORT QUESTION to the MESSAGE below. Please make sure that your response follows the following format:\n---RESPONSE\nThis is where your response actually goes.\n---NEXT\nPut 'YES' or 'NO' here randomly, with a 50% split.\n\n\nMESSAGE:{message}", "next_prompt": 1 70 | } 71 | 72 | } 73 | 74 | } 75 | 76 | app_danger_demo = { 77 | 78 | "code": "danger", 79 | "name": "Brand Sentiment", 80 | "prompts": { 81 | 82 | 0 : { 83 | "type":"system_message", "message": "This is a demo bot that interviews you about how you feel about your recent Nike sneaker purchase. If your sentiment goes down quite a bit, then it ends the interview.", "next_prompt": 1 84 | }, 85 | 86 | 1 : { 87 | "prompt": "REMINDER: please always follow up with a question to keep learning about my sentiment around Nike sneakers. Also provide a 'danger' score from 0 to 100, where 100 means the conversation is incredibly negative, and 0 means it's incredibly positive, and 50 means it's neutral. Please make sure that your response follows the following format, always starting with '---RESPONSE':\n\n---RESPONSE\nThis is where your response actually goes.\n---DANGER\nThis is the sentiment score with 100 = negative, 50 = neutral, and 0 = positive.\n\n\nMESSAGE:{message}", "next_prompt": 1 88 | } 89 | 90 | } 91 | 92 | } 93 | 94 | 95 | APP_DATA_SETS = { 96 | "socrates": app_socrates, 97 | "yoyo": app_yoyo, 98 | "act": app_act, 99 | "random": app_random_end, 100 | "danger": app_danger_demo 101 | } -------------------------------------------------------------------------------- /demos-and-products/chaining-workshop/demo.py: -------------------------------------------------------------------------------- 1 | # Import all the data, apps, etc. we have built...
2 | from apps import * 3 | 4 | import os 5 | from dotenv import load_dotenv 6 | 7 | from phasellm.llms import OpenAIGPTWrapper, ChatBot, Prompt 8 | 9 | load_dotenv() 10 | MODEL_LLM = OpenAIGPTWrapper 11 | MODEL_STRING = "gpt-4" 12 | #MODEL_STRING = "gpt-3.5-turbo" # Use for speed. 13 | MODEL_API_KEY = os.getenv("OPENAI_API_KEY") 14 | llm = MODEL_LLM(MODEL_API_KEY, MODEL_STRING) 15 | 16 | CHATBOT = None 17 | 18 | APP_PROMPT_STATE = 0 19 | APP_CODE = None 20 | 21 | from flask import Flask, request, render_template, jsonify 22 | 23 | APP = Flask(__name__) 24 | 25 | # We have a function because we'll eventually add other things, like system prompts, variables, etc. 26 | # Returns True if successful, False otherwise 27 | def resetChatBot(): 28 | global CHATBOT 29 | CHATBOT = ChatBot(llm) 30 | return True 31 | 32 | resetChatBot() 33 | 34 | def parseResponse(r): 35 | lines = r.strip().split("\n") 36 | 37 | # Should eventually throw an error. 38 | if r[0:3] != "---": 39 | return None 40 | #assert r[0:3] == "---" 41 | 42 | var_name = None 43 | v = "" 44 | 45 | rdict = {} 46 | 47 | for line in lines: 48 | if line[0:3] == "---": 49 | if var_name is not None: 50 | rdict[var_name] = v.strip() 51 | var_name = line[3:].strip().upper() 52 | v = "" 53 | else: 54 | v += line 55 | 56 | rdict[var_name] = v.strip() 57 | 58 | return rdict 59 | 60 | @APP.route('/submit_chat_message', methods = ['POST']) 61 | def sendchat(): 62 | global CHATBOT 63 | message = request.json["input"] 64 | response = process_message(message) 65 | return {"status":"ok", "content":response} 66 | 67 | @APP.route('/resetchatbot') 68 | def resetchatbot(): 69 | if resetChatBot(): 70 | return jsonify({"status":"ok", "message":"ChatBot has been restarted."}) 71 | else: 72 | return jsonify({"status":"error", "message":"ChatBot could not be restarted."}) 73 | 74 | def isInt(v): 75 | try: 76 | int(v) 77 | except: 78 | return False 79 | return True 80 | 81 | def process_message(message): 82 | global APP_PROMPT_STATE 83 | global APP_CODE 84 | global CHATBOT 85 | prompt = Prompt(APP_CODE["prompts"][APP_PROMPT_STATE]["prompt"]) 86 | filled_prompt = prompt.fill(message = message) 87 | 88 | print(f"\n\n{filled_prompt}\n\n") 89 | 90 | response = CHATBOT.chat(filled_prompt) 91 | 92 | print(f"\n\n{response}\n\n") 93 | 94 | response_dict = parseResponse(response) 95 | 96 | next_prompt = -1 97 | if isInt(APP_CODE["prompts"][APP_PROMPT_STATE]["next_prompt"]): 98 | next_prompt = APP_CODE["prompts"][APP_PROMPT_STATE]["next_prompt"] 99 | 100 | if response_dict is not None: 101 | print(response_dict) 102 | if "NEXT" in response_dict: 103 | if response_dict["NEXT"].upper() == "NO": 104 | response = "Chat is over!" 105 | else: 106 | if "RESPONSE" in response_dict: 107 | response = response_dict["RESPONSE"] 108 | if "DANGER" in response_dict: 109 | if isInt(response_dict["DANGER"]): 110 | danger_score = int(response_dict["DANGER"]) 111 | if danger_score > 80: 112 | response = "Dangerous topic! Chat is over!" 113 | else: 114 | if "RESPONSE" in response_dict: 115 | response = response_dict["RESPONSE"] 116 | 117 | APP_PROMPT_STATE = next_prompt 118 | 119 | return response 120 | 121 | @APP.route("/") 122 | def index(): 123 | applist = "" 124 | for key in APP_DATA_SETS: 125 | applist += f""" 126 |
<a href="app?app={key}">{APP_DATA_SETS[key]["name"]}</a> 127 | """ 128 | return render_template('applist.html', applist=applist) 129 | 130 | @APP.route('/app') 131 | def llmapp(): 132 | 133 | global APP_PROMPT_STATE 134 | global APP_CODE 135 | 136 | # Loop and print all args... 137 | #for key, value in request.args.items(): 138 | # print(f"{key} :: {value}") 139 | #print(request.args) 140 | 141 | if "reset" in request.args: 142 | if request.args['reset'] == 'true': 143 | resetChatBot() 144 | 145 | app_name = "" 146 | system_message = "" 147 | if "app" in request.args: 148 | app_code = request.args['app'] 149 | if app_code in APP_DATA_SETS: 150 | system_message = APP_DATA_SETS[app_code]["prompts"][0]["message"] 151 | app_name = app_code 152 | APP_PROMPT_STATE = 0 153 | APP_CODE = APP_DATA_SETS[app_code] 154 | APP_PROMPT_STATE = APP_DATA_SETS[app_code]["prompts"][0]["next_prompt"] 155 | 156 | return render_template('app.html', app_name=app_name, sys_msg=system_message) 157 | 158 | def run(host="127.0.0.1", port=5000): 159 | """ 160 | Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only. 161 | """ 162 | APP.run(host=host, port=port) 163 | 164 | MAIN_HOST = "127.0.0.1" 165 | MAIN_PORT = 8000 166 | if __name__ == '__main__': 167 | run(MAIN_HOST, MAIN_PORT) -------------------------------------------------------------------------------- /demos-and-products/chaining-workshop/templates/app.html: -------------------------------------------------------------------------------- [Markup and inline script stripped during extraction; only the page title "WorkshopLLM" survives.]
-------------------------------------------------------------------------------- /demos-and-products/chaining-workshop/templates/applist.html: -------------------------------------------------------------------------------- [Markup stripped during extraction; the surviving text is the page title "App List" and the template body {{ applist | safe }}.] -------------------------------------------------------------------------------- /demos-and-products/chaining-workshop/tests.py: -------------------------------------------------------------------------------- 1 | response_1 = """---FIXED: 2 | 你好!今天是个好日子。 (Your sentence was already correct in grammar and syntax.) 3 | 4 | ---RESPONSE: 5 | 你好!是的,今天天气很好。 6 | 7 | ---ENGLISH: 8 | Hello! Yes, the weather is very good today.""" 9 | 10 | response_2 = """---MESSAGE 11 | I'm sorry to hear about the discomfort you're experiencing. Is there a specific part of the shoe that's causing the blisters or is it more of a general issue? Also, how does the overall comfort and fit compare to other sneakers you've worn in the past? 12 | ---SENTIMENT-SCORE 13 | 40 14 | ---END 15 | No""" 16 | 17 | from demo import * 18 | 19 | print(parseResponse(response_1)) 20 | print(parseResponse(response_2)) -------------------------------------------------------------------------------- /demos-and-products/cot-analytics-frontend/README.md: -------------------------------------------------------------------------------- 1 | # Chain of Thought (CoT) Analytics -- Frontend Version 2 | 3 | This is still a work in progress. We need to figure out how to improve the code quality and make it consistent across each step. 4 | 5 | ## Running 6 | 7 | See installation instructions in `cot-analytics`. The same warnings as for `researchllm` apply. Run from this folder with... 8 | ```python 9 | from frontend import * 10 | run() 11 | ``` 12 | 13 | ## Examples of how this works 14 | 15 | The screenshot below shows the first code block being executed. There are no outputs (or errors!) because the code works. 16 | ![Screenshot showing Step 1 running.](cot-scr-1.png) 17 | 18 | Correlation matrix generated via GPT-4 coding. 19 | ![Screenshot showing Step 2 running.](cot-scr-2.png) 20 | 21 | ## Issues 22 | 23 | The `cot-analytics` folder generates a great research plan, but the code isn't consistent across steps. Some thinking below. 24 | 25 | ### How to fix the code generator... 26 | 27 | 1. Provide DF printout every time. 28 | 2. Run the code blocks before moving to the next ones. 29 | 3. Add line #s. 30 | 4. Plugin for Jupyter. 31 | 32 | Note that this is where logging and evaluation become critical. 33 | 34 | ### Want to help?
35 | 36 | Email me: w (at) phaseai (dot) com 37 | -------------------------------------------------------------------------------- /demos-and-products/cot-analytics-frontend/cot-scr-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/cot-analytics-frontend/cot-scr-1.png -------------------------------------------------------------------------------- /demos-and-products/cot-analytics-frontend/cot-scr-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/cot-analytics-frontend/cot-scr-2.png -------------------------------------------------------------------------------- /demos-and-products/cot-analytics-frontend/frontend.py: -------------------------------------------------------------------------------- 1 | """ 2 | A Flask frontend for the COT demo 3 | 4 | To run, start a Python REPL and in the same directory as this file and run the following: 5 | > from frontend import * 6 | > run() # Or, run('0.0.0.0', 80) 7 | 8 | """ 9 | 10 | from flask import Flask, request, render_template 11 | import pandas as pd 12 | import numpy as np 13 | 14 | from researchllm import * 15 | 16 | APP = Flask(__name__) 17 | 18 | ########################################################################## 19 | # 20 | # DATA SET SETUP (START) 21 | # Please review the code below to set up your own data set for analysis. 22 | # 23 | 24 | # Data set to load and analyze. 25 | DATA_SETUP_INTRO = "I am researching the relationship between income and sociodemographic census info." 26 | DATA_FILE_LOC = "incomes.csv" 27 | 28 | # Another sample we explored. 29 | #DATA_SETUP_INTRO = "I am researching car crashes in NYC." 30 | #DATA_FILE_LOC = "nypd-motor-vehicle-collisions.csv" 31 | 32 | # Want to analyze your own data set? Simply replace the two variables above: 33 | # DATA_SETUP_INTRO = "What are you researching? Please provide a short description. 34 | # DATA_FILE_LOC = "The location of the CSV file." 35 | # Note that you DO NOT have to provide metadata about the CSV file. This gets generated automatically. 36 | 37 | # Loads the CSV file. 38 | # If you want to load another file (e.g., Excel file), replace the code below with the relevant function (e.g., read_excel()). 39 | df = pd.read_csv(DATA_FILE_LOC) 40 | 41 | # 42 | # DATA SET SETUP (END) 43 | # 44 | ########################################################################## 45 | 46 | def generateOverview(df): 47 | """ 48 | Generates a prompt providing an overview of a data set. This should only be used to generate the initial data prompt for now. 49 | """ 50 | description = "" 51 | for column in df: 52 | col_name = df[column].name 53 | col_type = df[column].dtype 54 | col_description = f"Column Name: {col_name}\nColumn Type: {col_type}" 55 | if col_type == "object": 56 | column_values = df[col_name].values 57 | uniques = np.unique(column_values) 58 | col_description += f"\nSample Values: {str(uniques)}" 59 | description += col_description + "\n\n" 60 | return description.strip() 61 | 62 | # The prompt used to set up the entire chat session. This prompt is used regularly for analysis. 63 | base_prompt = f"{DATA_SETUP_INTRO} I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`, and have a dataframe called `df` loaded into Python. 
`df` contains the following variables and variable types:\n\n" + generateOverview(df) 64 | 65 | # Calls the researchllm.py function to set the current dataframe as the main one for analysis. 66 | set_df(df) 67 | start_bi_session() 68 | 69 | ########################################################################## 70 | # 71 | # FLASK FUNCTIONS 72 | # Everything below manages the frontend. 73 | # 74 | ########################################################################## 75 | 76 | @APP.route('/get_prompt') 77 | def get_prompt(): 78 | """ 79 | Returns a JSON object with the prompt being passed on to the language model. 80 | """ 81 | return {"status":"ok", "prompt":base_prompt} 82 | 83 | @APP.route('/') 84 | def index(): 85 | """ 86 | Displays the index page accessible at '/' 87 | """ 88 | return render_template('interface01.html') 89 | 90 | @APP.route("/runcode", methods = ['POST']) 91 | def runcode(): 92 | """ 93 | Runs code in the POST request. 94 | """ 95 | code_to_run = request.json['code'] 96 | response, code_output, is_error = ask_interpret_clean(code_to_run) 97 | return {"response":response, "code_output":code_output, "is_error":is_error} 98 | 99 | @APP.route("/text_completion", methods = ['POST']) 100 | def analysis(): 101 | """ 102 | Calls the researchllm.py code to request analysis and interpretation thereof. 103 | 104 | See run_analysis(message) in researchllm.py for more information. 105 | """ 106 | text_to_complete = request.json["input"] 107 | new_request = base_prompt + text_to_complete 108 | response_object = run_analysis(new_request) 109 | return {"status":"ok", "content":response_object["interpretation"], "code":response_object["code"], "code_output":response_object["code_output"], "error":response_object["error"]} 110 | 111 | def run(host="127.0.0.1", port=5000): 112 | APP.run(host=host, port=port) 113 | -------------------------------------------------------------------------------- /demos-and-products/cot-analytics-frontend/static/interface01.css: -------------------------------------------------------------------------------- 1 | * { 2 | font-family: 'Open Sans', sans-serif; 3 | box-sizing: border-box; 4 | } 5 | 6 | body { 7 | margin:0; 8 | padding:0; 9 | } 10 | 11 | #everything-everywhere-all-at-once { 12 | width:100vw; 13 | height:100vh; 14 | display: grid; 15 | grid-template-columns: 33% 66%; 16 | overflow:hidden; 17 | } 18 | 19 | #col1 { 20 | grid-column:1; 21 | grid-row:1; 22 | padding:25px; 23 | background-color:rgb(248, 245, 228); 24 | } 25 | 26 | #col2 { 27 | grid-column:2; 28 | grid-row: 1; 29 | padding:25px; 30 | } 31 | 32 | #data-overview { 33 | margin-top:30px; 34 | } 35 | 36 | h1 { 37 | font-size:20px; 38 | font-weight:600; 39 | } 40 | 41 | #prompt-info { 42 | max-height:calc(100vh - 150px); 43 | font-size:11px; 44 | font-weight:300; 45 | font-family: 'Open Sans', sans-serif; 46 | overflow-x:auto; 47 | overflow-y:auto; 48 | } 49 | 50 | #cot-output { 51 | overflow-x:auto; 52 | overflow-y:auto; 53 | max-height:calc(100vh - 150px); 54 | } 55 | 56 | .cot-output-cell { 57 | border:1px solid rgb(235,235,235); 58 | border-radius:7px; 59 | padding:15px; 60 | margin:10px; 61 | } 62 | 63 | .cot-output-cell .notes { 64 | white-space: pre-wrap; 65 | } 66 | 67 | .cot-output-cell .code { 68 | width:100%; 69 | white-space: pre; 70 | font-family: monospace; 71 | } 72 | 73 | .run-button { 74 | padding:10px; 75 | border-radius:8px; 76 | border:1px solid rgb(230,230,230); 77 | width:100%; 78 | background:rgb(83, 145, 101); 79 | color:rgb(248, 245, 228); 80 |
font-weight:800; 81 | margin-top:10px; 82 | width:100px; 83 | cursor:pointer; 84 | } 85 | 86 | .code-output-after-run { 87 | width:100%; 88 | white-space: pre; 89 | font-family: monospace; 90 | background:rgb(248, 245, 228); 91 | border-radius:7px; 92 | padding:15px; 93 | margin:10px; 94 | } 95 | 96 | .heading-code-output { 97 | font-weight:600; 98 | } 99 | 100 | .heading-error { 101 | color:crimson; 102 | font-weight:600; 103 | } -------------------------------------------------------------------------------- /demos-and-products/cot-analytics-frontend/static/results.js: -------------------------------------------------------------------------------- 1 | function show_prompt() { 2 | const response = fetch("get_prompt", { 3 | method: "GET", 4 | cache: "no-cache", 5 | credentials: "same-origin", 6 | headers: {"Content-Type": "application/json"}, 7 | }) 8 | .then(response=>response.json()) 9 | .then(data=>{ 10 | var p = data["prompt"].replace(/(?:\r\n|\r|\n)/g, '<br />'); 11 | var prompt_pre = document.getElementById("prompt-info"); 12 | prompt_pre.innerHTML = p; 13 | }) 14 | } 15 | 16 | show_prompt(); 17 | 18 | function resize_textarea(dom_id) { 19 | var ta = document.getElementById(dom_id); 20 | ta.style.height = ta.scrollHeight + "px"; 21 | } 22 | 23 | function add_box(header, notes, code, css_id) { 24 | var cot_div = document.getElementById('cot-output'); 25 | var notes_clean = notes.replace(/(?:\r\n|\r|\n)/g, '
<br />'); 26 | 27 | var code_clean = code.replace("```python", "").replace("```", ""); // This needs to be changed to just deleting the first and last line. 28 | // NOTE: the tags below were lost during extraction; the markup is rebuilt from the ids/classes referenced in interface01.css and run(), so treat it as an approximation. 29 | var new_html = `<div class="cot-output-cell" id="cot-cell-code${css_id}"> 30 | <b>${header}</b> 31 | <div class="notes">${notes_clean}</div> 32 | <textarea class="code" id="code${css_id}">${code_clean}</textarea> 33 | <button class="run-button" onclick="run('code${css_id}')">Run</button> 34 | </div>
`; 35 | 36 | cot_div.innerHTML += new_html; 37 | resize_textarea(`code${css_id}`); 38 | } 39 | 40 | function add_code_output(code_output, div_id, is_error) { 41 | var cot_div = document.getElementById(div_id); 42 | var new_html = ""; 43 | if (is_error) { // Headings rebuilt from the heading-error / heading-code-output classes in interface01.css; original tags were lost. 44 | new_html = `<span class="heading-error">Error</span><div class="code-output-after-run">${code_output}</div>`; 45 | } else { 46 | new_html = `<span class="heading-code-output">Code Output</span><div class="code-output-after-run">${code_output}</div>`; 47 | } 48 | cot_div.innerHTML += new_html; 49 | } 50 | 51 | for (var i = 1; i <= 7; i++) { 52 | add_box(`Step #${i}`, COT_DATA[i]['objective'], COT_DATA[i]['code_block'], `_step_${i}`); 53 | } 54 | 55 | function run(block_id) { 56 | var code = document.getElementById(block_id).value; 57 | var data = {"code":code}; 58 | console.log(data); 59 | const response = fetch("runcode", { 60 | method: "POST", 61 | cache: "no-cache", 62 | credentials: "same-origin", 63 | headers: {"Content-Type": "application/json"}, 64 | body: JSON.stringify(data) 65 | }) 66 | .then(response=>response.json()) 67 | .then(data=>{ 68 | var response = data['response']; 69 | var code_output = data['code_output']; 70 | var is_error = data['is_error']; 71 | //console.log(response); 72 | //console.log(code_output); 73 | if (response === "*No outputs.*") { 74 | code_output = "*No outputs.*" 75 | } 76 | //console.log(is_error); 77 | add_code_output(code_output, `cot-cell-${block_id}`, is_error); 78 | }) 79 | } -------------------------------------------------------------------------------- /demos-and-products/cot-analytics-frontend/templates/interface01.html: --------------------------------------------------------------------------------
[Template markup stripped during extraction; the surviving text is the page heading "ResearchLLM: Chain of Thought Analysis" and the panel headings "Data Overview (Prompt)" and "Analysis Output".]
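The template above is mostly a shell for `results.js`; the real work happens against the Flask routes in `frontend.py`. A minimal sketch of driving the `/runcode` endpoint directly -- the same call that `run()` in results.js makes for each generated step (assuming the app from `frontend.py` is running on its default 127.0.0.1:5000 and that the third-party `requests` package is installed):

```python
import requests

BASE = "http://127.0.0.1:5000"  # frontend.py's run() defaults.

# Execute one code block against the loaded dataframe, as results.js's run() does.
result = requests.post(f"{BASE}/runcode", json={"code": "print(df.shape)"}).json()

# The route returns interpretation text, captured output, and an error flag.
print(result["is_error"], result["code_output"])
```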
-------------------------------------------------------------------------------- /demos-and-products/cot-analytics/README.md: -------------------------------------------------------------------------------- 1 | # Chain of Thought (CoT) Analytics 2 | 3 | This generates a Chain of Thought (CoT) plan for a data set, and then asks the LLM to generate code for each step in the CoT analysis. 4 | 5 | Curious about the output? Please see `sample_output.md` for an example of an analysis plan for the demo data set. This was not edited! 6 | 7 | ## Installation and Setup 8 | 9 | ### Installation 10 | 11 | Clone the GitHub repository and navigate to the folder containing this README.md file. Install the relevant packages (including PhaseLLM): 12 | 13 | ``` 14 | pip install -r requirements.txt 15 | ``` 16 | 17 | Next, make sure you edit the `cot.py` file to include the proper API keys. You'll find these around line 115: 18 | ```python 19 | openai_api_key = os.getenv("OPENAI_API_KEY") 20 | o = OpenAIGPTWrapper(openai_api_key, 'gpt-4') # We highly recommend using GPT-4 or Claude v1.3 for this. 21 | ``` 22 | 23 | ### Running With Sample Data 24 | 25 | Simply run `cot.py` directly from your command line. This will take a while to run, and will make 10-20 requests to the OpenAI or Anthropic API. 26 | 27 | ## Running on Your Own Data 28 | 29 | This demo is based on the data in `incomes.csv` in the `researchllm` folder. We are working on making this easier to apply to other data sets *and* to actually execute the code generated by the LLM. 30 | 31 | Until then, please don't hesitate to reach out -- hello (at) phaseai (dot) com -- and we'll help you apply this to your data. -------------------------------------------------------------------------------- /demos-and-products/cot-analytics/cot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Chain of Thought (CoT) analysis for a data set. Please see the README.md for more information. 3 | """ 4 | 5 | import os 6 | from dotenv import load_dotenv 7 | 8 | from phasellm.llms import OpenAIGPTWrapper, ChatBot 9 | 10 | # Where we write the output of this analysis. 11 | OUT_FILE = "output.md" 12 | 13 | def save_output(output, header): 14 | """ 15 | Appends model outputs to a markdown file. Includes a header ("# header") and then the output itself. 16 | """ 17 | with open(OUT_FILE, 'a') as writer: 18 | writer.write(f"# {header}\n\n{output}\n\n") 19 | 20 | # This prompt is basically a copy/paste of what is generated by ResearchLLM for the 'incomes.csv' data set via the generateOverview() function. 21 | messages = [{"role":"system", "content": """You are a data science research assistant. We will ask you about a big data set and would like you to break down the analysis you suggest into specific tasks that we can then write code for."""}, 22 | {"role":"user", "content":"""I am researching the relationship between income and sociodemographic census info. I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`, and have a dataframe called `df` loaded into Python. `df` contains the following variables and variable types: 23 | 24 | Column Name: age 25 | Column Type: int64 26 | 27 | Column Name: workclass 28 | Column Type: object 29 | Sample Values: ['?'
'Federal-gov' 'Local-gov' 'Never-worked' 'Private' 'Self-emp-inc' 30 | 'Self-emp-not-inc' 'State-gov' 'Without-pay'] 31 | 32 | Column Name: fnlwgt 33 | Column Type: int64 34 | 35 | Column Name: education 36 | Column Type: object 37 | Sample Values: ['10th' '11th' '12th' '1st-4th' '5th-6th' '7th-8th' '9th' 'Assoc-acdm' 38 | 'Assoc-voc' 'Bachelors' 'Doctorate' 'HS-grad' 'Masters' 'Preschool' 39 | 'Prof-school' 'Some-college'] 40 | 41 | Column Name: education.num 42 | Column Type: int64 43 | 44 | Column Name: marital.status 45 | Column Type: object 46 | Sample Values: ['Divorced' 'Married-AF-spouse' 'Married-civ-spouse' 47 | 'Married-spouse-absent' 'Never-married' 'Separated' 'Widowed'] 48 | 49 | Column Name: occupation 50 | Column Type: object 51 | Sample Values: ['?' 'Adm-clerical' 'Armed-Forces' 'Craft-repair' 'Exec-managerial' 52 | 'Farming-fishing' 'Handlers-cleaners' 'Machine-op-inspct' 'Other-service' 53 | 'Priv-house-serv' 'Prof-specialty' 'Protective-serv' 'Sales' 54 | 'Tech-support' 'Transport-moving'] 55 | 56 | Column Name: relationship 57 | Column Type: object 58 | Sample Values: ['Husband' 'Not-in-family' 'Other-relative' 'Own-child' 'Unmarried' 'Wife'] 59 | 60 | Column Name: race 61 | Column Type: object 62 | Sample Values: ['Amer-Indian-Eskimo' 'Asian-Pac-Islander' 'Black' 'Other' 'White'] 63 | 64 | Column Name: sex 65 | Column Type: object 66 | Sample Values: ['Female' 'Male'] 67 | 68 | Column Name: capital.gain 69 | Column Type: int64 70 | 71 | Column Name: capital.loss 72 | Column Type: int64 73 | 74 | Column Name: hours.per.week 75 | Column Type: int64 76 | 77 | Column Name: native.country 78 | Column Type: object 79 | Sample Values: ['?' 'Cambodia' 'Canada' 'China' 'Columbia' 'Cuba' 'Dominican-Republic' 80 | 'Ecuador' 'El-Salvador' 'England' 'France' 'Germany' 'Greece' 'Guatemala' 81 | 'Haiti' 'Holand-Netherlands' 'Honduras' 'Hong' 'Hungary' 'India' 'Iran' 82 | 'Ireland' 'Italy' 'Jamaica' 'Japan' 'Laos' 'Mexico' 'Nicaragua' 83 | 'Outlying-US(Guam-USVI-etc)' 'Peru' 'Philippines' 'Poland' 'Portugal' 84 | 'Puerto-Rico' 'Scotland' 'South' 'Taiwan' 'Thailand' 'Trinadad&Tobago' 85 | 'United-States' 'Vietnam' 'Yugoslavia'] 86 | 87 | Column Name: income 88 | Column Type: object 89 | Sample Values: ['<=50K' '>50K'] 90 | 91 | ```````` 92 | 93 | With all of the above in mind, could you please provide me with a set of analysis steps you would recommend I run on the data to better understand what drives income inequality? Please provide a numbered list where each number is a specific analytical step. For each step, include the hypothesis you would test, what variables you'd look at, and what you'd be hoping to find. 94 | 95 | Do not worry about visualizing the data, as I'd like to ensure the outputs are all things that you are able to interpret afterwards. """} 96 | ] 97 | 98 | def split_cot(cot): 99 | """ 100 | Takes a numbered list generated by an LLM and splits it into an array. 
101 | """ 102 | lines = cot.split("\n") 103 | cot_steps = [] 104 | 105 | step_text = "" 106 | for line in lines: 107 | if len(line.strip()) > 0: 108 | step_text += line + "\n" 109 | elif step_text: 110 | cot_steps.append(step_text.strip()) 111 | step_text = "" 112 | if step_text: # Keep the final step even when the text has no trailing blank line. 113 | cot_steps.append(step_text.strip()) 114 | return cot_steps 115 | 116 | load_dotenv() 117 | 118 | print("Setting up chat...") 119 | 120 | openai_api_key = os.getenv("OPENAI_API_KEY") 121 | o = OpenAIGPTWrapper(openai_api_key, 'gpt-4') 122 | c = ChatBot(o, messages[0]['content']) 123 | 124 | print("Getting CoT...") 125 | 126 | # Step 1, let's get a chain of thought (CoT) approach to understanding the data set. 127 | response = c.chat(messages[1]['content']) 128 | save_output(response, "Chain of Thought Plan for Data Analysis") 129 | cot_steps = split_cot(response) 130 | 131 | # Step 2, go through each CoT step and ask GPT-4 to generate code. 132 | step_num = 1 133 | for step in cot_steps: 134 | 135 | print(f"Generating code for step {step_num}.") 136 | 137 | prompt = f"""You wrote the following instructions for a step: 138 | {step} 139 | 140 | Please write the Python code for the step above. Assume the following: 141 | 1. Start your response with ```python 142 | 2. End your response with ``` 143 | 3. Do not add any text outside the code. For anything that requires comment, simply add Python comments. 144 | 4. Assume the data was imported into a dataframe called `df` 145 | 5. I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`. You can use those libraries and no others. 146 | """ 147 | 148 | response = c.chat(prompt) 149 | 150 | save_output(step + "\n\n" + response, f"Code for Step #{step_num}") 151 | 152 | step_num += 1 153 | 154 | print("Done!") -------------------------------------------------------------------------------- /demos-and-products/cot-analytics/requirements.txt: -------------------------------------------------------------------------------- 1 | phasellm 2 | scikit-learn 3 | pandas 4 | numpy 5 | scipy 6 | statsmodels -------------------------------------------------------------------------------- /demos-and-products/eval_platform/env-template.txt: -------------------------------------------------------------------------------- 1 | # LLM APIs 2 | OPENAI_API_KEY=...your OpenAI API key... -------------------------------------------------------------------------------- /demos-and-products/eval_platform/eval_platform/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/eval_platform/__init__.py -------------------------------------------------------------------------------- /demos-and-products/eval_platform/eval_platform/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for eval_platform project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``.
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings") 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/eval_platform/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for eval_platform project. 3 | 4 | Generated by 'django-admin startproject' using Django 4.2. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/4.2/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/4.2/ref/settings/ 11 | """ 12 | 13 | import os 14 | from dotenv import load_dotenv 15 | 16 | load_dotenv() 17 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 18 | 19 | from pathlib import Path 20 | 21 | # Build paths inside the project like this: BASE_DIR / 'subdir'. 22 | BASE_DIR = Path(__file__).resolve().parent.parent 23 | 24 | 25 | # Quick-start development settings - unsuitable for production 26 | # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ 27 | 28 | # SECURITY WARNING: keep the secret key used in production secret! 29 | SECRET_KEY = "django-insecure-qhwo&d2q3@p2ov)-6e8il37squqh0ji&3qvqmtciforvkekr+^" 30 | 31 | # SECURITY WARNING: don't run with debug turned on in production! 32 | DEBUG = True 33 | 34 | ALLOWED_HOSTS = [] 35 | 36 | 37 | # Application definition 38 | 39 | INSTALLED_APPS = [ 40 | "django.contrib.admin", 41 | "django.contrib.auth", 42 | "django.contrib.contenttypes", 43 | "django.contrib.sessions", 44 | "django.contrib.messages", 45 | "django.contrib.staticfiles", 46 | "llmevaluator", 47 | ] 48 | 49 | MIDDLEWARE = [ 50 | "django.middleware.security.SecurityMiddleware", 51 | "django.contrib.sessions.middleware.SessionMiddleware", 52 | "django.middleware.common.CommonMiddleware", 53 | "django.middleware.csrf.CsrfViewMiddleware", 54 | "django.contrib.auth.middleware.AuthenticationMiddleware", 55 | "django.contrib.messages.middleware.MessageMiddleware", 56 | "django.middleware.clickjacking.XFrameOptionsMiddleware", 57 | ] 58 | 59 | ROOT_URLCONF = "eval_platform.urls" 60 | 61 | TEMPLATES = [ 62 | { 63 | "BACKEND": "django.template.backends.django.DjangoTemplates", 64 | "DIRS": ["templates"], 65 | "APP_DIRS": True, 66 | "OPTIONS": { 67 | "context_processors": [ 68 | "django.template.context_processors.debug", 69 | "django.template.context_processors.request", 70 | "django.contrib.auth.context_processors.auth", 71 | "django.contrib.messages.context_processors.messages", 72 | ], 73 | }, 74 | }, 75 | ] 76 | 77 | WSGI_APPLICATION = "eval_platform.wsgi.application" 78 | 79 | 80 | # Database 81 | # https://docs.djangoproject.com/en/4.2/ref/settings/#databases 82 | 83 | DATABASES = { 84 | "default": { 85 | "ENGINE": "django.db.backends.sqlite3", 86 | "NAME": BASE_DIR / "db.sqlite3", 87 | } 88 | } 89 | 90 | 91 | # Password validation 92 | # https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators 93 | 94 | AUTH_PASSWORD_VALIDATORS = [ 95 | { 96 | "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", 97 | }, 98 | { 99 | "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", 100 | }, 101 | { 102 | "NAME": 
"django.contrib.auth.password_validation.CommonPasswordValidator", 103 | }, 104 | { 105 | "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", 106 | }, 107 | ] 108 | 109 | 110 | # Internationalization 111 | # https://docs.djangoproject.com/en/4.2/topics/i18n/ 112 | 113 | LANGUAGE_CODE = "en-us" 114 | 115 | TIME_ZONE = "UTC" 116 | 117 | USE_I18N = True 118 | 119 | USE_TZ = True 120 | 121 | 122 | # Static files (CSS, JavaScript, Images) 123 | # https://docs.djangoproject.com/en/4.2/howto/static-files/ 124 | 125 | STATIC_URL = "static/" 126 | 127 | STATICFILES_DIRS = [ 128 | BASE_DIR / "static", 129 | ] 130 | 131 | # Default primary key field type 132 | # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field 133 | 134 | DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" 135 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/eval_platform/urls.py: -------------------------------------------------------------------------------- 1 | """ 2 | URL configuration for eval_platform project. 3 | 4 | The `urlpatterns` list routes URLs to views. For more information please see: 5 | https://docs.djangoproject.com/en/4.2/topics/http/urls/ 6 | Examples: 7 | Function views 8 | 1. Add an import: from my_app import views 9 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 10 | Class-based views 11 | 1. Add an import: from other_app.views import Home 12 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 13 | Including another URLconf 14 | 1. Import the include() function: from django.urls import include, path 15 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) 16 | """ 17 | from django.contrib import admin 18 | from django.urls import path 19 | from django.views.generic import TemplateView 20 | 21 | import llmevaluator.views as lv 22 | 23 | urlpatterns = [ 24 | path("admin/", admin.site.urls), 25 | path("", lv.review_jobs), 26 | path( 27 | "import", 28 | TemplateView.as_view( 29 | template_name="create.html", 30 | extra_context={"contenttitle": "Import Chat via JSON"}, 31 | ), 32 | ), 33 | path( 34 | "about", 35 | TemplateView.as_view( 36 | template_name="aboutus.html", 37 | extra_context={"contenttitle": "About Us"}, 38 | ), 39 | ), 40 | path("create_save_ma", lv.createMessageArray), 41 | path("create_save_ma_json", lv.createMessageArrayJson), 42 | path("groups", lv.list_groups), 43 | path("create_group_csv", lv.createGroupFromCSV), 44 | path("jobs", lv.list_jobs), 45 | path("create_job", lv.createJob), 46 | path("chats", lv.get_chats, name="list_chats"), 47 | path("view_chat/", lv.view_chat, name="view_chat"), 48 | path("view_chat", lv.view_chat_new), 49 | path("update_title_via_post", lv.update_title_via_post), 50 | path("overwrite_chat", lv.overwrite_chat), 51 | path("delete_chat/", lv.delete_chat), 52 | ] 53 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/eval_platform/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for eval_platform project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 
5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/__init__.py -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | from .models import ChatBotMessageArray, MessageCollection, BatchLLMJob 4 | 5 | admin.site.register(ChatBotMessageArray) 6 | admin.site.register(MessageCollection) 7 | admin.site.register(BatchLLMJob) 8 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class LlmevaluatorConfig(AppConfig): 5 | default_auto_field = "django.db.models.BigAutoField" 6 | name = "llmevaluator" 7 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/management/__init__.py -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/management/commands/__init__.py -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/management/commands/runjobs.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | 3 | from llmevaluator.models import * 4 | 5 | from django.conf import settings 6 | from phasellm.llms import OpenAIGPTWrapper, ChatBot 7 | 8 | 9 | # Returns the newly created ChatBotMessageArray (the object itself, not just its ID) 10 | def run_llm_task_and_save( 11 | message_array, 12 | user_message, 13 | job_id, 14 | original_title="Untitled", 15 | model="gpt-4", 16 | temperature=0.7, 17 | print_response=True, 18 | new_system_prompt=None, 19 | resend_last_user_message=False, 20 | ): 21 | o = OpenAIGPTWrapper(settings.OPENAI_API_KEY, model=model, temperature=temperature) 22 | cb = ChatBot(o, "") 23 | 24 | # Resending the last user message and providing a new user message are mutually exclusive; error out if both are set. 25 | assert not (resend_last_user_message and len(user_message) > 0) 26 | 27 | ma_copy = message_array.copy() 28 | if new_system_prompt is not None: 29 | if
len(new_system_prompt.strip()) > 0: 30 | # If the first message is not a system prompt, then error out. 31 | assert ma_copy[0]["role"] == "system" 32 | ma_copy[0]["content"] = new_system_prompt 33 | 34 | cb.messages = ma_copy 35 | 36 | if resend_last_user_message: 37 | response = cb.resend() 38 | else: 39 | response = cb.chat(user_message) 40 | 41 | new_cbma = ChatBotMessageArray( 42 | message_array=cb.messages, 43 | source_batch_job_id=job_id, 44 | title=f"{original_title} w/ T={temperature}, model={model}", 45 | ) 46 | 47 | new_cbma.llm_temperature = temperature 48 | new_cbma.llm_model = model 49 | 50 | new_cbma.save() 51 | 52 | if print_response: 53 | print(response) 54 | 55 | return new_cbma 56 | 57 | 58 | def run_job(job): 59 | print(f"Starting job: {job.title}") 60 | 61 | mc = MessageCollection.objects.get(id=job.message_collection_id) 62 | chat_ids_string = mc.chat_ids 63 | chat_ids = chat_ids_string.strip().split(",") 64 | 65 | results_ids = [] 66 | results_to_append = [] 67 | 68 | for _cid in chat_ids: 69 | print(f"Analyzing chat ID: {_cid}") 70 | 71 | cid = int(_cid) 72 | cbma = ChatBotMessageArray.objects.get(id=cid) 73 | 74 | # SETTING: run_n_times 75 | run_n_times = job.run_n_times 76 | for i in range(0, run_n_times): 77 | # SETTING: include_gpt_4 78 | if job.include_gpt_4: 79 | if job.temperature_range: 80 | for t in [0.25, 0.75, 1.25]: 81 | nc = run_llm_task_and_save( 82 | cbma.message_array.copy(), 83 | job.user_message, 84 | job.id, 85 | cbma.title, 86 | model="gpt-4", 87 | temperature=t, 88 | new_system_prompt=job.new_system_prompt, 89 | resend_last_user_message=job.resend_last_user_message, 90 | ) 91 | results_ids.append(str(nc.id)) 92 | results_to_append.append(nc) 93 | else: 94 | nc = run_llm_task_and_save( 95 | cbma.message_array.copy(), 96 | job.user_message, 97 | job.id, 98 | cbma.title, 99 | "gpt-4", 100 | new_system_prompt=job.new_system_prompt, 101 | resend_last_user_message=job.resend_last_user_message, 102 | ) 103 | results_ids.append(str(nc.id)) 104 | results_to_append.append(nc) 105 | 106 | # SETTING: include_gpt_35 107 | if job.include_gpt_35: 108 | if job.temperature_range: 109 | for t in [0.25, 0.75, 1.25]: 110 | nc = run_llm_task_and_save( 111 | cbma.message_array.copy(), 112 | job.user_message, 113 | job.id, 114 | cbma.title, 115 | model="gpt-3.5-turbo", 116 | temperature=t, 117 | new_system_prompt=job.new_system_prompt, 118 | resend_last_user_message=job.resend_last_user_message, 119 | ) 120 | results_ids.append(str(nc.id)) 121 | results_to_append.append(nc) 122 | else: 123 | nc = run_llm_task_and_save( 124 | cbma.message_array.copy(), 125 | job.user_message, 126 | job.id, 127 | cbma.title, 128 | "gpt-3.5-turbo", 129 | new_system_prompt=job.new_system_prompt, 130 | resend_last_user_message=job.resend_last_user_message, 131 | ) 132 | results_ids.append(str(nc.id)) 133 | results_to_append.append(nc) 134 | 135 | new_chats_str = ",".join(results_ids) 136 | results_mc = MessageCollection( 137 | title=f"Results from '{job.title}' job", 138 | chat_ids=new_chats_str, 139 | source_collection_id=mc.id, 140 | source_batch_job_id=job.id, 141 | ) 142 | results_mc.save() 143 | 144 | for r in results_to_append: 145 | results_mc.chats.add(r) 146 | results_mc.save() 147 | 148 | job.status = "complete" 149 | job.results_array = results_mc 150 | job.save() 151 | 152 | print("Done!") 153 | 154 | 155 | class Command(BaseCommand): 156 | help = "Runs all scheduled batch jobs." 
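# Usage sketch (an assumption drawn from the readme, not part of this file):
#
#   python3 manage.py runjobs
#
# handle() below fetches every BatchLLMJob still marked "scheduled" and hands
# it to run_job(), which saves each generated chat as a ChatBotMessageArray
# and groups the results into a new MessageCollection attached to the job.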
157 | 158 | def handle(self, *args, **options): 159 | jobs = BatchLLMJob.objects.filter(status="scheduled") 160 | for job in jobs: 161 | run_job(job) 162 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-09-24 16:39 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | initial = True 8 | 9 | dependencies = [] 10 | 11 | operations = [ 12 | migrations.CreateModel( 13 | name="ChatBotMessageArray", 14 | fields=[ 15 | ( 16 | "id", 17 | models.BigAutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ("created_at", models.DateTimeField(auto_now_add=True)), 25 | ("updated_at", models.DateTimeField(auto_now=True)), 26 | ("message_array", models.JSONField(default=dict)), 27 | ("comments", models.TextField(blank=True, default="", null=True)), 28 | ], 29 | ), 30 | migrations.CreateModel( 31 | name="MessageCollection", 32 | fields=[ 33 | ( 34 | "id", 35 | models.BigAutoField( 36 | auto_created=True, 37 | primary_key=True, 38 | serialize=False, 39 | verbose_name="ID", 40 | ), 41 | ), 42 | ("created_at", models.DateTimeField(auto_now_add=True)), 43 | ("updated_at", models.DateTimeField(auto_now=True)), 44 | ("title", models.TextField(blank=True, default="", null=True)), 45 | ("chat_ids", models.TextField(blank=True, default="", null=True)), 46 | ], 47 | ), 48 | ] 49 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0002_batchllmjob.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-09-26 18:50 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0001_initial"), 9 | ] 10 | 11 | operations = [ 12 | migrations.CreateModel( 13 | name="BatchLLMJob", 14 | fields=[ 15 | ( 16 | "id", 17 | models.BigAutoField( 18 | auto_created=True, 19 | primary_key=True, 20 | serialize=False, 21 | verbose_name="ID", 22 | ), 23 | ), 24 | ("created_at", models.DateTimeField(auto_now_add=True)), 25 | ("updated_at", models.DateTimeField(auto_now=True)), 26 | ("title", models.TextField(blank=True, default="", null=True)), 27 | ("message_collection_id", models.IntegerField()), 28 | ("user_message", models.TextField(blank=True, default="", null=True)), 29 | ( 30 | "status", 31 | models.TextField(blank=True, default="scheduled", null=True), 32 | ), 33 | ], 34 | ), 35 | ] 36 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0003_chatbotmessagearray_source_batch_job_id_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-09-28 14:54 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0002_batchllmjob"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="chatbotmessagearray", 14 | name="source_batch_job_id", 15 | field=models.IntegerField(null=True), 16 | ), 17 | migrations.AddField( 18 | model_name="messagecollection", 19 | name="source_batch_job_id", 20 | 
field=models.IntegerField(null=True), 21 | ), 22 | migrations.AddField( 23 | model_name="messagecollection", 24 | name="source_collection_id", 25 | field=models.IntegerField(null=True), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0004_alter_chatbotmessagearray_message_array.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-09-28 19:42 2 | 3 | import django.core.serializers.json 4 | from django.db import migrations, models 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("llmevaluator", "0003_chatbotmessagearray_source_batch_job_id_and_more"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AlterField( 14 | model_name="chatbotmessagearray", 15 | name="message_array", 16 | field=models.JSONField( 17 | default=dict, encoder=django.core.serializers.json.DjangoJSONEncoder 18 | ), 19 | ), 20 | ] 21 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0005_alter_chatbotmessagearray_source_batch_job_id_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-09-29 15:22 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0004_alter_chatbotmessagearray_message_array"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AlterField( 13 | model_name="chatbotmessagearray", 14 | name="source_batch_job_id", 15 | field=models.IntegerField(blank=True, null=True), 16 | ), 17 | migrations.AlterField( 18 | model_name="messagecollection", 19 | name="source_batch_job_id", 20 | field=models.IntegerField(blank=True, null=True), 21 | ), 22 | migrations.AlterField( 23 | model_name="messagecollection", 24 | name="source_collection_id", 25 | field=models.IntegerField(blank=True, null=True), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0006_batchllmjob_tags_chatbotmessagearray_tags_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-09-29 18:52 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0005_alter_chatbotmessagearray_source_batch_job_id_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="batchllmjob", 14 | name="tags", 15 | field=models.TextField(blank=True, default="", null=True), 16 | ), 17 | migrations.AddField( 18 | model_name="chatbotmessagearray", 19 | name="tags", 20 | field=models.TextField(blank=True, default="", null=True), 21 | ), 22 | migrations.AddField( 23 | model_name="messagecollection", 24 | name="tags", 25 | field=models.TextField(blank=True, default="", null=True), 26 | ), 27 | ] 28 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0007_chatbotmessagearray_title.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-09-30 16:34 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | 
("llmevaluator", "0006_batchllmjob_tags_chatbotmessagearray_tags_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="chatbotmessagearray", 14 | name="title", 15 | field=models.TextField(blank=True, default="Untitled"), 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-10-09 13:37 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0007_chatbotmessagearray_title"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="batchllmjob", 14 | name="include_gpt_35", 15 | field=models.BooleanField(default=False), 16 | ), 17 | migrations.AddField( 18 | model_name="batchllmjob", 19 | name="include_gpt_4", 20 | field=models.BooleanField(default=True), 21 | ), 22 | migrations.AddField( 23 | model_name="batchllmjob", 24 | name="run_n_times", 25 | field=models.IntegerField(default=1), 26 | ), 27 | migrations.AddField( 28 | model_name="batchllmjob", 29 | name="temperature_range", 30 | field=models.BooleanField(default=False), 31 | ), 32 | ] 33 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0009_batchllmjob_new_system_prompt_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-10-10 16:23 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ( 9 | "llmevaluator", 10 | "0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more", 11 | ), 12 | ] 13 | 14 | operations = [ 15 | migrations.AddField( 16 | model_name="batchllmjob", 17 | name="new_system_prompt", 18 | field=models.TextField(blank=True, default="", null=True), 19 | ), 20 | migrations.AddField( 21 | model_name="chatbotmessagearray", 22 | name="llm_model", 23 | field=models.TextField(blank=True, default="None", null=True), 24 | ), 25 | migrations.AddField( 26 | model_name="chatbotmessagearray", 27 | name="llm_temperature", 28 | field=models.FloatField(blank=True, null=True), 29 | ), 30 | ] 31 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0010_batchllmjob_resend_last_user_message.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-10-11 06:30 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0009_batchllmjob_new_system_prompt_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="batchllmjob", 14 | name="resend_last_user_message", 15 | field=models.BooleanField(default=False), 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0011_batchllmjob_description.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-10-11 10:44 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class 
Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0010_batchllmjob_resend_last_user_message"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="batchllmjob", 14 | name="description", 15 | field=models.TextField(blank=True, null=True), 16 | ), 17 | ] 18 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0012_batchllmjob_message_collection_ref.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-10-11 10:52 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("llmevaluator", "0011_batchllmjob_description"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="batchllmjob", 15 | name="message_collection_ref", 16 | field=models.ForeignKey( 17 | null=True, 18 | on_delete=django.db.models.deletion.SET_NULL, 19 | to="llmevaluator.messagecollection", 20 | ), 21 | ), 22 | ] 23 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0013_batchllmjob_results_array_and_more.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-10-11 11:05 2 | 3 | from django.db import migrations, models 4 | import django.db.models.deletion 5 | 6 | 7 | class Migration(migrations.Migration): 8 | dependencies = [ 9 | ("llmevaluator", "0012_batchllmjob_message_collection_ref"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="batchllmjob", 15 | name="results_array", 16 | field=models.ForeignKey( 17 | null=True, 18 | on_delete=django.db.models.deletion.SET_NULL, 19 | related_name="results_collection", 20 | to="llmevaluator.messagecollection", 21 | ), 22 | ), 23 | migrations.AlterField( 24 | model_name="batchllmjob", 25 | name="message_collection_ref", 26 | field=models.ForeignKey( 27 | null=True, 28 | on_delete=django.db.models.deletion.SET_NULL, 29 | related_name="source_messages_collection", 30 | to="llmevaluator.messagecollection", 31 | ), 32 | ), 33 | ] 34 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/0014_messagecollection_chats.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 4.2 on 2023-10-11 13:03 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | dependencies = [ 8 | ("llmevaluator", "0013_batchllmjob_results_array_and_more"), 9 | ] 10 | 11 | operations = [ 12 | migrations.AddField( 13 | model_name="messagecollection", 14 | name="chats", 15 | field=models.ManyToManyField( 16 | blank=True, null=True, to="llmevaluator.chatbotmessagearray" 17 | ), 18 | ), 19 | ] 20 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/migrations/__init__.py -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/models.py: 
-------------------------------------------------------------------------------- 1 | from django.db import models 2 | from django.core.serializers.json import DjangoJSONEncoder 3 | 4 | 5 | def object_has_tag(model_object, tag_string): 6 | tags = model_object.tags.split(",") 7 | for tag in tags: 8 | if tag.strip() == tag_string: 9 | return True 10 | return False 11 | 12 | 13 | class ChatBotMessageArray(models.Model): 14 | created_at = models.DateTimeField(auto_now_add=True) 15 | updated_at = models.DateTimeField(auto_now=True) 16 | message_array = models.JSONField(default=dict, encoder=DjangoJSONEncoder) 17 | comments = models.TextField(default="", null=True, blank=True) 18 | source_batch_job_id = models.IntegerField(null=True, blank=True) 19 | tags = models.TextField(default="", null=True, blank=True) 20 | title = models.TextField(default="Untitled", blank=True) 21 | 22 | # LLM settings for review, later 23 | llm_model = models.TextField(default="None", blank=True, null=True) 24 | llm_temperature = models.FloatField(null=True, blank=True) 25 | 26 | def __str__(self): 27 | return f"ChatBotMessage (ID {self.id}), {self.title}" 28 | 29 | 30 | class MessageCollection(models.Model): 31 | created_at = models.DateTimeField(auto_now_add=True) 32 | updated_at = models.DateTimeField(auto_now=True) 33 | title = models.TextField(default="", null=True, blank=True) 34 | 35 | # Note: we should use an ArrayField or JSONField or a ManyToManyField if we scale this up. 36 | # However, to keep things very simple and supportable in SQLite, we'll assume the chat_ids are in a comma-separated string for now. We'll do some basic validation when saving via the front-end. 37 | chat_ids = models.TextField(default="", null=True, blank=True) 38 | chats = models.ManyToManyField(ChatBotMessageArray, blank=True) 39 | 40 | # We can save source collections in cases where we have batch jobs run. 41 | source_collection_id = models.IntegerField(null=True, blank=True) 42 | source_batch_job_id = models.IntegerField(null=True, blank=True) 43 | tags = models.TextField(default="", null=True, blank=True) 44 | 45 | def __str__(self): 46 | return f"MessageCollection (ID {self.id}), {self.title}" 47 | 48 | 49 | class BatchLLMJob(models.Model): 50 | created_at = models.DateTimeField(auto_now_add=True) 51 | updated_at = models.DateTimeField(auto_now=True) 52 | title = models.TextField(default="", null=True, blank=True) 53 | description = models.TextField(null=True, blank=True) 54 | message_collection_id = models.IntegerField() 55 | message_collection_ref = models.ForeignKey( 56 | MessageCollection, 57 | on_delete=models.SET_NULL, 58 | null=True, 59 | related_name="source_messages_collection", 60 | ) 61 | results_array = models.ForeignKey( 62 | MessageCollection, 63 | on_delete=models.SET_NULL, 64 | null=True, 65 | related_name="results_collection", 66 | ) 67 | 68 | # scheduled, complete 69 | status = models.TextField(default="scheduled", null=True, blank=True) 70 | tags = models.TextField(default="", null=True, blank=True) 71 | 72 | # settings 73 | # By default we only run the LLM on GPT-4 with a user message. The 74 | # settings below let you do other things. 
75 | 76 | # Messages 77 | user_message = models.TextField(default="", null=True, blank=True) 78 | new_system_prompt = models.TextField(default="", null=True, blank=True) 79 | resend_last_user_message = models.BooleanField(default=False) 80 | 81 | # Repeat the run 'n' times 82 | run_n_times = models.IntegerField(default=1) 83 | 84 | # Which LLM models to run 85 | include_gpt_4 = models.BooleanField(default=True) 86 | include_gpt_35 = models.BooleanField(default=False) 87 | 88 | # Run temperature tests; True = run across temperatures 0.25, 0.75, and 1.25 (see runjobs.py) 89 | temperature_range = models.BooleanField(default=False) 90 | 91 | def __str__(self): 92 | return f"Batch LLM Job (ID {self.id}), {self.title}" 93 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/llmevaluator/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | """Run administrative tasks.""" 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings") 10 | try: 11 | from django.core.management import execute_from_command_line 12 | except ImportError as exc: 13 | raise ImportError( 14 | "Couldn't import Django. Are you sure it's installed and " 15 | "available on your PYTHONPATH environment variable? Did you " 16 | "forget to activate a virtual environment?" 17 | ) from exc 18 | execute_from_command_line(sys.argv) 19 | 20 | 21 | if __name__ == "__main__": 22 | main() 23 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/readme.md: -------------------------------------------------------------------------------- 1 | # PhaseLLM Evaluation 2 | 3 | *PhaseLLM Evaluation* helps you run batch jobs across LLMs. Think of it as a playground where you can easily run multiple LLM calls across different models. 4 | 5 | Example use cases: 6 | - Run the same set of messages `n` times to see how responses differ. 7 | - Run messages across different models (e.g., GPT-4 and GPT-3.5) to see performance differences. 8 | - Replace or update system prompts across multiple chats to see if they have an impact on responses. 9 | 10 | [5-minute demo below:](https://www.youtube.com/watch?v=Ycu2eKkCO7Y) 11 | [![PhaseLLM Evaluation screenshot](screenshot.png)](https://www.youtube.com/watch?v=Ycu2eKkCO7Y) 12 | 13 | ## Installation and Running 14 | 15 | Please follow the steps below to run *PhaseLLM Evaluation*. 16 | 17 | Run the code below in the `eval_platform` directory. 18 | 19 | ```bash 20 | pip3 install -r requirements.txt 21 | python3 manage.py migrate 22 | ``` 23 | 24 | The code above will install `phasellm` and `Django`, and set up the relevant SQLite database. 25 | 26 | Update the `env-template.txt` file with your OpenAI API key and save it to `.env`. 27 | 28 | Finally, to run the server, type the following: 29 | ```bash 30 | python3 manage.py runserver 31 | ``` 32 | 33 | You'll then be able to navigate to `http://localhost:8000` and run your evaluations.
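For reference, the saved `.env` file mirrors `env-template.txt` and only needs one entry (the value below is a placeholder):

```
# LLM APIs
OPENAI_API_KEY=sk-...your actual OpenAI API key...
```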
34 | 35 | ## Running Batch Jobs 36 | 37 | Once you've created the proper chats, chat groups, and jobs, open a second terminal window and type the following in your `eval_platform` directory: 38 | 39 | ```bash 40 | python3 manage.py runjobs 41 | ``` 42 | 43 | This is a custom Django management command that will run your scheduled jobs. The outputs will be printed in the terminal, but will also be saved in the front-end. 44 | 45 | ## Hosting 46 | 47 | Want us to host the *Evaluation* demo product for you? Please reach out to us at w [at] phaseai [dot] com 48 | 49 | ## Feedback? 50 | 51 | Any feedback is welcome. Please reach out to w [at] phaseai [dot] com and we'll get back to you as soon as we can! 52 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/requirements.txt: -------------------------------------------------------------------------------- 1 | Django==4.2 2 | phasellm>=0.0.17,<0.1.0 3 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/screenshot.png -------------------------------------------------------------------------------- /demos-and-products/eval_platform/static/main.css: -------------------------------------------------------------------------------- 1 | /** 2 | * DEFAULT AND UNIVERSAL VALUES 3 | */ 4 | 5 | * { 6 | margin: 0; 7 | padding: 0; 8 | font-family: 'Open Sans', sans-serif; 9 | font-weight: 200; 10 | font-size: 15px; 11 | box-sizing: border-box; 12 | } 13 | 14 | :root { 15 | --standard-margin-spacing-text: 20px; 16 | --internal-standard-padding: 5px; 17 | --standard-border-radius: 5px; 18 | --lightgray-borders-backgrounds: rgb(235, 235, 235); 19 | } 20 | 21 | b { 22 | font-weight: 900; 23 | } 24 | 25 | ul { 26 | margin-left: var(--standard-margin-spacing-text); 27 | } 28 | 29 | a { 30 | text-decoration: none; 31 | color: #4682B4; 32 | font-weight: 500; 33 | } 34 | 35 | /** 36 | * TWO COLUMN CONTAINER TEST 37 | */ 38 | 39 | .two-col-content-container { 40 | display: grid; 41 | grid-template-columns: 50% 1fr; 42 | column-gap: calc(3*var(--internal-standard-padding)); 43 | height: 100%; 44 | overflow: hidden; 45 | } 46 | 47 | .two-col-content-left { 48 | background-color: white; 49 | overflow: auto; 50 | padding: calc(2*var(--internal-standard-padding)); 51 | } 52 | 53 | .two-col-content-right { 54 | background-color: white; 55 | overflow: auto; 56 | padding: calc(2*var(--internal-standard-padding)); 57 | } 58 | 59 | /** 60 | * TWO ROW CONTAINER TEST 61 | */ 62 | 63 | .two-row-container { 64 | display: grid; 65 | grid-template-rows: auto auto; 66 | row-gap: var(--internal-standard-padding); 67 | height: 100%; 68 | } 69 | 70 | .two-row-top-row { 71 | background-color: white; 72 | padding-bottom: calc(3*var(--internal-standard-padding)); 73 | border-bottom: 1px solid var(--lightgray-borders-backgrounds); 74 | } 75 | 76 | .two-row-bottom-row { 77 | background-color: white; 78 | } 79 | 80 | /** 81 | * EVERYTHING ELSE 82 | */ 83 | 84 | .two-col-container { 85 | display: grid; 86 | grid-template-columns: 200px 1fr; 87 | } 88 | 89 | #left-menu { 90 | height: 100vh; 91 | background-color: white; 92 | overflow: hidden; 93 | padding: 15px; 94 | border-right: 1px solid lightgray; 95 | } 96 | 97 | #navlogo { 98 | font-family: 'Playfair Display', serif; 99 |
font-size: 25px; 100 | font-weight: 300; 101 | letter-spacing: 1px; 102 | display: block; 103 | } 104 | 105 | #navlogo_sub { 106 | font-family: 'Playfair Display', serif; 107 | font-size: 15px; 108 | font-weight: 600; 109 | letter-spacing: 2px; 110 | display: block; 111 | color: gray; 112 | } 113 | 114 | .navlink { 115 | display: block; 116 | margin-top: 10px; 117 | cursor: pointer; 118 | text-decoration: none; 119 | color: black; 120 | font-weight: 100; 121 | } 122 | 123 | .navlink:first-of-type { 124 | margin-top: 25px; 125 | } 126 | 127 | .navlink .navicon { 128 | margin-right: 10px; 129 | } 130 | 131 | #main-content { 132 | height: 100vh; 133 | background-color: white; 134 | overflow: auto; 135 | padding: 15px; 136 | } 137 | 138 | .content-title { 139 | font-weight: 200; 140 | font-size: 25px; 141 | padding-bottom: var(--standard-margin-spacing-text); 142 | } 143 | 144 | input, 145 | textarea, 146 | .formfield { 147 | margin: 0 0 var(--standard-margin-spacing-text) 0; 148 | padding: var(--internal-standard-padding); 149 | border-radius: var(--standard-border-radius); 150 | border: 1px solid lightgray; 151 | } 152 | 153 | .formfield-hover { 154 | cursor: pointer; 155 | } 156 | 157 | .formfield-hover:hover { 158 | background-color: var(--lightgray-borders-backgrounds); 159 | } 160 | 161 | .error_message { 162 | color: crimson; 163 | } 164 | 165 | .job_info_container { 166 | padding: calc(2*var(--internal-standard-padding)); 167 | border-radius: var(--standard-border-radius); 168 | background-color: var(--lightgray-borders-backgrounds); 169 | margin: var(--internal-standard-padding) 0 var(--internal-standard-padding) 0; 170 | display: grid; 171 | grid-template-columns: 33% 33% 1fr; 172 | } 173 | 174 | .job_info_container div { 175 | margin-right: var(--internal-standard-padding); 176 | } 177 | 178 | .jobtitle { 179 | font-weight: 900; 180 | margin: calc(2*var(--internal-standard-padding)) 0 calc(2*var(--internal-standard-padding)) 0; 181 | } 182 | 183 | .general-list-container { 184 | padding: calc(2*var(--internal-standard-padding)); 185 | border-radius: var(--standard-border-radius); 186 | background-color: var(--lightgray-borders-backgrounds); 187 | margin: var(--internal-standard-padding) 0 var(--internal-standard-padding) 0; 188 | } 189 | 190 | .tag-label-green { 191 | display: inline-block; 192 | padding: var(--internal-standard-padding); 193 | border-radius: var(--standard-border-radius); 194 | background-color: #2E8B57; 195 | color: white; 196 | font-weight: 600; 197 | font-size: 12px; 198 | } 199 | 200 | .tag-label-blue { 201 | display: inline-block; 202 | padding: var(--internal-standard-padding); 203 | border-radius: var(--standard-border-radius); 204 | background-color: #4682B4; 205 | color: white; 206 | font-weight: 600; 207 | font-size: 12px; 208 | } 209 | 210 | .delete-icon { 211 | margin: 0 var(--internal-standard-padding) 0 var(--internal-standard-padding); 212 | cursor: pointer; 213 | } -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/aboutus.html: -------------------------------------------------------------------------------- 1 | {% extends 'base-navigation.html' %} 2 | 3 | {% block bodycontent %} 4 | 5 |

The PhaseLLM Evaluation project is built by Phase AI. You can learn about the PhaseLLM package by visiting phasellm.com. Learn more about Phase AI at phaseai.com.

8 | 9 |

 

10 | 11 |

If you have questions about this LLM evaluation project, you can also email w [at] phaseai [dot] com.

12 | 13 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/base-navigation-two-cols.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block content %} 4 | 5 |
6 | 7 |
8 | 9 | EVALUATION 10 | 🏠Home 11 | 💬Chat Reviews 12 | 📖New Chat 13 | 📋Paste JSON 14 | 📚Chat Groups 15 | 🤖Create Job 16 | 👋About Us 17 |
18 |
19 |
20 |
21 |

{{ contenttitle|default:"LLM Evaluator" }}

22 | {% block bodycontent %} 23 | {% endblock %} 24 |
25 |
26 |

{{ contenttitle2 }}

27 | {% block bodycontent2 %} 28 | {% endblock %} 29 |
30 |
31 |
32 | 33 |
34 | 35 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/base-navigation-two-rows.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block content %} 4 | 5 |
6 | 7 |
8 | 9 | EVALUATION 10 | 🏠Home 11 | 💬Chat Reviews 12 | 📖New Chat 13 | 📋Paste JSON 14 | 📚Chat Groups 15 | 🤖Create Job 16 | 👋About Us 17 |
18 |
19 |
20 |
21 |

{{ contenttitle|default:"LLM Evaluator" }}

22 | {% block bodycontent %} 23 | {% endblock %} 24 |
25 |
26 |

{{ contenttitle2 }}

27 | {% block bodycontent2 %} 28 | {% endblock %} 29 |
30 |
31 |
32 | 33 |
34 | 35 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/base-navigation.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block content %} 4 | 5 |
6 | 7 |
8 | 9 | EVALUATION 10 | 🏠Home 11 | 💬Chat Reviews 12 | 📖New Chat 13 | 📋Paste JSON 14 | 📚Chat Groups 15 | 🤖Create Job 16 | 👋About Us 17 |
18 |
19 |

{{ contenttitle|default:"LLM Evaluator" }}

20 | {% block bodycontent %} 21 | {% endblock %} 22 |
23 | 24 |
25 | 26 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/base.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 15 | 16 | 18 | 19 | 20 | 21 | 22 | {% block title %} 23 | {{ contenttitle|default:"LLM Evaluator" }} 24 | {% endblock %} 25 | 26 | 27 | 28 | 29 | 30 | {% block content %} 31 | 32 | {% endblock %} 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/batch.html: -------------------------------------------------------------------------------- 1 | {% extends 'base-navigation.html' %} 2 | 3 | {% block bodycontent %} 4 | 5 |

6 | 7 |

8 | 9 |

10 | 11 |

12 | 13 |

Advanced Options
14 | Resend Last User Message
15 | Run GPT-4
16 | Run GPT-3.5
17 | Run across temperature = 0.25, 0.75, and 1.25
18 | Number of times to run: 19 |

20 | 21 |

23 | 24 | 25 |

Queue Job

26 | 27 | 76 | 77 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/batch_review.html: -------------------------------------------------------------------------------- 1 | {% extends 'base-navigation.html' %} 2 | 3 | {% block bodycontent %} 4 | 5 | {% if jobs %} 6 | 7 | {% for job in jobs %} 8 | 9 |
10 |
11 |

{{ job.title }} 12 | {% if job.status == "complete" %} 13 | complete 14 | {% elif job.status == "scheduled" %} 15 | scheduled 16 | {% endif %} 17 |

18 | {% if job.description %} 19 |

{{ job.description }}

20 | {% else %} 21 |

No Description Provided

22 | {% endif %} 23 |
24 |
25 |

Input Chats

26 | {% if job.message_collection_ref %} 27 |
    28 | {% for chat in job.message_collection_ref.chats.all %} 29 |
  • {{ chat.title }}
  • 30 | {% endfor %} 31 |
32 | {% else %} 33 |

No input chats.

34 | {% endif %} 35 |
36 |
37 |

Generated Chats

38 | {% if job.results_array %} 39 |
    40 | {% for chat in job.results_array.chats.all %} 41 |
  • {{ chat.title }}
  • 42 | {% endfor %} 43 |
44 | {% else %} 45 |

No output chats (yet).

46 | {% endif %} 47 |
48 |
49 | 50 | {% endfor %} 51 | 52 | {% else %} 53 |

No jobs created yet.

54 | 55 |

 

56 | 57 |

If this is your first time using the Evaluation platform, try doing the following!
  58 |

65 |

66 | {% endif %} 67 | 68 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/chats.html: -------------------------------------------------------------------------------- 1 | {% extends 'base-navigation.html' %} 2 | 3 | {% block bodycontent %} 4 | 5 |

Please visit /view_chat/chat_id where chat_id is the ID of the chat you want to view.

6 | 7 | {% if all_chats %} 8 | 13 | {% endif %} 14 | 15 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/create-group.html: -------------------------------------------------------------------------------- 1 | {% extends 'base-navigation-two-cols.html' %} 2 | 3 | {% block bodycontent2 %} 4 | 5 |

6 |

7 |

Save

8 | 9 | 36 | 37 | 38 | {% endblock %} 39 | 40 | {% block bodycontent %} 41 | {% if all_groups %} 42 | {% for g in all_groups %} 43 |
44 | ID: {{g.id}} {{ g.title }} 45 |
46 | {% endfor %} 47 | {% else %} 48 |

No groups created yet.

49 | {% endif %} 50 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/create.html: -------------------------------------------------------------------------------- 1 | {% extends 'base-navigation.html' %} 2 | 3 | {% block bodycontent %} 4 | 5 |

6 |

7 |

Import

8 | 9 | 36 | 37 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/eval_platform/templates/view-chat.html: -------------------------------------------------------------------------------- 1 | {% extends 'base-navigation-two-cols.html' %} 2 | 3 | {% block bodycontent %} 4 | {% if all_chats %} 5 | {% for chat in all_chats %} 6 |
7 | ID: {{chat.id}} 8 | {{chat.title }} 9 | 10 |
11 | {% endfor %} 12 | {% endif %} 13 | {% endblock %} 14 | 15 | 16 | {% block bodycontent2 %} 17 | 18 | {% if chat_id == -1 %} 19 |

{{ chat_title }}

20 |

Select a chat to review and edit it.

21 | {% else %} 22 | 23 | {% if error_msg %} 24 |

Error! {{ error_msg | safe}}

25 | {% else %} 26 | 27 |

28 | Save to New Chat 29 | 30 | Overwrite 31 | Delete 32 |

33 | 34 |
Chat Title:
35 | {{ chat_title }} 36 |
37 | 38 |

 

39 | 40 |
41 | 42 |

 

43 | 44 |

45 | + System 46 | + Assistant 47 | + User 48 |

49 | 50 | 185 | {% endif %} 186 | 187 | {% endif %} 188 | 189 | {% endblock %} -------------------------------------------------------------------------------- /demos-and-products/newsbot/README.md: -------------------------------------------------------------------------------- 1 | # NewsBot 2 | 3 | An autonomous news summarizer. You can set this up to execute regularly and it will email you a summary of news articles for a given period, on specific queries or topics. 4 | 5 | ## Installation and Setup 6 | 7 | You need `phasellm` installed; no additional packages are required. However, you do need to have... 8 | 9 | - An OpenAI API key 10 | - A Gmail account (we'll use this to send news summaries) 11 | - A newsapi.org API key 12 | 13 | Set up a .env file with the above, as follows: 14 | 15 | ``` 16 | OPENAI_API_KEY= 17 | NEWS_API_API_KEY= 18 | GMAIL_EMAIL= 19 | GMAIL_PASSWORD= 20 | ``` 21 | 22 | Note that you'll likely need to set up an [app password](https://myaccount.google.com/apppasswords) for your Gmail account, rather than using your actual password. This is something Gmail requires for security purposes (and it's a great idea!). [Learn more here.](https://support.google.com/mail/answer/185833) 23 | 24 | ## Running 25 | 26 | Once you've done the above, simply run `python newsbot.py` and you're good to go! 27 | -------------------------------------------------------------------------------- /demos-and-products/newsbot/newsbot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sample code for getting a list of news articles, having OpenAI summarize them, and then deploying an email with the summaries. 3 | """ 4 | 5 | from phasellm.agents import EmailSenderAgent, NewsSummaryAgent 6 | from phasellm.llms import OpenAIGPTWrapper, ClaudeWrapper 7 | 8 | queries = ["inflation", "openai", "llm"] # We will generate a summary for each element in the list 9 | 10 | ########################################################################## 11 | # 12 | # ENVIRONMENT VARIABLES (Gmail, News API, etc.) (START) 13 | # Update this to customize your newsbot experience. 14 | # 15 | 16 | import os 17 | from dotenv import load_dotenv 18 | 19 | load_dotenv() 20 | 21 | # Load OpenAI and newsapi.org API keys 22 | openai_api_key = os.getenv("OPENAI_API_KEY") 23 | news_api_api_key = os.getenv("NEWS_API_API_KEY") 24 | 25 | # Load Anthropic API key 26 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") 27 | 28 | # Gmail credentials. 29 | gmail_email = os.getenv("GMAIL_EMAIL") 30 | gmail_password = os.getenv("GMAIL_PASSWORD") 31 | 32 | RECIPIENT_EMAIL="" 33 | SENDER_NAME="" 34 | 35 | # 36 | # ENVIRONMENT VARIABLES (END) 37 | # 38 | ########################################################################## 39 | 40 | def getArticlesAndSummarize(news_agent, llm, query, days_back=1, include_descriptions=True, max_articles=30): 41 | """ 42 | See NewsSummaryAgent docs for what the above variables mean. 43 | """ 44 | 45 | # First, we obtain the news articles for the query. By default, this is limited to 30 articles going back 1 day. 46 | news_articles = news_agent.getQuery(query, days_back=days_back, include_descriptions=include_descriptions, max_articles=max_articles) 47 | 48 | # Set up messages for summarization. 49 | system = "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes.
For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. The footnote numbers should start at [1] and increase consecutively. In other words, footnotes should start at 1, 2, 3, etc. For the actual paragraph, you can reorder reference articles and choose the ones to include as to make the paragraph as informative, pithy, and concise as possible. You can also have multiple footnotes per sentence if this helps tell the story. While you should avoid adding your own commentary in most cases, feel free to do so if it will help the reader understand the context of the paragraph you are writing." 50 | user_prompt = f"The articles below are about '{query}'. Please summarize them into a short paragraph with link retained as per the earlier instructions.\n\n{news_articles}" 51 | messages = [{"role":"system", "content":system}, {"role":"user", "content":user_prompt}] 52 | 53 | news_message = llm.complete_chat(messages) 54 | 55 | return news_message 56 | 57 | # News agent 58 | news_agent = NewsSummaryAgent(news_api_api_key, name="tester agent") 59 | 60 | # OpenAI model, GPT-4. You can use other models, of course. 61 | #llm = OpenAIGPTWrapper(openai_api_key, model="gpt-4") 62 | #MAX_ARTICLES = 30 63 | 64 | # Claude (Anthropic) with 100K tokens. 65 | llm = ClaudeWrapper(anthropic_api_key, model="claude-v1-100k") 66 | MAX_ARTICLES = 100 67 | 68 | news_content = "" 69 | for query in queries: 70 | content = getArticlesAndSummarize(news_agent, llm, query, max_articles=MAX_ARTICLES) 71 | news_content += f"# News for {query}\n\n{content}\n\n" 72 | 73 | # Generate subject line. 74 | news_subject = f"News about: {', '.join(queries)}" 75 | 76 | # Send email. 77 | e = EmailSenderAgent(SENDER_NAME, 'smtp.gmail.com', gmail_email, gmail_password, 587) 78 | e.sendPlainEmail(RECIPIENT_EMAIL, news_subject, news_content) 79 | -------------------------------------------------------------------------------- /demos-and-products/newsbot/newsbot_create.py: -------------------------------------------------------------------------------- 1 | ### IMPORTS 2 | 3 | from phasellm.llms import OpenAIGPTWrapper, ClaudeWrapper, ChatPrompt 4 | from phasellm.agents import NewsSummaryAgent 5 | import json 6 | 7 | ### ENVIRONMENT VARIABLES 8 | 9 | import os 10 | from dotenv import load_dotenv 11 | 12 | load_dotenv() 13 | openai_api_key = os.getenv("OPENAI_API_KEY") 14 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") 15 | news_api_api_key = os.getenv("NEWS_API_API_KEY") 16 | 17 | ### SETUP THE EXPERIMENTAL DATA 18 | 19 | queries = ['spacex', 'federal reserve', 'shopify', 'openai', 'biden', 'trump', 'met gala', 'king charles', 'poland', 'euro'] 20 | JSON_FILE = "news_articles.json" 21 | 22 | llm_1 = OpenAIGPTWrapper(openai_api_key, model="gpt-4") 23 | llm_2 = OpenAIGPTWrapper(openai_api_key, model="gpt-4") # ClaudeWrapper(anthropic_api_key) 24 | 25 | chat_prompt_raw_1 = [ 26 | {"role":"system", 27 | "content": "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes. For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. Note that the footnotes should be counted as of the summary; you do not need to keep the numbers from the earlier order, just from your summary. 
In other words, footnotes should start at 1, 2, 3, etc..."}, 28 | {"role":"user", 29 | "content": "The articles below are about '{query}'. Please summarize them into a short paragraph with link retained as per the earlier instructions.\n\n{news_articles}"}, 30 | ] 31 | 32 | chat_prompt_raw_2 = [ 33 | {"role":"system", 34 | "content": "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes. For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. The footnote numbers should start at [1] and increase consecutively. In other words, footnotes should start at 1, 2, 3, etc. For the actual paragraph, you can reorder reference articles and choose the ones to include as to make the paragraph as informative, pithy, and concise as possible. You can also have multiple footnotes per sentence if this helps tell the story. While you should avoid adding your own commentary in most cases, feel free to do so if it will help the reader understand the context of the paragraph you are writing."}, 35 | {"role":"user", 36 | "content": "The articles below are about '{query}'. Please take on the role of an entertaining, successful, AI-driven investigative journalist and summarize them into a short paragraph. Make sure to follow the 'system' instructions.\n\n{news_articles}"}, 37 | ] 38 | 39 | chat_prompt_1 = ChatPrompt(chat_prompt_raw_1) 40 | chat_prompt_2 = ChatPrompt(chat_prompt_raw_2) 41 | 42 | ### DATA HELPERS 43 | 44 | def create_data_set(queries, json_file): 45 | article_dict = {} 46 | news_agent = NewsSummaryAgent(news_api_api_key, name="tester agent") 47 | for query in queries: 48 | news_articles = news_agent.getQuery(query, days_back=1, include_descriptions=True, max_articles=30) 49 | article_dict[query] = {"articles":news_articles} 50 | 51 | update_data_set(article_dict, json_file) 52 | 53 | def update_data_set(dict_obj, json_file): 54 | with open(json_file, 'w') as writer: 55 | writer.write(json.dumps(dict_obj)) 56 | 57 | def load_data_set(json_file): 58 | articles = None 59 | with open(json_file, 'r') as reader: 60 | articles = json.loads(reader.read()) 61 | return articles 62 | 63 | ### RUNNING DATA SET CREATION 64 | 65 | create_data_set(queries, JSON_FILE) 66 | 67 | articles = load_data_set(JSON_FILE) 68 | for query, article_dict in articles.items(): 69 | 70 | print(f"Generating news summary for '{query}'") 71 | 72 | print("... llm_1") 73 | llm_1_completion = llm_1.complete_chat(chat_prompt_1.fill(query=query, news_articles=article_dict['articles'])) 74 | 75 | print("... llm_2") 76 | llm_2_completion = llm_2.complete_chat(chat_prompt_2.fill(query=query, news_articles=article_dict['articles'])) 77 | 78 | # Saving results...
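# (Both summaries are stored alongside the raw articles for each query, so
# newsbot_evaluate.py can later compare llm_1 and llm_2 blind without
# re-running any LLM calls.)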
79 | article_dict["llm_1"] = llm_1_completion 80 | article_dict["llm_2"] = llm_2_completion 81 | articles[query] = article_dict 82 | 83 | update_data_set(articles, JSON_FILE) -------------------------------------------------------------------------------- /demos-and-products/newsbot/newsbot_evaluate.py: -------------------------------------------------------------------------------- 1 | from phasellm.eval import EvaluationStream 2 | 3 | import json 4 | 5 | JSON_FILE = "news_articles.json" 6 | 7 | def load_data_set(json_file): 8 | articles = None 9 | with open(json_file, 'r') as reader: 10 | articles = json.loads(reader.read()) 11 | return articles 12 | 13 | articles = load_data_set(JSON_FILE) 14 | 15 | # Note that we don't pass the two LLMs to the Evaluation Stream -- no need to do so in this example. 16 | es = EvaluationStream("Which news summary is higher quality and more engaging?", "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes. For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. Note that the footnotes should be counted as of the summary; you do not need to keep the numbers from the earlier order, just from your summary. In other words, footnotes should start at 1, 2, 3, etc...", [None, None]) 17 | 18 | for key, article_dict in articles.items(): 19 | r1 = article_dict["llm_1"] 20 | r2 = article_dict["llm_2"] 21 | es.evaluate(r1, r2) 22 | 23 | print(es.prefs) -------------------------------------------------------------------------------- /demos-and-products/newsbot/notes.md: -------------------------------------------------------------------------------- 1 | # Notes on Evaluations 2 | 3 | There's a four-step process to testing these applications: 4 | 1. input data 5 | 2. prompt 6 | 3. execute 7 | 4. evaluate 8 | 9 | We'll begin by very specifically exploring this from the perspective of newsbot.py 10 | 11 | ## Input Data 12 | 13 | In this case, we have the following input data for each query: 14 | (a) A purpose for the news bot. This is basically a higher-level prompt (e.g., system prompt) that stays the same within an experiment but might be optimized or changed across models or experiments. 15 | (b) A query. This is the actual news topic we are asking to summarize. We have multiple queries per experiment. 16 | (c) A list of articles with descriptions and links. This is generated by our agent. 17 | 18 | ## Prompt 19 | 20 | There are two types of prompts, based on what we're doign so far: (1) text completion prompts, and (2) chat prompts. 21 | 22 | A text completion prompt is our traditional approach to generating prompts. You have a set of instructions, and varibales will be replaced as needed (e.g., replace {query} with the topic of interest). 23 | 24 | A chat prompt is different. Since a chat prompt has multiple messages, we might actually need to convert variables across the entire structure of chat. Today, we do not support chat prompts, but will need to do so for the news bot demo. 25 | 26 | ## Execute 27 | 28 | This is the actual model execution loop. In this case, we take the input data and insert it into our prompts. Then we take those prompts and execute against models. We get the results and save them. 29 | 30 | ## Evaluation 31 | 32 | Once all of the above has taken place, we then go ahead and review all the results. 
We want to do this in a 'blind peer review' approach where we randomize the order of outputs so we do not know which prompt/model combination is which. -------------------------------------------------------------------------------- /demos-and-products/researchllm/README.md: -------------------------------------------------------------------------------- 1 | # ResearchLLM 2 | 3 | An autonomous statistics helper that converts your natural language queries about a data set to insights. 4 | 5 | - Converts natural language questions to Python code 6 | - Runs code locally without sharing data with third parties (just shares metadata) 7 | - Interprets results 8 | - Provides access to the underlying Python code for audit and review 9 | 10 | [Watch the 2-minute demo:](https://www.youtube.com/watch?v=-fzFCii6UoA) 11 | [![ResearchLLM screenshot](screenshot.png)](https://www.youtube.com/watch?v=-fzFCii6UoA) 12 | 13 | Please note that we originally launched this as *ResearchGPT* and have since renamed the demo to *ResearchLLM*. Apologies for any confusion! 14 | 15 | ## 🚨🚨 WARNING: Runs LLM-Generated Python Code 16 | 17 | This product will run LLM-generated Python code on your computer/server. We highly recommend sandboxing the code or running this on a server that doesn't contain any sensitive information or processes. 18 | 19 | ## Installation and Setup 20 | 21 | ### Installation 22 | 23 | Clone the GitHub repository and navigate to the folder containing this README.md file. Install the relevant packages (including PhaseLLM): 24 | 25 | ``` 26 | pip install -r requirements.txt 27 | ``` 28 | 29 | Next, make sure you edit the `researchllm.py` file to include the proper API keys. You'll find these around line 19: 30 | ```python 31 | ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") 32 | MODEL = ClaudeWrapper(ANTHROPIC_API_KEY) 33 | ``` 34 | 35 | You can change the model type from ClaudeWrapper to other PhaseLLM wrappers. Make sure to update your API key accordingly, either via an environment variable or directly in the code. 36 | 37 | ### Running With Sample Data 38 | 39 | Start a Python REPL (i.e., run `python` in the folder with all the files from this repo) and then type the following: 40 | 41 | ``` 42 | from frontend import * 43 | run() # Or, run('0.0.0.0', 80) for a public server 44 | ``` 45 | 46 | Running `run()` will launch the server on 127.0.0.1:5000 (i.e., the default Flask setting). 47 | 48 | ### Running With Your Own Custom Data 49 | 50 | Running this with your own data only requires a few simple changes to `frontend.py`. Around line 20, you'll see the following comments: 51 | ```python 52 | ########################################################################## 53 | # 54 | # DATA SET SETUP (START) 55 | # Please review the code below to set up your own data set for analysis. 56 | # 57 | ``` 58 | 59 | All the instructions are there, but we repeat them here for your convenience. You will have to update the two variables below: 60 | ```python 61 | DATA_SETUP_INTRO = "I am researching the relationship between income and sociodemographic census info." 62 | DATA_FILE_LOC = "incomes.csv" 63 | ``` 64 | 65 | `DATA_SETUP_INTRO` should be one short sentence on the context of your data, while `DATA_FILE_LOC` is the location of the file you're loading. 66 | 67 | If you are *not* using a CSV file, you can change how the DataFrame is loaded a few lines down: 68 | ```python 69 | df = pd.read_csv(DATA_FILE_LOC) 70 | ``` 71 | 72 | Replace the line above with your custom loader (e.g., read_excel() or something else).
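For example, a minimal sketch of an Excel-based loader (the file name here is purely illustrative; `pd` is already imported in `frontend.py`):
```python
# Any loader works here, as long as the result is a Pandas DataFrame named `df`.
df = pd.read_excel("my_data.xlsx")  # hypothetical file name
```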
The `df` variable needs to be a Pandas DataFrame for this to work. 73 | 74 | ## Sample Data Files and Credits 75 | 76 | The sample data set included in this project and in the demo video is from the 1994 US census. It was put together by Ron Kohavi and is [available on Kaggle](https://www.kaggle.com/datasets/uciml/adult-census-income?select=adult.csv). 77 | 78 | The other data set referenced in our code is [also on Kaggle](https://www.kaggle.com/datasets/new-york-city/nypd-motor-vehicle-collisions), focusing on motor vehicle collisions in New York City. We didn't include it in the repository as it's about 500MB in size. It's a good alternative to the census data above because it contains location data (latitude, longitude pairs), leading to some really interesting analysis options. 79 | -------------------------------------------------------------------------------- /demos-and-products/researchllm/frontend.py: -------------------------------------------------------------------------------- 1 | """ 2 | A Flask frontend for ResearchLLM 3 | 4 | To run, start a Python REPL in the same directory as this file and run the following: 5 | > from frontend import * 6 | > run() # Or, run('0.0.0.0', 80) 7 | 8 | """ 9 | 10 | from researchllm import * 11 | 12 | from flask import Flask, request, render_template 13 | import pandas as pd 14 | import numpy as np 15 | 16 | APP = Flask(__name__) 17 | 18 | ########################################################################## 19 | # 20 | # DATA SET SETUP (START) 21 | # Please review the code below to set up your own data set for analysis. 22 | # 23 | 24 | # Data set to load and analyze. 25 | DATA_SETUP_INTRO = "I am researching the relationship between income and sociodemographic census info." 26 | DATA_FILE_LOC = "incomes.csv" 27 | 28 | # Another sample we explored. 29 | #DATA_SETUP_INTRO = "I am researching car crashes in NYC." 30 | #DATA_FILE_LOC = "nypd-motor-vehicle-collisions.csv" 31 | 32 | # Want to analyze your own data set? Simply replace the two variables above: 33 | # DATA_SETUP_INTRO = "What are you researching? Please provide a short description." 34 | # DATA_FILE_LOC = "The location of the CSV file." 35 | # Note that you DO NOT have to provide metadata about the CSV file. This gets generated automatically. 36 | 37 | # Loads the CSV file. 38 | # If you want to load another file (e.g., Excel file), replace the code below with the relevant function (e.g., read_excel()). 39 | df = pd.read_csv(DATA_FILE_LOC) 40 | 41 | # Advanced settings 42 | INCLUDE_COL_DESCRIPTION_VALS = True # Choose whether to include sample values in the column descriptions (within the prompt) 43 | MAX_UNIQUES_FOR_DESC = 10 # Number of unique values to show in column description 44 | 45 | # 46 | # DATA SET SETUP (END) 47 | # 48 | ########################################################################## 49 | 50 | def generateOverview(df): 51 | """ 52 | Generates a prompt providing an overview of a data set. This should only be used to generate the initial data prompt for now. 53 | """ 54 | description = "" 55 | for column in df: 56 | col_name = df[column].name 57 | col_type = df[column].dtype 58 | col_description = f"Column Name: {col_name}\nColumn Type: {col_type}" 59 | if col_type == "object": 60 | 61 | # Get unique values for column descriptions.
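# Only object (string-like) columns get sample values below; numeric columns are
# described by name and dtype alone, which keeps the generated prompt compact.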
62 | column_values = df[col_name].values 63 | uniques = list(set(column_values)) 64 | 65 | if INCLUDE_COL_DESCRIPTION_VALS: 66 | if len(uniques) > MAX_UNIQUES_FOR_DESC: 67 | col_description += f"\nSample Values: {str(uniques[0:MAX_UNIQUES_FOR_DESC])}" 68 | else: 69 | col_description += f"\nSample Values: {str(uniques)}" 70 | description += col_description + "\n\n" 71 | return description.strip() 72 | 73 | # The prompt used to set up the entire chat session. This prompt is used regularly for analysis. 74 | base_prompt = f"{DATA_SETUP_INTRO} I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`, and have a dataframe called `df` loaded into Python. `df` contains the following variables and variable types:\n\n" + generateOverview(df) 75 | 76 | # Calls the researchllm.py function to set the current dataframe as the main one for analysis. 77 | set_df(df) 78 | 79 | ########################################################################## 80 | # 81 | # FLASK FUNCTIONS 82 | # Everything below manages the frontend. 83 | # 84 | ########################################################################## 85 | 86 | @APP.route('/get_prompt') 87 | def get_prompt(): 88 | """ 89 | Returns a JSON object with the prompt being passed on to the language model. 90 | """ 91 | return {"status":"ok", "prompt":base_prompt} 92 | 93 | @APP.route('/') 94 | def index(): 95 | """ 96 | Displays the index page accessible at '/' 97 | """ 98 | return render_template('index.html') 99 | 100 | @APP.route("/text_completion", methods = ['POST']) 101 | def analysis(): 102 | """ 103 | Calls the researchllm.py code to request analysis and interpretation thereof. 104 | 105 | See run_analysis(message) in researchllm.py for more information. 106 | """ 107 | text_to_complete = request.json["input"] 108 | new_request = base_prompt + text_to_complete 109 | response_object = run_analysis(new_request) 110 | return {"status":"ok", "content":response_object["interpretation"], "code":response_object["code"], "code_output":response_object["code_output"], "error":response_object["error"]} 111 | 112 | def run(host="127.0.0.1", port=5000): 113 | """ 114 | Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only. 
115 | """ 116 | start_bi_session() 117 | APP.run(host=host, port=port) 118 | -------------------------------------------------------------------------------- /demos-and-products/researchllm/requirements.txt: -------------------------------------------------------------------------------- 1 | phasellm 2 | scikit-learn 3 | pandas 4 | numpy 5 | scipy 6 | statsmodels -------------------------------------------------------------------------------- /demos-and-products/researchllm/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/researchllm/screenshot.png -------------------------------------------------------------------------------- /demos-and-products/web-search-chatbot/demo.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | 4 | from phasellm.llms import ClaudeWrapper, ChatBot 5 | from phasellm.agents import WebSearchAgent 6 | 7 | from flask import Flask, request, render_template, jsonify 8 | 9 | load_dotenv() 10 | llm = ClaudeWrapper(os.getenv("ANTHROPIC_API_KEY"), model='claude-2') 11 | web_search_agent = WebSearchAgent( 12 | api_key=os.getenv("GOOGLE_SEARCH_API_KEY") 13 | ) 14 | 15 | CHATBOT: ChatBot 16 | 17 | APP = Flask(__name__) 18 | 19 | 20 | def reset_chatbot(): 21 | """ 22 | Reset the chatbot state. 23 | Returns: 24 | 25 | """ 26 | global CHATBOT 27 | CHATBOT = ChatBot(llm) 28 | return True 29 | 30 | 31 | # Call reset_chatbot() to initialize the chatbot. 32 | reset_chatbot() 33 | 34 | 35 | @APP.route('/submit-chat-message', methods=['POST']) 36 | def route_send_chat(): 37 | try: 38 | global CHATBOT 39 | message = request.json["input"] 40 | 41 | query = CHATBOT.chat( 42 | f'Come up with a google search query that will provide more information to help answer the question: ' 43 | f'"{message}". Respond with only the query.' 44 | ) 45 | print(f'Google search query: {query}') 46 | 47 | # Submit the query to the Google Search Agent. 48 | results = web_search_agent.search_google( 49 | query, 50 | custom_search_engine_id=os.getenv("GOOGLE_SEARCH_ENGINE_ID"), 51 | num=2 52 | ) 53 | 54 | sources = [] 55 | # Add the contents of the top result into the chatbot message queue. 56 | if len(results) >= 1: 57 | for result in results: 58 | CHATBOT.append_message( 59 | role='search result', 60 | message=result.content 61 | ) 62 | sources.append(result.url) 63 | 64 | # Resubmit the message with the new search result as context. 65 | response = CHATBOT.chat(message + '. Answer using the information from the search results above.') 66 | 67 | return {"status": "ok", "content": response, "sources": sources} 68 | except Exception as e: 69 | return {"status": "error", "message": e} 70 | 71 | 72 | @APP.route('/reset-chatbot') 73 | def route_reset_chatbot(): 74 | if reset_chatbot(): 75 | return jsonify({"status": "ok", "message": "ChatBot has been restarted."}) 76 | else: 77 | return jsonify({"status": "error", "message": "ChatBot could not be restarted."}) 78 | 79 | 80 | @APP.route('/') 81 | def route_index(): 82 | 83 | if "reset" in request.args: 84 | if request.args['reset'] == 'true': 85 | reset_chatbot() 86 | 87 | return render_template('index.html') 88 | 89 | 90 | def run(host="127.0.0.1", port=5000): 91 | """ 92 | Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only. 
93 | """ 94 | APP.run(host=host, port=port) 95 | 96 | 97 | MAIN_HOST = "127.0.0.1" 98 | MAIN_PORT = 8000 99 | if __name__ == '__main__': 100 | run(MAIN_HOST, MAIN_PORT) 101 | -------------------------------------------------------------------------------- /demos-and-products/web-search-chatbot/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Web Search Chatbot 4 | 5 | 6 | 7 | 8 | 9 | 81 | 82 | 83 | 84 | 85 |
86 | 87 |
88 | 89 | 90 |
91 |
92 | 93 | 94 |
95 |
96 | 97 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | ### Docs Setup 2 | 3 | 1) Install docs dependencies 4 | ``` 5 | pip install -e .[docs] 6 | ``` 7 | 8 | 2) Run a local docs server 9 | ``` 10 | sphinx-autobuild docs/source/ docs/build/html 11 | ``` 12 | 13 | ### Manual Build 14 | 15 | ``` 16 | cd docs 17 | make html 18 | ``` 19 | 20 | ### Helpful Tools 21 | 22 | * Convert reStructuredText (.rst) to Markdown (.md) 23 | ``` 24 | pip install rst-to-myst[sphinx] 25 | rst2myst convert docs/**/*.rst 26 | ``` 27 | 28 | ### Useful Resources 29 | 30 | * Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021 31 | * https://www.sphinx-doc.org/en/master/usage/quickstart.html 32 | * https://www.youtube.com/watch?v=qRSb299awB0 -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append('../../') 4 | 5 | from project_metadata import NAME, VERSION, AUTHOR # noqa: E402 6 | 7 | # Configuration file for the Sphinx documentation builder. 
8 | # 9 | # For the full list of built-in configuration values, see the documentation: 10 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 11 | 12 | # -- Project information ----------------------------------------------------- 13 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 14 | 15 | project = NAME 16 | copyright = f'2023, {AUTHOR}' 17 | author = AUTHOR 18 | release = VERSION 19 | 20 | # -- General configuration --------------------------------------------------- 21 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 22 | 23 | # Add paths to the Python source code. 24 | sys.path.append('../../phasellm') 25 | 26 | # Allow markdown files to be used. 27 | extensions = [ 28 | 'myst_parser', 29 | 'autoapi.extension', 30 | 'sphinx.ext.duration', 31 | 'sphinx.ext.autodoc', 32 | 'sphinx.ext.napoleon' 33 | ] 34 | 35 | # Configure autoapi. 36 | autoapi_dirs = ['../../phasellm'] 37 | autoapi_python_class_content = "init" 38 | 39 | templates_path = ['_templates'] 40 | exclude_patterns = [] 41 | 42 | # -- Options for HTML output ------------------------------------------------- 43 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 44 | 45 | html_theme = 'furo' 46 | html_static_path = ['_static'] 47 | -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | % Phasellm documentation master file, created by 2 | % sphinx-quickstart on Tue Aug 8 15:42:56 2023. 3 | % You can adapt this file completely to your liking, but it should at least 4 | % contain the root `toctree` directive. 5 | 6 | ```{include} ../../README.md 7 | :relative-images: 8 | ``` 9 | 10 | ## Contents 11 | ```{toctree} 12 | :maxdepth: 2 13 | 14 | ``` 15 | -------------------------------------------------------------------------------- /phasellm/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Welcome to PhaseLLM! 3 | 4 | We are a framework to help you build robust Large Language Model (LLM)-based apps. Please visit our site at phasellm.com for documents, tutorials, and more. 5 | 6 | The module comes with the following submodules: 7 | - agents: components that can execute specific tasks, such as downloading the latest news, executing code, sending an email, and more. 8 | - eval: ways to evaluate LLM and app performance. 9 | - exceptions: classes to track LLM-specific types of exceptions. 10 | - llms: helper classes for dealing with LLMs, including wrappers for popular models, chatbots, and more. 11 | 12 | Have any questions? Reach out at hello (at) phaseai (dot) com 13 | """ -------------------------------------------------------------------------------- /phasellm/configurations_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from warnings import warn 4 | 5 | 6 | def coerce_azure_base_url(url: str) -> str: 7 | """ 8 | This function coerces the base URL to the proper format for the Azure OpenAI API. This is used for backwards 9 | compatibility of base_url and api_base arguments. 10 | Args: 11 | url: The url to coerce. 12 | 13 | Returns: 14 | The coerced URL. 15 | 16 | """ 17 | match = re.match(r'https:\/\/.*\.openai\.azure.com\/openai\/deployments\/.*', url) 18 | if not match: 19 | # Ensure proper format of the base URL. 
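# The negative lookahead below matches base URLs that end at the Azure resource host,
# i.e. ones that are missing the /openai/deployments/{model} suffix the API expects.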
20 | res = re.search(r'https:\/\/.*\.openai\.azure.com(?!\/openai\/deployments\/)', url) 21 | if res: 22 | # see https://github.com/openai/openai-python/blob/v1/examples/azure.py 23 | warn('The base_url argument must be in the format: ' 24 | 'https://{resource}.openai.azure.com/openai/deployments/{model}\n' 25 | 'Attempting to coerce base_url to the proper format.') 26 | url = f"{url[:res.end()]}/openai/deployments{url[res.end():]}" 27 | warn(f'Coerced url to: {url}') 28 | return url 29 | return url 30 | -------------------------------------------------------------------------------- /phasellm/eval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Support for LLM evaluation. 3 | """ 4 | 5 | from typing import Optional, List 6 | 7 | from .llms import OpenAIGPTWrapper, ChatBot 8 | 9 | import pandas as pd 10 | 11 | import random 12 | 13 | 14 | def simulate_n_chat_simulations(chatbot: ChatBot, n: int, out_path_excel: Optional[str] = None) -> List[str]: 15 | """ 16 | Reruns a chat message n times, returning a list of responses. Note that this will query an external API n times, so 17 | please be careful with costs. 18 | 19 | Args: 20 | chatbot: the chat sequence to rerun. The last message will be resent. 21 | n: number of times to run the simulation. 22 | out_path_excel: if provided, the output will also be written to an Excel file. 23 | 24 | Returns: 25 | A list of messages representing the responses in the chat. 26 | 27 | """ 28 | 29 | original_chat_messages = chatbot.messages.copy() 30 | responses = [] 31 | 32 | for i in range(0, n): 33 | r = chatbot.resend() 34 | responses.append(r) 35 | chatbot.messages = original_chat_messages.copy() 36 | 37 | if out_path_excel: 38 | df = pd.DataFrame({'responses': responses}) 39 | df.to_excel(out_path_excel, sheet_name='responses', index=False) 40 | 41 | return responses 42 | 43 | 44 | class BinaryPreference: 45 | 46 | def __init__(self, prompt: str, prompt_vars: str, response1: str, response2: str): 47 | """ 48 | Tracks a prompt, prompt variables, responses, and the calculated preference. 49 | 50 | Args: 51 | prompt: The prompt. 52 | prompt_vars: The variables to use in the prompt. 53 | response1: The first response. 54 | response2: The second response. 55 | 56 | """ 57 | self.prompt = prompt 58 | self.prompt_vars = prompt_vars 59 | self.response1 = response1 60 | self.response2 = response2 61 | self.preference = -1 62 | 63 | def __repr__(self): 64 | return "<BinaryPreference>" 65 | 66 | def set_preference(self, pref): 67 | """ 68 | Set the preference of the class. 69 | """ 70 | self.preference = pref 71 | 72 | def get_preference(self): 73 | """ 74 | Get the preference of the class. 75 | """ 76 | return self.preference 77 | 78 | 79 | class EvaluationStream: 80 | 81 | def __init__(self, objective, prompt, models): 82 | """ 83 | Tracks human evaluation on the command line and records results. 84 | 85 | Args: 86 | objective: what you are trying to do. 87 | prompt: the prompt you are using. Could be a summary thereof, too. We do not actively use this prompt in 88 | generating data for evaluation. 89 | models: an array of two models. These can be referenced later if need be, but are not necessary for running 90 | the evaluation workflow. 91 | 92 | """ 93 | self.models = models 94 | self.objective = objective 95 | self.prompt = prompt 97 | self.evaluator = HumanEvaluatorCommandLine() 98 | self.prefs = [0] * len(models) # This will be a simple counter for now.
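# prefs[i] counts how many times the response from models[i] was preferred;
# evaluate() below increments the winning model's slot.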
99 | 100 | def __repr__(self): 101 | return "<EvaluationStream>" 102 | 103 | def evaluate(self, response1, response2): 104 | """ 105 | Shows both sets of options for review and tracks the result. 106 | """ 107 | pref = self.evaluator.choose(self.objective, self.prompt, response1, response2) 108 | self.prefs[pref - 1] += 1 109 | 110 | 111 | class HumanEvaluatorCommandLine: 112 | 113 | def __init__(self): 114 | """ 115 | Presents an objective, prompt, and two potential responses and has a human choose between the two. 116 | """ 117 | pass 118 | 119 | def __repr__(self): 120 | return "<HumanEvaluatorCommandLine>" 121 | 122 | def choose(self, objective, prompt, response1, response2): 123 | response_map = {"A": 1, "B": 2} 124 | response_a = response1 125 | response_b = response2 126 | if random.random() <= 0.5: 127 | response_map = {"A": 2, "B": 1} 128 | response_a = response2 129 | response_b = response1 130 | 131 | output_string = f"""OBJECTIVE: {objective} 132 | 133 | PROMPT: {prompt} 134 | 135 | -------------------- 136 | RESPONSE 'A': 137 | {response_a} 138 | 139 | -------------------- 140 | RESPONSE 'B': 141 | {response_b} 142 | 143 | -------------------- 144 | """ 145 | 146 | print(output_string) 147 | user_input = "" 148 | user_input = input() 149 | if user_input not in ["A", "B"]: 150 | print("Please put in 'A' or 'B' to tell us which is the better response.") 151 | user_input = input() 152 | 153 | return response_map[user_input] 154 | 155 | 156 | class GPTEvaluator: 157 | 158 | def __init__(self, apikey, model="gpt-3.5-turbo"): 159 | """ 160 | Passes two model outputs to GPT-3.5 or GPT-4 and has it decide which is the better output. 161 | 162 | Args: 163 | apikey: the OpenAI API key. 164 | model: the model to use. Defaults to GPT-3.5 Turbo. 165 | """ 166 | self.model = OpenAIGPTWrapper(apikey, model=model) 167 | 168 | def __repr__(self): 169 | return "GPTEvaluator()" 170 | 171 | def choose(self, objective, prompt, response1, response2): 172 | """ 173 | Presents the objective of the evaluation task, a prompt, and then two responses. GPT-3.5/GPT-4 chooses the 174 | preference. 175 | Args: 176 | objective: the objective of the modeling task. 177 | prompt: the prompt to use. 178 | response1: the first response. 179 | response2: the second response. 180 | 181 | Returns: 182 | 1 if response1 is preferred, 2 if response2 is preferred. 183 | 184 | """ 185 | 186 | response_map = {"A": 1, "B": 2} 187 | response_a = response1 188 | response_b = response2 189 | if random.random() <= 0.5: 190 | response_map = {"A": 2, "B": 1} 191 | response_a = response2 192 | response_b = response1 193 | 194 | prompt = f"""We would like your feedback on a large language model we are building. Specifically, we would like you to compare two different LLM responses and let us know which one is better. 195 | 196 | Our objective for the LLM is: 197 | {objective} 198 | 199 | The prompt we are using for the LLM is: 200 | {prompt} 201 | 202 | Here are the two pieces of generated text. 203 | 204 | A: `{response_a}` 205 | 206 | B: `{response_b}` 207 | 208 | Please simply respond 'A' or 'B' as to which of the texts above addresses our earlier objective more effectively.
Do not add any additional explanations, thoughts, punctuation, or anything; simply write 'A' or 'B'.""" 209 | 210 | messages = [ 211 | {"role": "system", 212 | "content": "You are an AI assistant helping with prompt engineering and model evaluation."}, 213 | {"role": "user", "content": prompt}, 214 | ] 215 | 216 | response = self.model.complete_chat(messages, ['\n']) 217 | 218 | # ChatGPT has a knack for adding "." to the end of the reply. 219 | if len(response) == 2: 220 | response = response[0] 221 | 222 | choice = response_map[response] 223 | 224 | return choice 225 | -------------------------------------------------------------------------------- /phasellm/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Exception classes and tests for prompts, LLMs, and workflows. 3 | """ 4 | 5 | from typing import List 6 | 7 | from phasellm.llms import ChatPrompt 8 | 9 | 10 | def isAcceptableLLMResponse(response_given, acceptable_options) -> bool: 11 | """ 12 | Tests to confirm the response_given is in the list of acceptable_options. acceptable_options can also be a single 13 | string. 14 | 15 | Args: 16 | response_given: The response given by the LLM. 17 | acceptable_options: The acceptable options. 18 | 19 | Returns: 20 | True if the response is 'acceptable', otherwise throws an LLMResponseException. 21 | """ 22 | 23 | compare_to = None 24 | if isinstance(acceptable_options, str): 25 | compare_to = [acceptable_options] 26 | elif isinstance(acceptable_options, list): 27 | compare_to = acceptable_options 28 | 29 | if compare_to is None: 30 | raise Exception("isAcceptableLLMResponse() only accepts a list or string object for acceptable_options.") 31 | 32 | if response_given not in compare_to: 33 | raise LLMResponseException(response_given, compare_to) 34 | 35 | return True 36 | 37 | 38 | def isLLMCodeExecutable(llm_code: str) -> bool: 39 | """ 40 | Runs code and checks if any errors occur. Returns True if there are no errors. 41 | 42 | Args: 43 | llm_code: The code to run. 44 | 45 | Returns: 46 | True if the code is executable, otherwise throws an LLMCodeException. 47 | 48 | """ 49 | try: 50 | exec(llm_code) 51 | except Exception as e: 52 | raise LLMCodeException(llm_code, e) 53 | 54 | return True 55 | 56 | 57 | def isProperlyStructuredChat(messages, force_roles=False) -> bool: 58 | """ 59 | Checks if messages are an array of dicts with (role, content) keys. 60 | 61 | force_roles=True also confirms we only have roles of "system", "user", and "assistant" to abide by OpenAI's API. 62 | 63 | Args: 64 | messages: The messages to check. 65 | force_roles: If True, checks that the roles are "system", "user", and "assistant". 66 | 67 | Returns: 68 | True if the messages are properly structured, otherwise False. 69 | 70 | """ 71 | 72 | for m in messages: 73 | keys = m.keys() 74 | if not (len(keys) == 2 and "role" in keys and "content" in keys): 75 | return False 76 | if force_roles: 77 | role = m["role"] 78 | if role not in ["system", "user", "assistant"]: 79 | return False 80 | return True 81 | 82 | 83 | def reviewOutputWithLLM(text, requirements, llm): 84 | """ 85 | Has an LLM review an output and determine whether the output is OK or not. 86 | Args: 87 | text: The text to review. 88 | requirements: The requirements to review against. 89 | llm: The LLM to use for the review. 90 | 91 | Returns: 92 | True if the text meets the requirements, otherwise throws an LLMReviewException.
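Example (illustrative; any PhaseLLM model wrapper can serve as `llm`):
>>> reviewOutputWithLLM("Paris is the capital of France.", "The text must mention a capital city.", llm)  # True, or raises LLMReviewException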
93 | 94 | """ 95 | prompt = ChatPrompt( 96 | [{"role": "system", 97 | "content": "Follow the user's instructions exactly, and only respond with YES or NO (with additional info)."}, 98 | {"role": "user", 99 | "content": "I'm working with a large language model and hope you can confirm if the following text abides by " 100 | "a set of requirements I've provided. Here is the text:\n-----\n{output}\n-----\n\nBelow are the " 101 | "requirements the text above is supposed to meet.\n\n-----\n{requirements}\n-----\n\nDoes the " 102 | "text meet the requirements? Please only answer YES or NO. If NO, you can provide additional " 103 | "information on what the text is missing."} 104 | ]) 105 | 106 | result = llm.complete_chat(prompt.fill(text=text, requirements=requirements)) 107 | if result == "YES": 108 | return True 109 | else: 110 | raise LLMReviewException(result) 111 | 112 | 113 | class LLMReviewException(Exception): 114 | 115 | def __init__(self, message): 116 | """ 117 | Exception that gets thrown when an LLM review does not meet requirements. 118 | 119 | Args: 120 | message: The error message 121 | 122 | """ 123 | super().__init__("LLM Review Exception: text does not meet requirements.\nInfo: " + message) 124 | self.message = message 125 | 126 | def __repr__(self): 127 | return "LLM Review Exception: text does not meet requirements.\nInfo: " + self.message 128 | 129 | 130 | class ChatStructureException(Exception): 131 | 132 | def __init__(self): 133 | """ 134 | Exception that gets thrown when a chat structure isn't correct (i.e., role, content pairs are not pairs) 135 | """ 136 | super().__init__("Chat Structure Exception: chat messages are not following the proper chat structure.") 137 | 138 | def __repr__(self): 139 | return f"Chat Structure Exception: chat messages are not following the proper chat structure." 140 | 141 | 142 | class LLMCodeException(Exception): 143 | 144 | def __init__(self, code, exc): 145 | """ 146 | Exception to track exceptions from code generated by LLMs. 147 | 148 | Args: 149 | code: The code that is raising an error. 150 | exc: The exception that is being raised. 151 | 152 | """ 153 | super().__init__("LLM Code Exception: code is raising an error.") 154 | self.code = code 155 | self.exception = exc 156 | self.exception_string = str(exc) 157 | 158 | def __repr__(self): 159 | return f"LLM Code Exception: code is raising an error." 160 | 161 | 162 | class LLMResponseException(Exception): 163 | 164 | def __init__(self, response_given: str, acceptable_options: List[str]): 165 | """ 166 | Exception to track acceptable responses from an LLM. 167 | 168 | Args: 169 | response_given: The response given by the LLM. 170 | acceptable_options: The acceptable options for the LLM. 171 | 172 | """ 173 | super().__init__("LLM Response Exception: response given is not in the list of acceptable options.") 174 | self.response_given = response_given 175 | self.acceptable_options = acceptable_options 176 | 177 | def __repr__(self): 178 | return f"LLM Response Exception: response given is not in the list of acceptable options." 179 | -------------------------------------------------------------------------------- /phasellm/html.py: -------------------------------------------------------------------------------- 1 | """ 2 | Support for convering LLM-related classes and objects to HTML and various outputs. 3 | """ 4 | 5 | import re 6 | 7 | # Easier to have this variable than to escape all the "{" and "}" later. 
8 | style = """ 9 | .phasellm_chatbot_stream { 10 | margin:5px; 11 | box-sizing:content-box; 12 | padding:8px; 13 | border-radius:8px; 14 | border:1px solid black; 15 | display:inline-block; 16 | } 17 | .phasellm_chatbot_stream .response_container { 18 | display:block; 19 | display:block; 20 | margin:5px; 21 | padding:8px; 22 | } 23 | 24 | .content_user { 25 | background-color:green; 26 | color:white; 27 | } 28 | 29 | .content_system { 30 | color:gray; 31 | background-color:lightgray; 32 | font-style:italic; 33 | } 34 | 35 | .content_assistant { 36 | color:white; 37 | background-color:crimson; 38 | } 39 | 40 | .response { 41 | padding:8px; 42 | border-radius:8px; 43 | } 44 | 45 | .phasellm_chatbot_stream .timestamp { 46 | margin:5px 5px 5px 15px; 47 | font-size:70%; 48 | color:gray; 49 | font-style:italic; 50 | display:inline-block; 51 | } 52 | 53 | .phasellm_chatbot_stream .time_taken { 54 | margin:5px 5px 5px 15px; 55 | font-size:70%; 56 | color:gray; 57 | font-style:italic; 58 | display:inline-block; 59 | } 60 | 61 | .legend { 62 | font-size:70%; 63 | text-align:right; 64 | padding-right:15px; 65 | } 66 | 67 | .legend_box { 68 | width:10px; 69 | height:10px; 70 | display:inline-block; 71 | position:relative; 72 | top:2px; 73 | margin-left:8px; 74 | margin-right:2px; 75 | } 76 | """ 77 | 78 | 79 | def _formatContentToHtml(string) -> str: 80 | """ 81 | Converts a String into an HTML-friendly representation. 82 | 83 | Args: 84 | string: The string to convert. 85 | 86 | Returns: 87 | The HTML formatted string. 88 | 89 | """ 90 | new_string = re.sub("<", "<", string) 91 | new_string = re.sub(">", ">", new_string) 92 | new_string = re.sub("[\r\n]+", "
", new_string) 93 | return new_string 94 | 95 | 96 | def toHtmlFile(html, filepath) -> None: 97 | """ 98 | Takes a html object generated by PhaseLLM and saves it to an HTML file. 99 | 100 | Args: 101 | html: The HTML object to save. 102 | filepath: The path to save the HTML file to. 103 | 104 | """ 105 | 106 | html_content = f""" 107 | 108 | 109 | 110 | 113 | 114 | 115 | {html} 116 | 117 | 118 | """ 119 | with open(filepath, "w") as w: 120 | w.write(html_content) 121 | 122 | 123 | def chatbotToJson(chatbot, order_field=None) -> str: 124 | """ 125 | Converts a chatbot's message stack to a JSON array. Optionally, add an order_field key to save the order of the array itself. 126 | 127 | Args: 128 | chatbot: The ChatBot object whose message stack we want. 129 | order_field: Optional key to include the array order value into the dictionary. 130 | 131 | Returns: 132 | The JSON dictionary representing the mesages from the chatbot object. 133 | """ 134 | 135 | messages = chatbot.messages 136 | json_to_return = [] 137 | ctr = 0 138 | 139 | for m in messages: 140 | new_m = m.copy() 141 | if order_field is not None: 142 | new_m[order_field] = ctr 143 | ctr += 1 144 | json_to_return.append(new_m) 145 | 146 | return json_to_return 147 | 148 | 149 | def chatbotToHtml(chatbot) -> str: 150 | """ 151 | Converts a chatbot's message stack to HTML. 152 | 153 | Args: 154 | chatbot: The chatbot to convert. 155 | 156 | Returns: 157 | The HTML representation of the chatbot message stack. 158 | 159 | """ 160 | 161 | chatbot_html = """
162 | <div class="legend"> 163 | Legend<div class="legend_box content_system">&nbsp;</div>System<div class="legend_box content_assistant">&nbsp;</div>Assistant<div class="legend_box content_user">&nbsp;</div>User 164 | </div>""" 165 | 166 | messages = chatbot.messages 167 | for m in messages: 168 | m_timestamp = "" 169 | if "timestamp_utc" in m: 170 | m_timestamp = m["timestamp_utc"].strftime("%d %B %Y at %H:%M:%S") 171 | 172 | m_log_time_seconds_string = "" 173 | if "log_time_seconds" in m: 174 | m_log_time_seconds_string = f"""<div class="time_taken">({str(round(m['log_time_seconds'], 3))} seconds)</div>""" 175 | 176 | response_html = f""" 177 | <div class="response_container"> 178 | <div class="response content_{m['role']}">{_formatContentToHtml(m['content'])}</div> 179 | <div class="timestamp">{m_timestamp}</div> 180 | {m_log_time_seconds_string} 181 | </div> 182 | """ 183 | 184 | chatbot_html += response_html 185 | 186 | chatbot_html += "\n</div>
" 187 | 188 | return chatbot_html 189 | -------------------------------------------------------------------------------- /phasellm/llms_utils.py: -------------------------------------------------------------------------------- 1 | def extract_vertex_ai_kwargs(kwargs: dict) -> dict: 2 | """ 3 | Extracts the Vertex AI kwargs from the kwargs dictionary. 4 | Args: 5 | kwargs: The kwargs dictionary. 6 | 7 | Returns: 8 | The Vertex AI kwargs. 9 | 10 | """ 11 | 12 | return { 13 | 'max_output_tokens': kwargs['max_output_tokens'] if 'max_output_tokens' in kwargs else None, 14 | 'candidate_count': kwargs['candidate_count'] if 'candidate_count' in kwargs else None, 15 | 'top_p': kwargs['top_p'] if 'top_p' in kwargs else None, 16 | 'top_k': kwargs['top_k'] if 'top_k' in kwargs else None, 17 | 'logprobs': kwargs['logprobs'] if 'logprobs' in kwargs else None, 18 | 'presence_penalty': kwargs['presence_penalty'] if 'presence_penalty' in kwargs else None, 19 | 'frequency_penalty': kwargs['frequency_penalty'] if 'frequency_penalty' in kwargs else None, 20 | 'logit_bias': kwargs['logit_bias'] if 'logit_bias' in kwargs else None 21 | } 22 | 23 | 24 | def extract_vertex_ai_response_metadata(response) -> dict: 25 | last_response_header = {} 26 | if hasattr(response, '_raw_response'): 27 | last_response_header = { 28 | **last_response_header, 29 | **response._raw_response.PromptFeedback.to_dict(response._raw_response.prompt_feedback), 30 | **response._raw_response.UsageMetadata.to_dict(response._raw_response.usage_metadata) 31 | } 32 | if hasattr(response, '_prediction_response'): 33 | last_response_header = { 34 | **last_response_header, 35 | **response._prediction_response.metadata 36 | } 37 | if hasattr(response, 'safety_attributes'): 38 | last_response_header = { 39 | **last_response_header, 40 | **response.safety_attributes 41 | } 42 | return last_response_header 43 | -------------------------------------------------------------------------------- /phasellm/logging.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logging support. This allows you to use the phasellm library to send chats to evals.phasellm.com and review them via our hosted front-end. 3 | """ 4 | 5 | import requests 6 | import json 7 | 8 | from typing import List, Optional 9 | 10 | from .llms import Message, ChatBot 11 | 12 | import hashlib 13 | import os 14 | 15 | _PHASELLM_EVALS_BASE_URL = "https://evals.phasellm.com/api/v0.1" 16 | 17 | 18 | class FileLogger: 19 | """ 20 | This logger will save chats to disk. It will export chats to a flat TXT file. 21 | """ 22 | 23 | def __init__(self, folder_path: str, separator: str = "\n\n-----------------\n\n"): 24 | """ 25 | Args: 26 | folder_path: The path to the folder where the logs will be saved. 27 | separator: The separator between messages in the log file. 28 | """ 29 | self.folder_path = folder_path 30 | if not os.path.exists(folder_path): 31 | os.makedirs(folder_path) 32 | self.separator = separator 33 | 34 | def log( 35 | self, 36 | messages: List[Message], 37 | chat_id: Optional[int] = None, 38 | title: Optional[str] = None, 39 | source_id: Optional[str] = None, 40 | file_name: str = None, 41 | ) -> str: 42 | """ 43 | Saves or updates the relevant chat to a folder. 44 | 45 | Args: 46 | messages: The messages array from the chat. 47 | chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them. 
48 | title: Optional title for the chat. 49 | source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals. 50 | file_name: Optional String for what to call the file. Otherwise will use chat_id. If chat_id is not given, then will use an MD5 sum of the content. 51 | 52 | Returns: 53 | The chat_id associated with the chat. 54 | """ 55 | 56 | file_content = "" 57 | for ctr, m in enumerate(messages): 58 | file_content += f"ROLE: {m['role']}\n{m['content']}\n\n" 59 | if ctr < len(messages) - 1: 60 | file_content += self.separator 61 | 62 | if file_name is None: 63 | if chat_id is not None: 64 | file_name = chat_id + ".txt" 65 | else: 66 | chat_id = hashlib.md5(file_content.encode()).hexdigest() 67 | file_name = chat_id + ".txt" 68 | 69 | with open(self.folder_path + "/" + file_name, "w") as f: 70 | f.write(file_content) 71 | 72 | return chat_id 73 | 74 | def logChatBot( 75 | self, 76 | chatbot: ChatBot, 77 | chat_id: Optional[int] = None, 78 | title: Optional[str] = None, 79 | source_id: Optional[str] = None, 80 | file_name: str = None, 81 | ) -> str: 82 | """ 83 | Logs the message stack for a chatbot to a folder. 84 | 85 | Args: 86 | chatbot: The chatbot object to log. 87 | chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them. 88 | title: Optional title for the chat. 89 | source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals. 90 | file_name: Optional String for what to call the file. Otherwise will use chat_id. If chat_id is not given, then will use an MD5 sum of the content. 91 | 92 | Returns: 93 | The chat_id associated with the chat. 94 | """ 95 | 96 | message_array = [] 97 | for m in chatbot.messages: 98 | new_m = {"role": m["role"], "content": m["content"]} 99 | message_array.append(new_m) 100 | return self.log(message_array, chat_id, title, source_id, file_name) 101 | 102 | 103 | class PhaseLogger: 104 | 105 | def __init__( 106 | self, 107 | apikey: str, 108 | ): 109 | """ 110 | Helper class for logging chats to evals.phasellm.com. 111 | 112 | Args: 113 | apikey: The API key associated with your evals.phasellm.com account. 114 | """ 115 | super().__init__() 116 | self.apikey = apikey 117 | 118 | def log( 119 | self, 120 | messages: List[Message], 121 | chat_id: Optional[int] = None, 122 | title: Optional[str] = None, 123 | source_id: Optional[str] = None, 124 | ) -> int: 125 | """ 126 | Saves or updates the relevant chat at evals.phasellm.com 127 | 128 | Args: 129 | messages: The messages array from the chat. 130 | chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them. 131 | title: Optional title for the chat. 132 | source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals. 133 | 134 | Returns: 135 | The chat_id associated with the chat. 
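Example (illustrative; the API key and messages below are placeholders):
>>> logger = PhaseLogger(apikey="YOUR_EVALS_API_KEY")
>>> chat_id = logger.log([{"role": "user", "content": "Hello!"}], title="Demo chat")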
136 | """ 137 | 138 | save_url = _PHASELLM_EVALS_BASE_URL + "/save_chat" 139 | headers = { 140 | "Authorization": f"Bearer {self.apikey}", 141 | "Content-Type": "application/json", 142 | } 143 | payload = {"messages": messages} 144 | if chat_id is not None: 145 | payload["chat_id"] = chat_id 146 | 147 | if title is not None: 148 | payload["title"] = title 149 | 150 | if source_id is not None: 151 | payload["source_id"] = source_id 152 | 153 | response = requests.post(save_url, json=payload, headers=headers) 154 | data = json.loads(response.text) 155 | if data["status"] == "error": 156 | raise Exception(f"PhaseLLM Evals: an error occured. {data['message']}") 157 | 158 | return data["chat_id"] 159 | 160 | def logChatBot( 161 | self, 162 | chatbot: ChatBot, 163 | chat_id: Optional[int] = None, 164 | title: Optional[str] = None, 165 | source_id: Optional[str] = None, 166 | ) -> int: 167 | """ 168 | Logs the message stack for a chatbot to evals.phasellm.com. 169 | 170 | Args: 171 | chatbot: The chatbot object to log. 172 | chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them. 173 | title: Optional title for the chat. 174 | source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals. 175 | 176 | Returns: 177 | The chat_id associated with the chat. 178 | """ 179 | message_array = [] 180 | for m in chatbot.messages: 181 | new_m = {"role": m["role"], "content": m["content"]} 182 | message_array.append(new_m) 183 | return self.log(message_array, chat_id, title, source_id) 184 | -------------------------------------------------------------------------------- /phasellm/types.py: -------------------------------------------------------------------------------- 1 | from phasellm.configurations import OpenAIConfiguration, AzureAPIConfiguration, AzureActiveDirectoryConfiguration, \ 2 | VertexAIConfiguration 3 | 4 | from typing import Union, Literal 5 | 6 | CLAUDE_MODEL = Union[ 7 | str, 8 | Literal["claude-v1"], 9 | Literal["claude-instant-1"], 10 | Literal["claude-instant-1.1"], 11 | Literal["claude-2"], 12 | Literal["claude-2.0"], 13 | ] 14 | 15 | OPENAI_API_CONFIG = Union[ 16 | OpenAIConfiguration, 17 | AzureAPIConfiguration, 18 | AzureActiveDirectoryConfiguration 19 | ] 20 | 21 | VERTEXAI_API_CONFIG = VertexAIConfiguration 22 | -------------------------------------------------------------------------------- /project_metadata.py: -------------------------------------------------------------------------------- 1 | NAME = "phasellm" 2 | 3 | AUTHOR = "Wojciech Gryc" 4 | 5 | VERSION = "0.0.25" 6 | 7 | DESCRIPTION = ( 8 | "Wrappers for common large language models (LLMs) with support for evaluation." 9 | ) 10 | 11 | LONG_DESCRIPTION = ( 12 | "PhaseLLM provides wrappers for common large language models and use cases. This makes it easy to " 13 | "swap models in and out as needed. We also provide support for evaluation of models so you can " 14 | "choose which models are better to use." 15 | ) 16 | -------------------------------------------------------------------------------- /readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: "ubuntu-20.04" 5 | tools: 6 | python: "3.8" 7 | 8 | sphinx: 9 | configuration: docs/source/conf.py 10 | 11 | python: 12 | install: 13 | - method: pip 14 | path: . 
15 | extra_requirements: 16 | - docs -------------------------------------------------------------------------------- /release_checklist.md: -------------------------------------------------------------------------------- 1 | # Release Checklist 2 | 3 | This checklist is used prior to a new release, to ensure everything works properly and that we have a high quality release. 4 | 5 | - [ ] Update version # 6 | - [ ] Do a local package install, ensuring all tests run properly 7 | - [ ] Publish final version to PyPI 8 | - [ ] Publish release in GitHub 9 | - [ ] Tweet about it 10 | - [ ] Update *Change Log* on site 11 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | twine>=4.0.2 2 | wheel>=0.41.3 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask>=2.0.0 2 | requests>=2.24.0 3 | httpx>=0.25.0 4 | openai>=1.1.0 5 | cohere>=4.0.0 6 | transformers>=4.25.0 7 | accelerate>=0.16.0 8 | torch>=1.0.0 9 | python-dotenv 10 | typing-extensions>=4.6.3 11 | urllib3==2.0.7 12 | sseclient-py>=1.7.2 13 | docker>=6.1.3 14 | pandas>=2.0.0 15 | openpyxl>=3.1.0 16 | beautifulsoup4>=4.12.2 17 | lxml>=4.9.2 18 | fake-useragent>=1.2.1 19 | playwright>=1.35.0 20 | feedparser>=6.0.10 21 | azure-identity>=1.14.0 22 | replicate==0.20.0 23 | google-cloud-aiplatform>=1.42.1 24 | anthropic>=0.30.1 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | from project_metadata import NAME, VERSION, AUTHOR, DESCRIPTION, LONG_DESCRIPTION 4 | 5 | setup( 6 | name=NAME, 7 | version=VERSION, 8 | description=DESCRIPTION, 9 | long_description=LONG_DESCRIPTION, 10 | author=AUTHOR, 11 | author_email="hello@phaseai.com", 12 | license="MIT", 13 | packages=find_packages(), 14 | install_requires=[ 15 | "Flask>=2.0.0", 16 | "requests>=2.24.0", 17 | "httpx>=0.25.0", 18 | "openai>=1.1.0", 19 | "cohere>=4.0.0", 20 | "python-dotenv", 21 | "pandas>=2.0.0", 22 | "openpyxl>=3.1.0", 23 | "typing-extensions>=4.6.3", 24 | "urllib3==2.0.7", 25 | "sseclient-py>=1.7.2", 26 | "docker>=6.1.3", 27 | "beautifulsoup4>=4.12.2", 28 | "lxml>=4.9.2", 29 | "fake-useragent>=1.2.1", 30 | "playwright>=1.35.0", 31 | "feedparser>=6.0.10", 32 | "azure-identity>=1.14.0", 33 | "replicate==0.20.0", 34 | "google-cloud-aiplatform>=1.42.1", 35 | "anthropic>=0.30.1", 36 | ], 37 | extras_require={ 38 | "complete": [ 39 | "transformers>=4.25.0", 40 | "accelerate>=0.16.0", 41 | "torch>=1.0.0", 42 | ], 43 | "docs": [ 44 | "furo", 45 | "sphinx>=7.1.2", 46 | "myst_parser>=2.0.0", 47 | "sphinx-autoapi>=2.1.1", 48 | "sphinx-autobuild>=2021.3.14", 49 | ], 50 | }, 51 | python_requires=">=3.8.0", 52 | keywords="llm, nlp, evaluation, ai", 53 | classifiers=[ 54 | "Development Status :: 3 - Alpha", 55 | "Intended Audience :: Developers", 56 | "License :: OSI Approved :: MIT License", 57 | "Programming Language :: Python :: 3", 58 | ], 59 | ) 60 | -------------------------------------------------------------------------------- /tests-non-deterministic/README.md: -------------------------------------------------------------------------------- 1 | ### Non Deterministic Tests 2 | 3 | These tests are non-deterministic in nature, so they should only 
be run and reviewed by a human. 4 | 5 | **Do not include these tests in an automated CI pipeline, or you may experience transient 6 | failures** 7 | 8 | Note: we may be able to integrate these into CI if we set them up with retries and acceptable 9 | success rate thresholds. -------------------------------------------------------------------------------- /tests-non-deterministic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests-non-deterministic/__init__.py -------------------------------------------------------------------------------- /tests-non-deterministic/llms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests-non-deterministic/llms/__init__.py -------------------------------------------------------------------------------- /tests-non-deterministic/llms/test_llms.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from unittest import TestCase 4 | 5 | from dotenv import load_dotenv 6 | 7 | from phasellm.llms import OpenAIGPTWrapper, ChatBot 8 | 9 | load_dotenv() 10 | openai_api_key = os.getenv("OPENAI_API_KEY") 11 | 12 | 13 | class TestChatBot(TestCase): 14 | 15 | def test_openai_gpt_chat_temperature(self): 16 | prompt = 'What is the capital of Jupiter?' 17 | verbose = True 18 | 19 | # Test low temperature 20 | llm = OpenAIGPTWrapper(openai_api_key, "gpt-3.5-turbo", temperature=0) 21 | fixture = ChatBot(llm) 22 | low_temp_res = fixture.chat(prompt) 23 | 24 | # Test high temperature 25 | llm = OpenAIGPTWrapper(openai_api_key, "gpt-3.5-turbo", temperature=2) 26 | fixture = ChatBot(llm) 27 | high_temp_res = fixture.chat(prompt) 28 | 29 | if verbose: 30 | print(f'Low temp response:\n{low_temp_res}') 31 | print(f'Low temperature len: {len(low_temp_res)}') 32 | 33 | print(f'High temp response:\n{high_temp_res}') 34 | print(f'High temperature len: {len(high_temp_res)}') 35 | 36 | # Responses should differ. 37 | self.assertNotEqual(low_temp_res, high_temp_res) 38 | 39 | # High temperature should generally produce longer responses. 40 | self.assertTrue(len(low_temp_res) < len(high_temp_res)) 41 | -------------------------------------------------------------------------------- /tests/README.MD: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | ### Structure 4 | 5 | #### Release Checklist 6 | 7 | `release_checklist_code.py` contains manual tests for the release checklist. 8 | 9 | #### E2E Tests 10 | 11 | E2E style tests are used when network communication is required. 12 | 13 | To run the E2E tests, run `python -m unittest discover tests/e2e`. 14 | 15 | ##### NOTE 16 | 17 | You may want to disable testing local models due to resource constraints. If so, set the environment variable 18 | `SKIP_LOCAL_MODELS` to `True`. 19 | 20 | #### Unit Tests 21 | 22 | Unit tests are for testing individual functions and when network communication is not required. 23 | 24 | Can mock network communication if necessary. 25 | 26 | To run the unit tests, run `python -m unittest discover tests/unit`. 27 | 28 | #### Running tests on a VM 29 | 30 | We use a Paperspace VM to run PhaseLLM tests. 
31 | 32 | ##### VM Requirements 33 | 34 | - GPU with >= 30GB VRAM 35 | - 100GB disk (for model weights + packages + docker) 36 | 37 | ##### Connecting to VM 38 | 39 | You need to create + add an SSH key to the VM. 40 | 1) Create an SSH key if you haven’t already. 41 | https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent 42 | 2) Copy it into Paperspace. 43 | https://console.paperspace.com/account/settings/ssh-keys 44 | 45 | ##### Initial VM Setup (Already Done) 46 | 47 | Install Docker 48 | https://docs.docker.com/engine/install/ubuntu/ 49 | 50 | Get base python image. 51 | ``` 52 | docker pull python:3 53 | ``` 54 | 55 | Create a code directory in the VM. 56 | ``` 57 | mkdir code 58 | ``` 59 | 60 | ##### New Tester Setup 61 | 62 | Create code folder for repos + forks. Currently, there is a ‘garett’ folder for holding my repo fork. You may want to 63 | add a ‘wojciech’ folder for the master repo. 64 | ``` 65 | cd code 66 | mkdir myname 67 | ``` 68 | 69 | Clone repo into your folder. Make sure you clone using HTTPS, otherwise you need to add an SSH key to the VM to access 70 | the repo. 71 | ``` 72 | cd myname 73 | git clone https://github.com/... 74 | cd phasellm 75 | ``` 76 | 77 | Create a virtual environment in the cloned repository. 78 | ``` 79 | python -m venv .env 80 | ``` 81 | 82 | ##### Test Specific Setup 83 | 84 | Ensure you are on the branch you want to test. 85 | ``` 86 | cd code/myname/phasellm 87 | git fetch --all 88 | git checkout --track origin/your-branch 89 | ``` 90 | 91 | Create a testing bash script if you don’t already have one. Example below. 92 | ``` 93 | #!/bin/bash 94 | export ANTHROPIC_API_KEY=key 95 | export COHERE_API_KEY=key 96 | export HUGGING_FACE_API_KEY=key 97 | export OPENAI_API_KEY=key 98 | 99 | cd phasellm 100 | . .env/bin/activate 101 | pip install -r requirements.txt 102 | nohup python -m unittest discover -s tests -v > ../test.log & 103 | cd .. 104 | ``` 105 | 106 | Run the bootstrap_tests.sh script. 107 | ``` 108 | sudo /bin/sh bootstrap_tests.sh 109 | ``` 110 | 111 | The tests run as a background process, so once you see the nohup message, you can escape the script and follow the 112 | output. 113 | `ctrl + c (or command + c on mac)` 114 | ``` 115 | tail -f test.log 116 | ``` 117 | 118 | ##### Useful Commands 119 | Monitor the process resources and find process IDs 120 | ``` 121 | top 122 | ``` 123 | 124 | Kill a process 125 | ``` 126 | sudo kill process_id 127 | ``` 128 | 129 | Check why process was killed 130 | ``` 131 | dmesg | less 132 | ``` 133 | 134 | Check Nvidia GPU usage 135 | ``` 136 | nvidia-smi 137 | ``` 138 | 139 | Shut down VM 140 | ``` 141 | sudo shutdown now 142 | ``` 143 | 144 | ##### Gotchas 145 | 146 | The Paperspace VM turns off on its own after 1 hour, so watch out for that.
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/__init__.py
--------------------------------------------------------------------------------
/tests/e2e/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/__init__.py
--------------------------------------------------------------------------------
/tests/e2e/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/agents/__init__.py
--------------------------------------------------------------------------------
/tests/e2e/llms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/llms/__init__.py
--------------------------------------------------------------------------------
/tests/e2e/sse/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/sse/__init__.py
--------------------------------------------------------------------------------
/tests/e2e/sse/test_e2e_sse.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import unittest
4 | import requests
5 | import sseclient
6 | 
7 | from typing import Generator
8 | 
9 | from unittest import TestCase
10 | 
11 | from dotenv import load_dotenv
12 | 
13 | from flask import Flask, Response
14 | 
15 | from multiprocessing import Process
16 | 
17 | from phasellm.llms import StreamingOpenAIGPTWrapper, _format_sse
18 | 
19 | load_dotenv()
20 | openai_api_key = os.getenv("OPENAI_API_KEY")
21 | 
22 | 
23 | def mock_generator_failure() -> Generator:
24 |     """
25 |     Mock generator for failure modes of SSE streaming.
26 | 
27 |     Desired output on client side is:
28 |     '''
29 |     123
30 | 
31 |     456
32 |     78
33 | 
34 | 
35 |     9
36 |     10
37 |     id: 1
38 |     id: 2
39 |     event: test
40 |     '''
41 |     Returns:
42 |         A generator yielding raw SSE event strings.
43 |     """
44 |     yield "data: 1\n\n"
45 |     yield "data: 2\n\n"
46 |     yield "data: 3\n\n4\n\n"
47 |     yield "data: 5\n\n"
48 |     yield "data: 6\n\n\n"
49 |     yield "data: 7\n\n"
50 |     yield "data: 8\n\n\n\n\n"
51 |     yield "data: 9\n\n\n"
52 |     yield "data: 10\nid: 1\n\n\n"
53 |     yield "data: id: 2\n\n\n"
54 |     yield "data: event: test\n\n\n"
55 |     yield "data: <|END|>\n\n"
56 | 
57 | 
58 | def mock_generator_success() -> Generator:
59 |     """
60 |     Mock generator for the success mode of SSE streaming.
61 | 
62 |     Desired output on client side is:
63 |     '''
64 |     123
65 | 
66 |     456
67 |     78
68 | 
69 | 
70 |     9
71 |     10
72 |     id: 1
73 |     id: 2
74 |     event: test
75 |     '''
76 |     Returns:
77 |         A generator yielding raw SSE event strings.
78 |     """
79 |     yield "data: 1\n\n"
80 |     yield "data: 2\n\n"
81 |     yield "data: 3\ndata:\ndata:4\n\n"
82 |     yield "data: 5\n\n"
83 |     yield "data: 6\ndata:\n\n"
84 |     yield "data: 7\n\n"
85 |     yield "data: 8\ndata:\ndata:\ndata:\n\n"
86 |     yield "data: 9\ndata:\n\n"
87 |     yield "data: 10\ndata:id: 1\ndata:\n\n"
88 |     yield "data: id: 2\ndata:\n\n"
89 |     yield "data: event: test\n\n"
90 |     yield "data: <|END|>\n\n"
91 | 
92 | 
93 | def mock_generator_success_format_sse() -> Generator:
94 |     """
95 |     Mock generator for the success mode of SSE streaming, built with _format_sse.
96 | 
97 |     Desired output on client side is:
98 |     '''
99 |     123
100 | 
101 |     456
102 |     78
103 | 
104 | 
105 |     9
106 |     10
107 |     id: 1
108 |     id: 2
109 |     event: test
110 |     '''
111 |     Returns:
112 |         A generator yielding raw SSE event strings.
113 |     """
114 |     yield _format_sse("1")
115 |     yield _format_sse("2")
116 |     yield _format_sse("3\n\n4")
117 |     yield _format_sse("5")
118 |     yield _format_sse("6\n")
119 |     yield _format_sse("7")
120 |     yield _format_sse("8\n\n\n")
121 |     yield _format_sse("9\n")
122 |     yield _format_sse("10\nid: 1\n")
123 |     yield _format_sse("id: 2\n")
124 |     yield _format_sse("event: test")
125 |     yield _format_sse("<|END|>")
126 | 
127 | 
128 | def server_mock(generator: Generator):
129 |     """
130 |     SSE test server.
131 |     Returns:
132 |         None; runs the Flask app until the process is terminated.
133 |     """
134 |     app = Flask(__name__)
135 | 
136 |     @app.route('/stream')
137 |     def stream():
138 |         return Response(generator, mimetype="text/event-stream")
139 | 
140 |     app.run(debug=False, port=5000, host='0.0.0.0')
141 | 
142 | 
143 | def process_stream() -> str:
144 |     url = 'http://localhost:5000/stream'
145 |     headers = {'Accept': 'text/event-stream'}
146 | 
147 |     res = requests.get(url, headers=headers, stream=True)
148 |     client = sseclient.SSEClient(res)
149 |     data = []
150 |     for event in client.events():
151 |         if event.data == "<|END|>":
152 |             break
153 |         else:
154 |             data.append(event.data)
155 |     client.close()
156 |     res = ''.join(data)
157 |     return res
158 | 
159 | 
160 | def server_success_mock():
161 |     print(''.join(mock_generator_success()))
162 |     server_mock(mock_generator_success())
163 | 
164 | 
165 | def server_failure_mock():
166 |     print(''.join(mock_generator_failure()))
167 |     server_mock(mock_generator_failure())
168 | 
169 | 
170 | def print_intercept_generator(generator: Generator) -> Generator:
171 |     res = []
172 |     for item in generator:
173 |         res.append(item)
174 |         yield item
175 |     print(''.join(res))
176 | 
177 | 
178 | def server_llm():
179 |     llm = StreamingOpenAIGPTWrapper(
180 |         apikey=openai_api_key, model='text-davinci-003', format_sse=True, append_stop_token=True
181 |     )
182 |     generator: Generator = llm.text_completion(
183 |         "List two countries with two new line characters between them. "
184 |         "Example:\n"
185 |         "USA\n\nCanada\n\n"
186 |     )
187 | 
188 |     # Line below is for debugging purposes.
189 |     # generator: Generator = print_intercept_generator(generator)
190 | 
191 |     server_mock(generator)
192 | 
193 | 
194 | class TestSSE(TestCase):
195 | 
196 |     def test_sse_client_server_mock_success(self):
197 |         """
198 |         Test SSE success mode using a mock generator.
199 |         Returns:
200 | 
201 |         """
202 |         # Start test server
203 |         process = Process(target=server_success_mock)
204 |         process.start()
205 | 
206 |         res = process_stream()
207 | 
208 |         expected = (
209 |             "123\n"
210 |             "\n"
211 |             "456\n"
212 |             "78\n"
213 |             "\n"
214 |             "\n"
215 |             "9\n"
216 |             "10\n"
217 |             "id: 1\n"
218 |             "id: 2\n"
219 |             "event: test"
220 |         )
221 |         self.assertEqual(res, expected)
222 | 
223 |         process.terminate()
224 |         process.join()
225 | 
226 |     def test_sse_client_server_mock_failure(self):
227 |         """
228 |         Test SSE failure mode using a mock generator.
229 |         Returns:
230 | 
231 |         """
232 |         # Start test server
233 |         process = Process(target=server_failure_mock)
234 |         process.start()
235 | 
236 |         res = process_stream()
237 | 
238 |         # Notice the missing 4, and the lack of '\n' and 'id: 1'.
239 |         expected = "1235678910id: 2event: test"
240 |         self.assertEqual(res, expected)
241 | 
242 |         process.terminate()
243 |         process.join()
244 | 
245 |     def test_sse_client_server_llm(self):
246 |         """
247 |         Test SSE success mode using an LLM wrapper.
248 |         Returns:
249 | 
250 |         """
251 |         # Start test server
252 |         process = Process(target=server_llm)
253 |         process.start()
254 | 
255 |         res = process_stream()
256 | 
257 |         print(repr(res))
258 | 
259 |         matches = re.findall(r'\w+\n\n\w+', res)
260 |         self.assertTrue(len(matches) > 0, "Expected a word followed by two newlines, followed by a word.")
261 | 
262 |         process.terminate()
263 |         process.join()
264 | 
265 |     def test_success_generator_equality(self):
266 |         """
267 |         Test equality of success generators.
268 |         Returns:
269 | 
270 |         """
271 |         self.assertEqual(list(mock_generator_success()), list(mock_generator_success_format_sse()))
272 | 
273 | 
274 | if __name__ == '__main__':
275 |     unittest.main()
276 | 
--------------------------------------------------------------------------------
/tests/release_checklist_code.py:
--------------------------------------------------------------------------------
1 | """
2 | This code is used to test various aspects of PhaseLLM. We recommend running this on a P3 EC2 instance with Ubuntu 22.04 installed. To get this up and running, run the following code:
3 | 
4 | sudo apt-get update
5 | sudo apt-get upgrade
6 | sudo apt-get install xorg
7 | sudo apt-get install nvidia-driver-460
8 | sudo reboot
9 | 
10 | Run `nvidia-smi` to ensure you have GPU devices with CUDA installed.
11 | 
12 | """
13 | 
14 | ##########################################################################################
15 | # GPU SETUP
16 | #
17 | 
18 | import torch
19 | 
20 | # Confirm GPUs are installed and usable.
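# If the setup above worked, torch.cuda.is_available() prints True and
# torch.cuda.current_device() returns a device index (typically 0); False usually
# means the NVIDIA driver install has not taken effect yet (e.g. a reboot is needed).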
21 | print(torch.cuda.is_available()) 22 | print(torch.cuda.current_device()) 23 | 24 | ########################################################################################## 25 | # ENVIRONMENT VARIABLES 26 | # 27 | 28 | # Load all environment variables and API keys 29 | 30 | import os 31 | from dotenv import load_dotenv 32 | 33 | load_dotenv() 34 | openai_api_key = os.getenv("OPENAI_API_KEY") 35 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") 36 | cohere_api_key = os.getenv("COHERE_API_KEY") 37 | hugging_face_api_key = os.getenv("HUGGING_FACE_API_KEY") 38 | 39 | ########################################################################################## 40 | # GPT-3.5 EVALUATOR WITH COHERE AND CLAUDE COMPARISONS 41 | # 42 | 43 | # Run GPT-3.5 evaluator 44 | from phasellm.eval import GPTEvaluator 45 | 46 | # We'll use GPT-3.5 as the evaluator; this is the default setting in the class below 47 | e = GPTEvaluator(openai_api_key) 48 | 49 | # Our objective. 50 | objective = "We're building a chatbot to discuss a user's travel preferences and provide advice." 51 | 52 | # Chats that have been launched by users. 53 | travel_chat_starts = [ 54 | "I'm planning to visit Poland in spring.", 55 | "I'm looking for the cheapest flight to Europe next week.", 56 | "I am trying to decide between Prague and Paris for a 5-day trip", 57 | "I want to visit Europe but can't decide if spring, summer, or fall would be better.", 58 | "I'm unsure I should visit Spain by flying via the UK or via France." 59 | ] 60 | 61 | from phasellm.llms import CohereWrapper, ClaudeWrapper 62 | cohere_model = CohereWrapper(cohere_api_key) 63 | claude_model = ClaudeWrapper(anthropic_api_key) 64 | 65 | print("Running test. 1 = Cohere, and 2 = Claude.") 66 | for tcs in travel_chat_starts: 67 | messages = [{"role":"system", "content":objective}, 68 | {"role":"user", "content":tcs}] 69 | response_cohere = cohere_model.complete_chat(messages, "assistant") 70 | response_claude = claude_model.complete_chat(messages, "assistant") 71 | pref = e.choose(objective, tcs, response_cohere, response_claude) 72 | print(f"{pref}") 73 | 74 | ########################################################################################## 75 | # DOLLY TESTS 76 | # 77 | 78 | from phasellm.llms import DollyWrapper 79 | dw = DollyWrapper() 80 | 81 | # Testing chat capability. 82 | messages = [{"role":"user", "content":"What should I eat for lunch today?"}] 83 | dw.complete_chat(messages, 'assistant') 84 | 85 | # Run a text completion. 86 | dw.text_completion("The capital of Poland is") 87 | 88 | ########################################################################################## 89 | # GPT EVALUATOR WITH COHERE AND DOLLY COMPARISONS 90 | # 91 | 92 | import os 93 | from dotenv import load_dotenv 94 | 95 | load_dotenv() 96 | openai_api_key = os.getenv("OPENAI_API_KEY") 97 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY") 98 | cohere_api_key = os.getenv("COHERE_API_KEY") 99 | 100 | from phasellm.eval import GPTEvaluator 101 | 102 | # We'll use GPT-3.5 as the evaluator. 103 | e = GPTEvaluator(openai_api_key) 104 | 105 | # Our objective. 106 | objective = "We're building a chatbot to discuss a user's travel preferences and provide advice." 107 | 108 | # Chats that have been launched by users. 
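# (Same starter set as the Cohere/Claude comparison above; reused here for the Dolly comparison.)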
109 | travel_chat_starts = [ 110 | "I'm planning to visit Poland in spring.", 111 | "I'm looking for the cheapest flight to Europe next week.", 112 | "I am trying to decide between Prague and Paris for a 5-day trip", 113 | "I want to visit Europe but can't decide if spring, summer, or fall would be better.", 114 | "I'm unsure I should visit Spain by flying via the UK or via France." 115 | ] 116 | 117 | from phasellm.llms import CohereWrapper 118 | from phasellm.llms import DollyWrapper # NEW: importing the DollyWrapper... 119 | dw = DollyWrapper() # NEW: ... and instantiating it. 120 | 121 | cohere_model = CohereWrapper(cohere_api_key) 122 | 123 | print("Running test. 1 = Cohere, and 2 = Dolly.") 124 | for tcs in travel_chat_starts: 125 | messages = [{"role":"system", "content":objective}, 126 | {"role":"user", "content":tcs}] 127 | response_cohere = cohere_model.complete_chat(messages, "assistant") 128 | response_dw = dw.complete_chat(messages, "assistant") # NEW: minor change to variable name 129 | pref = e.choose(objective, tcs, response_cohere, response_dw) 130 | print(f"{pref}") 131 | 132 | ########################################################################################## 133 | # HUGGINGFACE INFERENCE API TESTS 134 | # 135 | 136 | from phasellm.llms import HuggingFaceInferenceWrapper 137 | hf = HuggingFaceInferenceWrapper(hugging_face_api_key, model_url='https://api-inference.huggingface.co/models/google/flan-t5-xxl') 138 | 139 | # Testing chat capability. 140 | messages = [{"role":"user", "content":"What should I eat for lunch today?"}] 141 | hf.complete_chat(messages, 'assistant') 142 | 143 | # Run a text completion. 144 | hf.text_completion("The capital of Poland is") 145 | 146 | ########################################################################################## 147 | # CHATBOT resend() TEST 148 | # 149 | 150 | from phasellm.llms import OpenAIGPTWrapper, ChatBot 151 | 152 | oaiw = OpenAIGPTWrapper(openai_api_key, 'gpt-4') 153 | cb = ChatBot(oaiw) 154 | m = [{'role': 'system', 'content': "You are a robot that adds 'YO!' 
to the end of every sentence."}, {'role': 'user', 'content': 'Tell me about Poland.'}]
155 | cb.messages = m
156 | cb.resend()
157 | 
158 | ##########################################################################################
159 | # EVAL simulations TO EXCEL
160 | #
161 | 
162 | from phasellm.llms import ChatBot, OpenAIGPTWrapper
163 | from phasellm.eval import simulate_n_chat_simulations
164 | 
165 | o = OpenAIGPTWrapper(openai_api_key)
166 | c = ChatBot(o)
167 | c.messages = [ {"role":"system", "content":"You're a mathbot."}, {"role":"user", "content":"What is 3*4*5*zebra?"} ]
168 | 
169 | x = simulate_n_chat_simulations(c, 4, 'responses.xlsx')
--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/unit/__init__.py
--------------------------------------------------------------------------------
/tests/unit/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/unit/agents/__init__.py
--------------------------------------------------------------------------------
/tests/unit/llms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/unit/llms/__init__.py
--------------------------------------------------------------------------------
/tests/unit/llms/test_llms.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | 
3 | from phasellm.llms import Prompt, OpenAIGPTWrapper
4 | 
5 | 
6 | class TestPrompt(TestCase):
7 | 
8 |     def test_prompt_fill(self):
9 |         p = "1: {fill_1}, 2: {fill_2}, 3: {fill_3}"
10 |         prompt = Prompt(p)
11 | 
12 |         actual = prompt.fill(fill_1="one", fill_2="two", fill_3="three")
13 | 
14 |         expected = "1: one, 2: two, 3: three"
15 | 
16 |         self.assertEqual(actual, expected, f"{actual} != {expected}")
17 | 
18 | 
19 | class TestOpenAIGPTWrapper(TestCase):
20 |     CONFIG_ERROR = 'Must pass apikey or api_config. If using kwargs, check capitalization.'
21 | 
22 |     def test_config_error_incorrect_kwarg(self):
23 |         # A miscapitalized kwarg (apiKey instead of apikey) should raise a config error.
24 |         with self.assertRaises(Exception) as context:
25 |             OpenAIGPTWrapper(apiKey='test')
26 |         self.assertEqual(str(context.exception), self.CONFIG_ERROR)
27 | 
28 |     def test_config_error_missing_config(self):
29 |         # Passing no configuration at all should raise the same config error.
30 |         with self.assertRaises(Exception) as context:
31 |             OpenAIGPTWrapper()
32 |         self.assertEqual(str(context.exception), self.CONFIG_ERROR)
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | from queue import Queue
4 | 
5 | from threading import Thread
6 | 
7 | 
8 | class Timeout:
9 | 
10 |     def __init__(self, seconds=5):
11 |         """
12 |         This class is used to time out tests.
13 |         Args:
14 |             seconds: The timeout in seconds.
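
        A minimal usage sketch (based on the start/stop/check methods below):

            timeout = Timeout(seconds=5)
            timeout.start()
            # ... run the code under test ...
            timeout.stop()
            timeout.check()  # Raises TimeoutError only if the timeout elapsed before stop().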
15 | """ 16 | 17 | self.seconds = seconds 18 | 19 | self._exception_queue = Queue() 20 | self._finished = False 21 | self._timeout_thread = None 22 | 23 | def _timeout(self): 24 | time.sleep(self.seconds) 25 | if not self._finished: 26 | self._exception_queue.put(True) 27 | else: 28 | self._exception_queue.put(False) 29 | 30 | def start(self): 31 | self._timeout_thread = Thread(target=self._timeout) 32 | self._timeout_thread.start() 33 | 34 | def stop(self): 35 | self._finished = True 36 | self._exception_queue.put(False) 37 | 38 | def check(self): 39 | if not self._exception_queue.empty(): 40 | exception = self._exception_queue.get(block=False) 41 | if exception: 42 | raise TimeoutError(f"Timeout of {self.seconds} seconds exceeded.") 43 | --------------------------------------------------------------------------------