├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── demos-and-products
├── README.md
├── arxiv-assistant
│ └── arxiv_assistant.py
├── basic-chatbot
│ ├── demo.py
│ └── templates
│ │ └── index.html
├── chaining-workshop
│ ├── README.md
│ ├── apps.py
│ ├── demo.py
│ ├── templates
│ │ ├── app.html
│ │ └── applist.html
│ └── tests.py
├── cot-analytics-frontend
│ ├── README.md
│ ├── cot-scr-1.png
│ ├── cot-scr-2.png
│ ├── frontend.py
│ ├── incomes.csv
│ ├── researchllm.py
│ ├── static
│ │ ├── data.js
│ │ ├── interface01.css
│ │ ├── output.json
│ │ └── results.js
│ └── templates
│ │ └── interface01.html
├── cot-analytics
│ ├── README.md
│ ├── cot.py
│ ├── requirements.txt
│ └── sample_output.md
├── eval_platform
│ ├── env-template.txt
│ ├── eval_platform
│ │ ├── __init__.py
│ │ ├── asgi.py
│ │ ├── settings.py
│ │ ├── urls.py
│ │ └── wsgi.py
│ ├── llmevaluator
│ │ ├── __init__.py
│ │ ├── admin.py
│ │ ├── apps.py
│ │ ├── management
│ │ │ ├── __init__.py
│ │ │ └── commands
│ │ │ │ ├── __init__.py
│ │ │ │ └── runjobs.py
│ │ ├── migrations
│ │ │ ├── 0001_initial.py
│ │ │ ├── 0002_batchllmjob.py
│ │ │ ├── 0003_chatbotmessagearray_source_batch_job_id_and_more.py
│ │ │ ├── 0004_alter_chatbotmessagearray_message_array.py
│ │ │ ├── 0005_alter_chatbotmessagearray_source_batch_job_id_and_more.py
│ │ │ ├── 0006_batchllmjob_tags_chatbotmessagearray_tags_and_more.py
│ │ │ ├── 0007_chatbotmessagearray_title.py
│ │ │ ├── 0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more.py
│ │ │ ├── 0009_batchllmjob_new_system_prompt_and_more.py
│ │ │ ├── 0010_batchllmjob_resend_last_user_message.py
│ │ │ ├── 0011_batchllmjob_description.py
│ │ │ ├── 0012_batchllmjob_message_collection_ref.py
│ │ │ ├── 0013_batchllmjob_results_array_and_more.py
│ │ │ ├── 0014_messagecollection_chats.py
│ │ │ └── __init__.py
│ │ ├── models.py
│ │ ├── tests.py
│ │ └── views.py
│ ├── manage.py
│ ├── readme.md
│ ├── requirements.txt
│ ├── screenshot.png
│ ├── static
│ │ └── main.css
│ └── templates
│ │ ├── aboutus.html
│ │ ├── base-navigation-two-cols.html
│ │ ├── base-navigation-two-rows.html
│ │ ├── base-navigation.html
│ │ ├── base.html
│ │ ├── batch.html
│ │ ├── batch_review.html
│ │ ├── chats.html
│ │ ├── create-group.html
│ │ ├── create.html
│ │ └── view-chat.html
├── newsbot
│ ├── README.md
│ ├── news_articles.json
│ ├── newsbot.py
│ ├── newsbot_create.py
│ ├── newsbot_evaluate.py
│ └── notes.md
├── researchllm
│ ├── README.md
│ ├── frontend.py
│ ├── incomes.csv
│ ├── requirements.txt
│ ├── researchllm.py
│ ├── screenshot.png
│ └── templates
│ │ └── index.html
└── web-search-chatbot
│ ├── demo.py
│ └── templates
│ └── index.html
├── docs
├── Makefile
├── README.md
├── make.bat
└── source
│ ├── conf.py
│ └── index.md
├── phasellm
├── __init__.py
├── agents.py
├── configurations.py
├── configurations_utils.py
├── eval.py
├── exceptions.py
├── html.py
├── llms.py
├── llms_utils.py
├── logging.py
└── types.py
├── project_metadata.py
├── readthedocs.yaml
├── release_checklist.md
├── requirements-dev.txt
├── requirements.txt
├── setup.py
├── tests-non-deterministic
├── README.md
├── __init__.py
└── llms
│ ├── __init__.py
│ └── test_llms.py
└── tests
├── README.MD
├── __init__.py
├── e2e
├── __init__.py
├── agents
│ ├── __init__.py
│ └── test_e2e_agents.py
├── llms
│ ├── __init__.py
│ ├── test_e2e_llms.py
│ └── utils.py
└── sse
│ ├── __init__.py
│ └── test_e2e_sse.py
├── release_checklist_code.py
├── unit
├── __init__.py
├── agents
│ ├── __init__.py
│ └── test_agents.py
└── llms
│ ├── __init__.py
│ └── test_llms.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # MacOS
10 | .DS_Store
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Jetbrains IDEs
117 | .idea
118 |
119 | # Spyder project settings
120 | .spyderproject
121 | .spyproject
122 |
123 | # Rope project settings
124 | .ropeproject
125 |
126 | # mkdocs documentation
127 | /site
128 |
129 | # mypy
130 | .mypy_cache/
131 | .dmypy.json
132 | dmypy.json
133 |
134 | # Pyre type checker
135 | .pyre/
136 |
137 | # Workspaces
138 | /workspace
139 |
140 | # Scratch directories
141 | .tmp
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Phase AI Technologies Inc.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PhaseLLM
2 |
3 | Large language model evaluation and workflow framework from [Phase AI](https://phaseai.com/).
4 |
5 | - [Follow us on Twitter](https://twitter.com/phasellm) for updates.
6 | - [Star us on GitHub](https://github.com/wgryc/phasellm).
7 | - [Read the Docs](https://phasellm.readthedocs.io/en/latest/autoapi/phasellm/index.html) -- Module reference. Tutorials and code examples are below.
8 |
9 | ## Installation
10 |
11 | You can install PhaseLLM via pip:
12 |
13 | ```
14 | pip install phasellm
15 | ```
16 |
17 | Installing from PyPI does not include libraries for running LLMs locally. Please run `pip install phasellm[complete]` if you plan on using LLMs locally (e.g., our `DollyWrapper`).
18 |
19 | Sample demos and products are in the `demos-and-products` folder. Clone this repository and follow instructions in the `README.md` file in each product folder to run those.
20 |
21 | ## Introduction
22 |
23 | The coming months and years will bring thousands of new products and experiences powered by large language models (LLMs) like ChatGPT or its increasing number of variants. Whether you're using OpenAI's ChatGPT, Anthropic's Claude, or something else altogether, you'll want to test how well your models and prompts perform against user needs. As more models are launched, you'll also have a bigger range of options.
24 |
25 | PhaseLLM is a framework designed to help manage and test LLM-driven experiences -- products, content, or other experiences that product and brand managers might be driving for their users.
26 |
27 | Here's what PhaseLLM does:
28 | 1. We standardize API calls so you can plug and play models from OpenAI, Cohere, Anthropic, or other providers.
29 | 2. We've built evaluation frameworks so you can compare outputs and decide which ones are driving the best experiences for users.
30 | 3. We're adding automations so you can use advanced models (e.g., GPT-4) to evaluate simpler models (e.g., GPT-3) to determine what combination of prompts yield the best experiences, especially when taking into account costs and speed of model execution.
31 |
32 | PhaseLLM is open source and we envision building more features to help with model understanding. We want to help developers, data scientists, and others launch new, robust products as easily as possible.
33 |
34 | If you're working on an LLM product, please reach out. We'd love to help out.
35 |
36 | ## Example: Evaluating Travel Chatbot Prompts with GPT-3.5, Claude, and more
37 |
38 | PhaseLLM makes it incredibly easy to plug and play LLMs and evaluate them, in some cases with *other* LLMs. Suppose you're building a travel chatbot, and you want to test Claude and Cohere against each other, using GPT-3.5.
39 |
40 | What's awesome with this approach is that (1) you can plug and play models and prompts as needed, and (2) the entire workflow takes a small amount of code. This simple example can easily be scaled to much more complex workflows.
41 |
42 | So, time for the code... First, load your API keys.
43 |
44 | ```python
45 | import os
46 | from dotenv import load_dotenv
47 |
48 | load_dotenv()
49 | openai_api_key = os.getenv("OPENAI_API_KEY")
50 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
51 | cohere_api_key = os.getenv("COHERE_API_KEY")
52 | ```
53 |
54 | We're going to set up the *Evaluator*, which takes two LLM model outputs and decides which one is better for the objective at hand.
55 |
56 | ```python
57 | from phasellm.eval import GPTEvaluator
58 |
59 | # We'll use GPT-3.5 as the evaluator (default for GPTEvaluator).
60 | e = GPTEvaluator(openai_api_key)
61 | ```
62 |
63 | Now it's time to set up the experiment. In this case, we'll set up an `objective` which describes what we're trying to achieve with our chatbot. We'll also provide 5 examples of starting chats that we've seen with our users.
64 |
65 | ```python
66 | # Our objective.
67 | objective = "We're building a chatbot to discuss a user's travel preferences and provide advice."
68 |
69 | # Chats that have been launched by users.
70 | travel_chat_starts = [
71 | "I'm planning to visit Poland in spring.",
72 | "I'm looking for the cheapest flight to Europe next week.",
73 | "I am trying to decide between Prague and Paris for a 5-day trip",
74 | "I want to visit Europe but can't decide if spring, summer, or fall would be better.",
75 | "I'm unsure I should visit Spain by flying via the UK or via France."
76 | ]
77 | ```
78 |
79 | Now we set up our Cohere and Claude models.
80 |
81 | ```python
82 | from phasellm.llms import CohereWrapper, ClaudeWrapper
83 | cohere_model = CohereWrapper(cohere_api_key)
84 | claude_model = ClaudeWrapper(anthropic_api_key)
85 | ```
86 |
87 | Finally, we launch our test. We run an experiment where both models generate a chat response and then we have GPT-3.5 evaluate the response.
88 |
89 | ```python
90 | print("Running test. 1 = Cohere, and 2 = Claude.")
91 | for tcs in travel_chat_starts:
92 |
93 | messages = [{"role":"system", "content":objective},
94 | {"role":"user", "content":tcs}]
95 |
96 | response_cohere = cohere_model.complete_chat(messages, "assistant")
97 | response_claude = claude_model.complete_chat(messages, "assistant")
98 |
99 | pref = e.choose(objective, tcs, response_cohere, response_claude)
100 | print(f"{pref}")
101 | ```
102 |
103 | In this case, we simply print which of the two models was preferred.
104 |
105 | Voila! You've got a suite to test your models and can plug-and-play three major LLMs.
106 |
107 | ## Contact Us
108 |
109 | If you have questions, requests, ideas, etc. please reach out at w (at) phaseai (dot) com.
110 |
--------------------------------------------------------------------------------
/demos-and-products/README.md:
--------------------------------------------------------------------------------
1 | # Demos and Products (/demos-and-products)
2 |
3 | This folder contains various products and demos built using PhaseLLM.
4 |
5 | Every folder contains a self-contained product or demo. Each one also contains a README.md file that includes installation instructions.
6 |
7 | All products require the `phasellm` package to be installed.
8 |
--------------------------------------------------------------------------------
/demos-and-products/arxiv-assistant/arxiv_assistant.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 |
4 | from dotenv import load_dotenv
5 |
6 | from feedparser import FeedParserDict
7 |
8 | from phasellm.llms import ClaudeWrapper
9 |
10 | from phasellm.agents import EmailSenderAgent, RSSAgent
11 |
12 | load_dotenv()
13 |
14 | # Load OpenAI and newsapi.org API keys.
15 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
16 |
17 | # Load Gmail credentials.
18 | gmail_email = os.getenv("GMAIL_EMAIL")
19 | gmail_password = os.getenv("GMAIL_PASSWORD") # https://myaccount.google.com/u/1/apppasswords
20 |
21 | # Set up the LLM
22 | llm = ClaudeWrapper(anthropic_api_key)
23 |
24 |
25 | def interest_analysis(title: str, abstract: str, interests: str):
26 | interest_analysis_prompt = \
27 | f"""
28 | I want to determine if an academic paper is relevant to my interests. I am interested in: {interests}. The paper
29 | is titled: {title}. It has the following abstract: {abstract}. Is this paper relevant to my interests? Respond
30 | with either 'yes' or 'no'. Do not explain your reasoning.
31 |
32 | Example responses are given between the ### ### symbols. Respond exactly as shown in the examples.
33 |
34 | ###yes###
35 | or
36 | ###no###
37 | """
38 | return llm.text_completion(prompt=interest_analysis_prompt)
39 |
40 |
41 | def summarize(title: str, abstract: str, interests: str):
42 | """
43 | This function summarizes why the paper might be relevant to the user's interests.
44 | Args:
45 | title: The title of the paper.
46 | abstract: The abstract of the paper.
47 | interests: The user's interests.
48 |
49 | Returns: The summary of why the paper might be relevant to the user's interests.
50 |
51 | """
52 | # Summarize why the paper might be relevant to the user's interests.
53 | summary_prompt = \
54 | f"""
55 | Summarize why the the following paper is relevant to my interests. My interests are: {interests}. The paper is
56 | titled: {title}. It has the following abstract: {abstract}.
57 | """
58 | return llm.text_completion(prompt=summary_prompt)
59 |
60 |
61 | def send_email(title: str, abstract: str, link: str, summary: str) -> None:
62 | """
63 | This function sends an email to the user with the title of the paper and the summary.
64 | Args:
65 | title: The title of the paper.
66 | abstract: The abstract of the paper.
67 | link: The link to the paper.
68 | summary: The summary of the paper.
69 |
70 | Returns:
71 |
72 | """
73 | # Send email
74 | print('Sending email...')
75 |
76 | content = f"Title: {title}\n\nSummary:\n{summary}\n\nAbstract:\n{abstract}\n\nLink: {link}"
77 |
78 | email_agent = EmailSenderAgent(
79 | sender_name='arXiv Assistant',
80 | smtp='smtp.gmail.com',
81 | sender_address=gmail_email,
82 | password=gmail_password,
83 | port=587
84 | )
85 | email_agent.send_plain_email(recipient_email=gmail_email, subject=title, content=content)
86 |
87 |
88 | def analyze_and_email(paper: FeedParserDict, interests: str, retries: int = 0) -> None:
89 | """
90 | This function analyzes the latest papers from arXiv and emails the user if any of them are relevant to their
91 | interests.
92 | Args:
93 | paper: The paper to analyze.
94 | interests: The user's interests.
95 | retries: The number of retry attempts made so far.
96 | Returns:
97 |
98 | """
99 | # Allow for a maximum of 1 retry.
100 | max_retries = 1
101 |
102 | title = paper['title']
103 | abstract = paper['summary']
104 | link = paper['link']
105 | interested = interest_analysis(title=title, abstract=abstract, interests=interests)
106 |
107 | # Find the answer within the response.
108 | answer = re.search(r'###(yes|no)###', interested)
109 | if not answer:
110 | if retries < max_retries:
111 | analyze_and_email(paper=paper, interests=interests, retries=retries + 1)
112 | else:
113 | interested = answer.group(0)
114 |
115 | # Send email if the user is interested.
116 | if interested == '###yes###':
117 | summary = summarize(title=title, abstract=abstract, interests=interests)
118 | send_email(title=title, abstract=abstract, link=link, summary=summary)
119 | elif interested == '###no###':
120 | pass
121 | else:
122 | print(f'LLM did not respond in the expected format after {max_retries}. Skipping paper:\n{title}')
123 |
124 |
125 | def main():
126 | """
127 | Entry point for the arXiv assistant.
128 | Returns:
129 |
130 | """
131 | # Ask user what they want to read about.
132 | interests = input("What kinds of papers do you want to be notified about?")
133 |
134 | papers_processed = 0
135 |
136 | rss_agent = RSSAgent(url='https://arxiv.org/rss/cs')
137 | with rss_agent.poll(60) as poller:
138 | for papers in poller():
139 | print(f'Found {len(papers)} new paper(s).')
140 | for paper in papers:
141 | analyze_and_email(
142 | paper=paper,
143 | interests=interests
144 | )
145 | papers_processed += 1
146 | print(f'Processed {papers_processed} paper(s).')
147 |
148 |
149 | if __name__ == '__main__':
150 | main()
151 |
--------------------------------------------------------------------------------
/demos-and-products/basic-chatbot/demo.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | from dotenv import load_dotenv
4 |
5 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
6 |
7 | load_dotenv()
8 | MODEL_LLM = OpenAIGPTWrapper
9 | MODEL_STRING = "gpt-4"
10 | MODEL_API_KEY = os.getenv("OPENAI_API_KEY")
11 | llm = MODEL_LLM(MODEL_API_KEY, MODEL_STRING)
12 |
13 | CHATBOT = None
14 |
15 | from flask import Flask, request, render_template, jsonify
16 |
17 | APP = Flask(__name__)
18 |
19 | # We have a function because we'll eventually add other things, like system prompts, variables, etc.
20 | # Returns True if successful, False otherwise
21 | def resetChatBot():
22 | global CHATBOT
23 | CHATBOT = ChatBot(llm)
24 | return True
25 |
26 | resetChatBot()
27 |
28 | @APP.route('/submit_chat_message', methods = ['POST'])
29 | def sendchat():
30 | global CHATBOT
31 | message = request.json["input"]
32 | response = CHATBOT.chat(message)
33 | return {"status":"ok", "content":response,}
34 |
35 | @APP.route('/resetchatbot')
36 | def resetchatbot():
37 | if resetChatBot():
38 | return jsonify({"status":"ok", "message":"ChatBot has been restarted."})
39 | else:
40 | return jsonify({"status":"error", "message":"ChatBot could not be restarted."})
41 |
42 | @APP.route('/')
43 | def index():
44 |
45 | # Loop and print all args...
46 | #for key, value in request.args.items():
47 | # print(f"{key} :: {value}")
48 | #print(request.args)
49 |
50 | if "reset" in request.args:
51 | if request.qrgs['reset'] == 'true':
52 | resetChatBot()
53 |
54 | return render_template('index.html')
55 |
56 | def run(host="127.0.0.1", port=5000):
57 | """
58 | Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only.
59 | """
60 | APP.run(host=host, port=port)
61 |
62 | MAIN_HOST = "127.0.0.1"
63 | MAIN_PORT = 8000
64 | if __name__ == '__main__':
65 | run(MAIN_HOST, MAIN_PORT)
--------------------------------------------------------------------------------
/demos-and-products/basic-chatbot/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
96 |
97 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/README.md:
--------------------------------------------------------------------------------
1 | # Chaining Workshop
2 |
3 | This provides a front-end and a set of prompt templates where you can then begin chaining and structuring "apps" in various ways.
4 |
5 | ## Example Prompt Types
6 |
7 | - System Message: show a message without any logic around what is shown.
8 | - Linear Order: show a message at a specific time (similar to 'system message' but with order).
9 | - Logic
10 |
11 | ## Sample Apps
12 |
13 | - AmpUp.ai with a "yes/no" from the LLM
14 | - AmpUp.ai with a confidence score
15 | - Newsbot with review of outputs
16 | - Character-focused chatbot
17 | - Travel agent workflow
18 |
19 | ## Data Structure
20 |
21 | { prompt_id, prompt}
22 | fallback prompt (i.e., error)
23 |
24 | { pid_1 -> pid_2, conditions}
25 |
26 |
27 | ## Characters
28 |
29 | ### Socrates
30 |
31 | { "prompt_id": 1, "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 2}
32 |
33 | { "prompt_id": 2, "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 2}
34 |
35 |
36 |
37 | variables = user/app provided, LLM-provided
38 |
39 | ## How to Add Conditional Flows
40 |
41 | - Output Parser: need to take the output of a model and parse it in some way. This should parse the outputs into specific variables.
42 | - Pass a function to the next prompt? This will be limited, though -- you still need to write functions. Is that bad?
43 | - Prebuilt template functions + custom functions.
44 |
45 | Output Parser -> Environment Variable -> Function
46 |
47 |
48 | OUTPUT PARSER
49 |
50 |
51 |
52 | For all of your responses, please provide them in the following format:
53 | ---MESSAGE
54 | This is where your actual message will go.
55 | ---SENTIMENT-SCORE
56 | A score between 0 and 100 that shows how positive or negative the person's response was when describing their product.
57 | ---END
58 | Include 'yes' or 'no' here. 'Yes' means we've asked 2 follow-up questions or the sentiment score has gotten close to 0 and you think it's safer to end the conversation. 'No' will continue the conversation.
59 |
60 | ## Conditional Flows v2
61 |
62 | Right now, this is all hard-coded via " ---VAR" which is a poor way of doing things.
--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/apps.py:
--------------------------------------------------------------------------------
1 | app_socrates = {
2 |
3 | "code":"socrates",
4 | "name":"Chat with Socrates",
5 |
6 | "prompts": {
7 |
8 | 0 : {
9 | "type":"system_message", "message": "You are chatting with Socrates. Enjoy!", "next_prompt": 1
10 | },
11 |
12 | 1 : {
13 | "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 1
14 | }
15 |
16 | }
17 |
18 | }
19 |
20 | app_yoyo = {
21 |
22 | "code":"yoyo",
23 | "name":"Chat with 'Yo Yo'",
24 |
25 | "prompts": {
26 |
27 | 0 : {
28 | "type":"system_message", "message": "You are chatting with someone that uses 'yo' too much. Enjoy!", "next_prompt": 1
29 | },
30 |
31 | 1 : {
32 | "prompt": "REMINDER: you are a chatbot that starts every message with 'Yo, yo, yo!' and also includes 'yo' throughout responses. lease do so with the message below.\nMESSAGE:{message}", "next_prompt": 1
33 | }
34 |
35 | }
36 |
37 | }
38 |
39 | app_act = {
40 |
41 | "code":"act",
42 | "name":"Acceptance and Commitment Therapy",
43 |
44 | "prompts": {
45 |
46 | 0 : {
47 | "type":"system_message", "message": "This is an 'Acceptance and Commitment Therapy' (ACT) coach. The responses in this chat model will always focus on different follow-up questions or advice around how you should move forward with your day based on this style of positive psychology.", "next_prompt": 1
48 | },
49 |
50 | 1 : {
51 | "prompt": "REMINDER: you are an Acceptance and Commitment Therapy' (ACT) coach and every message needs to follow the perspective of an ACT therapist that is also steeped in positive and humanistic psychology with a strong focus on ACT.\nMESSAGE:{message}", "next_prompt": 1
52 | }
53 |
54 | }
55 |
56 | }
57 |
58 | app_random_end = {
59 |
60 | "code": "random",
61 | "name": "Random End",
62 | "prompts": {
63 |
64 | 0 : {
65 | "type":"system_message", "message": "This is a demo bot that always follows up with ONE question and also randomly ends the conversation. It's being used to show how conditional app flows could work.", "next_prompt": 1
66 | },
67 |
68 | 1 : {
69 | "prompt": "REMINDER: you only allowed to respond with ONE SHORT QUESTION to the MESSAGE below. Please make sure that your response follows the following format:\n---RESPONSE\nThis is where your response actually goes.\n---NEXT\nPut 'YES' or 'NO' here randomly, with a 50% split.\n\n\nMESSAGE:{message}", "next_prompt": 1
70 | }
71 |
72 | }
73 |
74 | }
75 |
76 | app_danger_demo = {
77 |
78 | "code": "danger",
79 | "name": "Brand Sentiment",
80 | "prompts": {
81 |
82 | 0 : {
83 | "type":"system_message", "message": "This is a demo bot that interviews you about how you feel about your recent Nike sneaker purchase. If your sentiment goes down quite a bit, then it ends the interview.", "next_prompt": 1
84 | },
85 |
86 | 1 : {
87 | "prompt": "REMINDER: please always follow up with a question to keep learning about my sentiment around Nike sneakers. Also provide a 'danger' score from 0 to 100, where 100 means the conversation is incredibly negative, and 0 means it's incredibly positive, and 50 means it's neutral. Please make sure that your response follows the following format, always starting with '---RESPONSE':\n\n---RESPONSE\nThis is where your response actually goes.\n---DANGER\nThis is the sentiment score with 100 = negative, 50 = neutral, and 0 = positive.\n\n\nMESSAGE:{message}", "next_prompt": 1
88 | }
89 |
90 | }
91 |
92 | }
93 |
94 |
95 | APP_DATA_SETS = {
96 | "socrates": app_socrates,
97 | "yoyo": app_yoyo,
98 | "act": app_act,
99 | "random": app_random_end,
100 | "danger": app_danger_demo
101 | }
--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/demo.py:
--------------------------------------------------------------------------------
1 | # Import all the data, apps, etc. we have built...
2 | from apps import *
3 |
4 | import os
5 | from dotenv import load_dotenv
6 |
7 | from phasellm.llms import OpenAIGPTWrapper, ChatBot, Prompt
8 |
9 | load_dotenv()
10 | MODEL_LLM = OpenAIGPTWrapper
11 | MODEL_STRING = "gpt-4"
12 | #MODEL_STRING = "gpt-3.5-turbo" # Use for speed.
13 | MODEL_API_KEY = os.getenv("OPENAI_API_KEY")
14 | llm = MODEL_LLM(MODEL_API_KEY, MODEL_STRING)
15 |
16 | CHATBOT = None
17 |
18 | APP_PROMPT_STATE = 0
19 | APP_CODE = None
20 |
21 | from flask import Flask, request, render_template, jsonify
22 |
23 | APP = Flask(__name__)
24 |
25 | # We have a function because we'll eventually add other things, like system prompts, variables, etc.
26 | # Returns True if successful, False otherwise
27 | def resetChatBot():
28 | global CHATBOT
29 | CHATBOT = ChatBot(llm)
30 | return True
31 |
32 | resetChatBot()
33 |
34 | def parseResponse(r):
35 | lines = r.strip().split("\n")
36 |
37 | # Should eventually throw an error.
38 | if r[0:3] != "---":
39 | return None
40 | #assert r[0:3] == "---"
41 |
42 | var_name = None
43 | v = ""
44 |
45 | rdict = {}
46 |
47 | for line in lines:
48 | if line[0:3] == "---":
49 | if var_name is not None:
50 | rdict[var_name] = v.strip()
51 | var_name = line[3:].strip().upper()
52 | v = ""
53 | else:
54 | v += line
55 |
56 | rdict[var_name] = v.strip()
57 |
58 | return rdict
59 |
60 | @APP.route('/submit_chat_message', methods = ['POST'])
61 | def sendchat():
62 | global CHATBOT
63 | message = request.json["input"]
64 | response = process_message(message)
65 | return {"status":"ok", "content":response}
66 |
67 | @APP.route('/resetchatbot')
68 | def resetchatbot():
69 | if resetChatBot():
70 | return jsonify({"status":"ok", "message":"ChatBot has been restarted."})
71 | else:
72 | return jsonify({"status":"error", "message":"ChatBot could not be restarted."})
73 |
74 | def isInt(v):
75 | try:
76 | int(v)
77 | except:
78 | return False
79 | return True
80 |
81 | def process_message(message):
82 | global APP_PROMPT_STATE
83 | global APP_CODE
84 | global CHATBOT
85 | prompt = Prompt(APP_CODE["prompts"][APP_PROMPT_STATE]["prompt"])
86 | filled_prompt = prompt.fill(message = message)
87 |
88 | print(f"\n\n{filled_prompt}\n\n")
89 |
90 | response = CHATBOT.chat(filled_prompt)
91 |
92 | print(f"\n\n{response}\n\n")
93 |
94 | response_dict = parseResponse(response)
95 |
96 | next_prompt = -1
97 | if isInt(APP_CODE["prompts"][APP_PROMPT_STATE]["next_prompt"]):
98 | next_prompt = APP_CODE["prompts"][APP_PROMPT_STATE]["next_prompt"]
99 |
100 | if response_dict is not None:
101 | print(response_dict)
102 | if "NEXT" in response_dict:
103 | if response_dict["NEXT"].upper() == "NO":
104 | response = "Chat is over!"
105 | else:
106 | if "RESPONSE" in response_dict:
107 | response = response_dict["RESPONSE"]
108 | if "DANGER" in response_dict:
109 | if isInt(response_dict["DANGER"]):
110 | danger_score = int(response_dict["DANGER"])
111 | if danger_score > 80:
112 | response = "Dangerous topic! Chat is over!"
113 | else:
114 | if "RESPONSE" in response_dict:
115 | response = response_dict["RESPONSE"]
116 |
117 | APP_PROMPT_STATE = next_prompt
118 |
119 | return response
120 |
121 | @APP.route("/")
122 | def index():
123 | applist = ""
124 | for key in APP_DATA_SETS:
125 | applist += f"""
126 |
{APP_DATA_SETS[key]["name"]}
127 | """
128 | return render_template('applist.html', applist=applist)
129 |
@APP.route('/app')
def llmapp():
    """
    Loads (or resets) an LLM app. Query parameters:
      reset=true -- restarts the chatbot before loading.
      app=<code> -- selects the app from APP_DATA_SETS, seeds the system
                    message, and initializes the prompt state machine.
    """
    global APP_PROMPT_STATE
    global APP_CODE

    if request.args.get("reset") == "true":
        resetChatBot()

    app_name = ""
    system_message = ""
    app_code = request.args.get("app")
    if app_code is not None and app_code in APP_DATA_SETS:
        system_message = APP_DATA_SETS[app_code]["prompts"][0]["message"]
        app_name = app_code
        APP_CODE = APP_DATA_SETS[app_code]
        # The first prompt's "next_prompt" tells us which state to start in.
        # (The original code first assigned 0 here, then immediately
        # overwrote it with this value — the dead store has been removed.)
        APP_PROMPT_STATE = APP_DATA_SETS[app_code]["prompts"][0]["next_prompt"]

    return render_template('app.html', app_name=app_name, sys_msg=system_message)
157 |
def run(host="127.0.0.1", port=5000):
    """
    Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only.

    :param host: interface to bind the Flask development server to
    :param port: TCP port to listen on
    """
    APP.run(host=host, port=port)

# Default bind address/port used when this module is executed directly.
MAIN_HOST = "127.0.0.1"
MAIN_PORT = 8000
if __name__ == '__main__':
    run(MAIN_HOST, MAIN_PORT)
--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/templates/app.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/demos-and-products/cot-analytics/README.md:
--------------------------------------------------------------------------------
1 | # Chain of Thought (CoT) Analytics
2 |
3 | This generates a Chain of Thought (CoT) plan for a data set, and then asks the LLM to generate code for each step in the CoT analysis.
4 |
5 | Curious about the output? Please see `sample_output.md` for an example of an analysis plan for the demo data set. This was not edited!
6 |
7 | ## Installation and Setup
8 |
9 | ### Installation
10 |
11 | Clone the GitHub repository and navigate to the folder containing this README.md file. Install the relevant packages (including PhaseLLM):
12 |
13 | ```
14 | pip install -r requirements.txt
15 | ```
16 |
Next, make sure you edit the `cot.py` file to include the proper API keys. You'll find these near the bottom of the file (around line 120):
18 | ```python
19 | openai_api_key = os.getenv("OPENAI_API_KEY")
20 | o = OpenAIGPTWrapper(openai_api_key, 'gpt-4') # We highly recommend using GPT-4 or Claude v1.3 for this.
21 | ```
22 |
23 | ### Running With Sample Data
24 |
25 | Simply run `cot.py` directly in your command line. This will take a while to run, and will make 10-20 requests to the OpenAI or Anthropic API.
26 |
27 | ## Running on Your Own Data
28 |
29 | This demo is based on the data in `incomes.csv` in the `researchllm` folder. We are working on making this easier to apply to other data sets *and* to actually execute the code generated by the LLM.
30 |
31 | Until then, please don't hesitate to reach out -- hello (at) phaseai (dot) com -- and we'll help you apply this to your data.
32 |
--------------------------------------------------------------------------------
/demos-and-products/cot-analytics/cot.py:
--------------------------------------------------------------------------------
1 | """
2 | Chain of Thought (CoT) analysis for a data set. Please see the README.md for more information.
3 | """
4 |
5 | import os
6 | from dotenv import load_dotenv
7 |
8 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
9 |
# Where we write the output of this analysis.
OUT_FILE = "output.md"

def save_output(output, header):
    """
    Appends model outputs to a markdown file. Includes a header ("# header") and then the output itself.

    :param output: text to write beneath the header (typically an LLM response)
    :param header: section title, rendered as a level-1 markdown heading
    """
    # Explicit UTF-8: LLM responses routinely contain non-ASCII characters,
    # which would crash on platforms whose default locale encoding is
    # narrower (e.g. cp1252 on Windows).
    with open(OUT_FILE, 'a', encoding='utf-8') as writer:
        writer.write(f"# {header}\n\n{output}\n\n")
19 |
20 | # This prompt is basically a copy/paste of what is generated by ResearchLLM for the 'incomes.csv' data set via the generateOverview() function.
21 | messages = [{"role":"system", "content": """You are a data science research assistant. We will ask you about a big data set and would like you to break down the analysis you suggest into specific tasks that we can then write code for."""},
22 | {"role":"user", "content":"""I am researching the relationship between income and sociodemographic census info. I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`, and have a dataframe called `df` loaded into Python. `df` contains the following variables and variable types:
23 |
24 | Column Name: age
25 | Column Type: int64
26 |
27 | Column Name: workclass
28 | Column Type: object
29 | Sample Values: ['?' 'Federal-gov' 'Local-gov' 'Never-worked' 'Private' 'Self-emp-inc'
30 | 'Self-emp-not-inc' 'State-gov' 'Without-pay']
31 |
32 | Column Name: fnlwgt
33 | Column Type: int64
34 |
35 | Column Name: education
36 | Column Type: object
37 | Sample Values: ['10th' '11th' '12th' '1st-4th' '5th-6th' '7th-8th' '9th' 'Assoc-acdm'
38 | 'Assoc-voc' 'Bachelors' 'Doctorate' 'HS-grad' 'Masters' 'Preschool'
39 | 'Prof-school' 'Some-college']
40 |
41 | Column Name: education.num
42 | Column Type: int64
43 |
44 | Column Name: marital.status
45 | Column Type: object
46 | Sample Values: ['Divorced' 'Married-AF-spouse' 'Married-civ-spouse'
47 | 'Married-spouse-absent' 'Never-married' 'Separated' 'Widowed']
48 |
49 | Column Name: occupation
50 | Column Type: object
51 | Sample Values: ['?' 'Adm-clerical' 'Armed-Forces' 'Craft-repair' 'Exec-managerial'
52 | 'Farming-fishing' 'Handlers-cleaners' 'Machine-op-inspct' 'Other-service'
53 | 'Priv-house-serv' 'Prof-specialty' 'Protective-serv' 'Sales'
54 | 'Tech-support' 'Transport-moving']
55 |
56 | Column Name: relationship
57 | Column Type: object
58 | Sample Values: ['Husband' 'Not-in-family' 'Other-relative' 'Own-child' 'Unmarried' 'Wife']
59 |
60 | Column Name: race
61 | Column Type: object
62 | Sample Values: ['Amer-Indian-Eskimo' 'Asian-Pac-Islander' 'Black' 'Other' 'White']
63 |
64 | Column Name: sex
65 | Column Type: object
66 | Sample Values: ['Female' 'Male']
67 |
68 | Column Name: capital.gain
69 | Column Type: int64
70 |
71 | Column Name: capital.loss
72 | Column Type: int64
73 |
74 | Column Name: hours.per.week
75 | Column Type: int64
76 |
77 | Column Name: native.country
78 | Column Type: object
79 | Sample Values: ['?' 'Cambodia' 'Canada' 'China' 'Columbia' 'Cuba' 'Dominican-Republic'
80 | 'Ecuador' 'El-Salvador' 'England' 'France' 'Germany' 'Greece' 'Guatemala'
81 | 'Haiti' 'Holand-Netherlands' 'Honduras' 'Hong' 'Hungary' 'India' 'Iran'
82 | 'Ireland' 'Italy' 'Jamaica' 'Japan' 'Laos' 'Mexico' 'Nicaragua'
83 | 'Outlying-US(Guam-USVI-etc)' 'Peru' 'Philippines' 'Poland' 'Portugal'
84 | 'Puerto-Rico' 'Scotland' 'South' 'Taiwan' 'Thailand' 'Trinadad&Tobago'
85 | 'United-States' 'Vietnam' 'Yugoslavia']
86 |
87 | Column Name: income
88 | Column Type: object
89 | Sample Values: ['<=50K' '>50K']
90 |
91 | ````````
92 |
93 | With all of the above in mind, could you please provide me with a set of analysis steps you would recommend I run on the data to better understand what drives income inequality? Please provide a numbered list where each number is a specific analytical step. For each step, include the hypothesis you would test, what variables you'd look at, and what you'd be hoping to find.
94 |
95 | Do not worry about visualizing the data, as I'd like to ensure the outputs are all things that you are able to interpret afterwards. """}
96 | ]
97 |
def split_cot(cot):
    """
    Takes a numbered list generated by an LLM and splits it into an array,
    treating blank lines as separators between steps.

    :param cot: raw LLM response text containing a numbered list of steps
    :return: list of non-empty step strings, each stripped of surrounding whitespace
    """
    cot_steps = []

    step_text = ""
    for line in cot.split("\n"):
        if len(line.strip()) > 0:
            step_text += line + "\n"
        else:
            # Only record non-empty steps: consecutive blank lines in the
            # LLM output previously produced spurious empty entries.
            if step_text.strip():
                cot_steps.append(step_text.strip())
            step_text = ""

    # Flush any trailing text: the previous implementation silently dropped
    # the final step when the response did not end with a blank line.
    if step_text.strip():
        cot_steps.append(step_text.strip())

    return cot_steps
115 |
# Load API keys from a .env file (expects OPENAI_API_KEY).
load_dotenv()

print("Setting up chat...")

openai_api_key = os.getenv("OPENAI_API_KEY")
# GPT-4 (or Claude v1.3) recommended; weaker models produce poor CoT plans.
o = OpenAIGPTWrapper(openai_api_key, 'gpt-4')
# Seed the chatbot with the system prompt from the `messages` array above.
c = ChatBot(o, messages[0]['content'])

print("Getting CoT...")

# Step 1, let's get a chain of thought (COT) approach to understanding the data set.
response = c.chat(messages[1]['content'])
save_output(response, "Chain of Thought Plan for Data Analysis")
cot_steps = split_cot(response)

# Step 2, go through each COT step and ask GPT-4 to generate code.
step_num = 1
for step in cot_steps:

    print(f"Generating code for step {step_num}.")

    # Each step is sent back to the same chat so the model retains the
    # context of the overall analysis plan.
    prompt = f"""You wrote the following instructions for a step:
{step}

Please write the Python code for the step above. Assume the following:
1. Start your response with ```python
2. End your response with ```
3. Do not add any text outside the code. For anything that requires comment, simply add Python comments.
4. Assume the data was imported into a dataframe called `df`
5. I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`. You can use those libraries and no others.
"""

    response = c.chat(prompt)

    # Write the step description followed by the generated code to output.md.
    save_output(step + "\n\n" + response, f"Code for Step #{step_num}")

    step_num += 1

print("Done!")
--------------------------------------------------------------------------------
/demos-and-products/cot-analytics/requirements.txt:
--------------------------------------------------------------------------------
1 | phasellm
2 | scikit-learn
3 | pandas
4 | numpy
5 | scipy
6 | statsmodels
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/env-template.txt:
--------------------------------------------------------------------------------
1 | # LLM APIs
2 | OPENAI_API_KEY=...your OpenAI API key...
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/eval_platform/__init__.py
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/asgi.py:
--------------------------------------------------------------------------------
1 | """
2 | ASGI config for eval_platform project.
3 |
4 | It exposes the ASGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.asgi import get_asgi_application
13 |
14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings")
15 |
16 | application = get_asgi_application()
17 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for eval_platform project.
3 |
4 | Generated by 'django-admin startproject' using Django 4.2.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/4.2/topics/settings/
8 |
9 | For the full list of settings and their values, see
10 | https://docs.djangoproject.com/en/4.2/ref/settings/
11 | """
12 |
13 | import os
14 | from dotenv import load_dotenv
15 |
16 | load_dotenv()
17 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18 |
19 | from pathlib import Path
20 |
21 | # Build paths inside the project like this: BASE_DIR / 'subdir'.
22 | BASE_DIR = Path(__file__).resolve().parent.parent
23 |
24 |
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# Prefer an environment variable; fall back to the original development-only
# key so existing local setups keep working unchanged.
SECRET_KEY = os.getenv(
    "DJANGO_SECRET_KEY",
    "django-insecure-qhwo&d2q3@p2ov)-6e8il37squqh0ji&3qvqmtciforvkekr+^",
)

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []
36 |
37 | # Application definition
38 |
39 | INSTALLED_APPS = [
40 | "django.contrib.admin",
41 | "django.contrib.auth",
42 | "django.contrib.contenttypes",
43 | "django.contrib.sessions",
44 | "django.contrib.messages",
45 | "django.contrib.staticfiles",
46 | "llmevaluator",
47 | ]
48 |
49 | MIDDLEWARE = [
50 | "django.middleware.security.SecurityMiddleware",
51 | "django.contrib.sessions.middleware.SessionMiddleware",
52 | "django.middleware.common.CommonMiddleware",
53 | "django.middleware.csrf.CsrfViewMiddleware",
54 | "django.contrib.auth.middleware.AuthenticationMiddleware",
55 | "django.contrib.messages.middleware.MessageMiddleware",
56 | "django.middleware.clickjacking.XFrameOptionsMiddleware",
57 | ]
58 |
59 | ROOT_URLCONF = "eval_platform.urls"
60 |
61 | TEMPLATES = [
62 | {
63 | "BACKEND": "django.template.backends.django.DjangoTemplates",
64 | "DIRS": ["templates"],
65 | "APP_DIRS": True,
66 | "OPTIONS": {
67 | "context_processors": [
68 | "django.template.context_processors.debug",
69 | "django.template.context_processors.request",
70 | "django.contrib.auth.context_processors.auth",
71 | "django.contrib.messages.context_processors.messages",
72 | ],
73 | },
74 | },
75 | ]
76 |
77 | WSGI_APPLICATION = "eval_platform.wsgi.application"
78 |
79 |
80 | # Database
81 | # https://docs.djangoproject.com/en/4.2/ref/settings/#databases
82 |
83 | DATABASES = {
84 | "default": {
85 | "ENGINE": "django.db.backends.sqlite3",
86 | "NAME": BASE_DIR / "db.sqlite3",
87 | }
88 | }
89 |
90 |
91 | # Password validation
92 | # https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
93 |
94 | AUTH_PASSWORD_VALIDATORS = [
95 | {
96 | "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
97 | },
98 | {
99 | "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
100 | },
101 | {
102 | "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
103 | },
104 | {
105 | "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
106 | },
107 | ]
108 |
109 |
110 | # Internationalization
111 | # https://docs.djangoproject.com/en/4.2/topics/i18n/
112 |
113 | LANGUAGE_CODE = "en-us"
114 |
115 | TIME_ZONE = "UTC"
116 |
117 | USE_I18N = True
118 |
119 | USE_TZ = True
120 |
121 |
122 | # Static files (CSS, JavaScript, Images)
123 | # https://docs.djangoproject.com/en/4.2/howto/static-files/
124 |
125 | STATIC_URL = "static/"
126 |
127 | STATICFILES_DIRS = [
128 | BASE_DIR / "static",
129 | ]
130 |
131 | # Default primary key field type
132 | # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
133 |
134 | DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
135 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/urls.py:
--------------------------------------------------------------------------------
1 | """
2 | URL configuration for eval_platform project.
3 |
4 | The `urlpatterns` list routes URLs to views. For more information please see:
5 | https://docs.djangoproject.com/en/4.2/topics/http/urls/
6 | Examples:
7 | Function views
8 | 1. Add an import: from my_app import views
9 | 2. Add a URL to urlpatterns: path('', views.home, name='home')
10 | Class-based views
11 | 1. Add an import: from other_app.views import Home
12 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
13 | Including another URLconf
14 | 1. Import the include() function: from django.urls import include, path
15 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
16 | """
17 | from django.contrib import admin
18 | from django.urls import path
19 | from django.views.generic import TemplateView
20 |
21 | import llmevaluator.views as lv
22 |
23 | urlpatterns = [
24 | path("admin/", admin.site.urls),
25 | path("", lv.review_jobs),
26 | path(
27 | "import",
28 | TemplateView.as_view(
29 | template_name="create.html",
30 | extra_context={"contenttitle": "Import Chat via JSON"},
31 | ),
32 | ),
33 | path(
34 | "about",
35 | TemplateView.as_view(
36 | template_name="aboutus.html",
37 | extra_context={"contenttitle": "About Us"},
38 | ),
39 | ),
40 | path("create_save_ma", lv.createMessageArray),
41 | path("create_save_ma_json", lv.createMessageArrayJson),
42 | path("groups", lv.list_groups),
43 | path("create_group_csv", lv.createGroupFromCSV),
44 | path("jobs", lv.list_jobs),
45 | path("create_job", lv.createJob),
46 | path("chats", lv.get_chats, name="list_chats"),
47 | path("view_chat/", lv.view_chat, name="view_chat"),
48 | path("view_chat", lv.view_chat_new),
49 | path("update_title_via_post", lv.update_title_via_post),
50 | path("overwrite_chat", lv.overwrite_chat),
51 | path("delete_chat/", lv.delete_chat),
52 | ]
53 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for eval_platform project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings")
15 |
16 | application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/__init__.py
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 |
3 | from .models import ChatBotMessageArray, MessageCollection, BatchLLMJob
4 |
5 | admin.site.register(ChatBotMessageArray)
6 | admin.site.register(MessageCollection)
7 | admin.site.register(BatchLLMJob)
8 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 |
3 |
4 | class LlmevaluatorConfig(AppConfig):
5 | default_auto_field = "django.db.models.BigAutoField"
6 | name = "llmevaluator"
7 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/management/__init__.py
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/management/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/management/commands/__init__.py
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/management/commands/runjobs.py:
--------------------------------------------------------------------------------
1 | from django.core.management.base import BaseCommand
2 |
3 | from llmevaluator.models import *
4 |
5 | from django.conf import settings
6 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
7 |
8 |
# Returns the new ChatBotMessageArray ID
def run_llm_task_and_save(
    message_array,
    user_message,
    job_id,
    original_title="Untitled",
    model="gpt-4",
    temperature=0.7,
    print_response=True,
    new_system_prompt=None,
    resend_last_user_message=False,
):
    """
    Runs one LLM completion over `message_array` and saves the resulting chat
    as a new ChatBotMessageArray.

    :param message_array: list of {"role": ..., "content": ...} message dicts
    :param user_message: new user message to send (ignored when resending)
    :param job_id: BatchLLMJob id recorded on the saved chat
    :param original_title: title prefix for the saved chat
    :param model: OpenAI model name
    :param temperature: sampling temperature passed to the model
    :param print_response: echo the LLM response to stdout
    :param new_system_prompt: if non-empty, replaces the chat's system prompt
    :param resend_last_user_message: resend the chat's last user message
        instead of appending `user_message` (mutually exclusive with it)
    :return: the saved ChatBotMessageArray instance
    """
    o = OpenAIGPTWrapper(settings.OPENAI_API_KEY, model=model, temperature=temperature)
    cb = ChatBot(o, "")

    # If we want to resend the last user message *and* provide a new user message, then we'll have to ignore one of those options
    assert not (resend_last_user_message == True and len(user_message) > 0)

    # Copy each message dict as well: list.copy() is shallow, so overwriting
    # the system prompt below would otherwise mutate the caller's dicts.
    ma_copy = [dict(m) for m in message_array]
    if new_system_prompt is not None:
        if len(new_system_prompt.strip()) > 0:
            # If the first message is not a system prompt, then error out.
            assert ma_copy[0]["role"] == "system"
            ma_copy[0]["content"] = new_system_prompt

    cb.messages = ma_copy

    if resend_last_user_message:
        response = cb.resend()
    else:
        response = cb.chat(user_message)

    new_cbma = ChatBotMessageArray(
        message_array=cb.messages,
        source_batch_job_id=job_id,
        title=f"{original_title} w/ T={temperature}, model={model}",
    )

    new_cbma.llm_temperature = temperature
    new_cbma.llm_model = model

    new_cbma.save()

    if print_response:
        print(response)

    return new_cbma
56 |
57 |
def run_job(job):
    """
    Executes one BatchLLMJob: for every chat in the job's message collection,
    runs the requested model/temperature combinations `run_n_times` times,
    collects the resulting chats into a new MessageCollection, and marks the
    job complete.
    """
    print(f"Starting job: {job.title}")

    mc = MessageCollection.objects.get(id=job.message_collection_id)
    chat_ids = mc.chat_ids.strip().split(",")

    # SETTINGS: include_gpt_4 / include_gpt_35 -- same order as before
    # (GPT-4 runs first), so result ordering is unchanged.
    models = []
    if job.include_gpt_4:
        models.append("gpt-4")
    if job.include_gpt_35:
        models.append("gpt-3.5-turbo")

    # SETTING: temperature_range -- sweep three temperatures; otherwise use
    # 0.7, which is run_llm_task_and_save's default temperature.
    temperatures = [0.25, 0.75, 1.25] if job.temperature_range else [0.7]

    results_ids = []
    results_to_append = []

    for _cid in chat_ids:
        print(f"Analyzing chat ID: {_cid}")

        cbma = ChatBotMessageArray.objects.get(id=int(_cid))

        # SETTING: run_n_times
        for _ in range(0, job.run_n_times):
            for model in models:
                for t in temperatures:
                    nc = run_llm_task_and_save(
                        cbma.message_array.copy(),
                        job.user_message,
                        job.id,
                        cbma.title,
                        model=model,
                        temperature=t,
                        new_system_prompt=job.new_system_prompt,
                        resend_last_user_message=job.resend_last_user_message,
                    )
                    results_ids.append(str(nc.id))
                    results_to_append.append(nc)

    # Persist the results both as a legacy comma-separated id string and via
    # the many-to-many `chats` relation.
    new_chats_str = ",".join(results_ids)
    results_mc = MessageCollection(
        title=f"Results from '{job.title}' job",
        chat_ids=new_chats_str,
        source_collection_id=mc.id,
        source_batch_job_id=job.id,
    )
    results_mc.save()

    for r in results_to_append:
        results_mc.chats.add(r)
    results_mc.save()

    job.status = "complete"
    job.results_array = results_mc
    job.save()

    print("Done!")
153 |
154 |
class Command(BaseCommand):
    help = "Runs all scheduled batch jobs."

    def handle(self, *args, **options):
        # Process every job still marked "scheduled"; run_job flips each
        # one to "complete" when it finishes, so re-running this command
        # does not repeat finished work.
        jobs = BatchLLMJob.objects.filter(status="scheduled")
        for job in jobs:
            run_job(job)
162 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0001_initial.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-09-24 16:39
2 |
3 | from django.db import migrations, models
4 |
5 |
6 | class Migration(migrations.Migration):
7 | initial = True
8 |
9 | dependencies = []
10 |
11 | operations = [
12 | migrations.CreateModel(
13 | name="ChatBotMessageArray",
14 | fields=[
15 | (
16 | "id",
17 | models.BigAutoField(
18 | auto_created=True,
19 | primary_key=True,
20 | serialize=False,
21 | verbose_name="ID",
22 | ),
23 | ),
24 | ("created_at", models.DateTimeField(auto_now_add=True)),
25 | ("updated_at", models.DateTimeField(auto_now=True)),
26 | ("message_array", models.JSONField(default=dict)),
27 | ("comments", models.TextField(blank=True, default="", null=True)),
28 | ],
29 | ),
30 | migrations.CreateModel(
31 | name="MessageCollection",
32 | fields=[
33 | (
34 | "id",
35 | models.BigAutoField(
36 | auto_created=True,
37 | primary_key=True,
38 | serialize=False,
39 | verbose_name="ID",
40 | ),
41 | ),
42 | ("created_at", models.DateTimeField(auto_now_add=True)),
43 | ("updated_at", models.DateTimeField(auto_now=True)),
44 | ("title", models.TextField(blank=True, default="", null=True)),
45 | ("chat_ids", models.TextField(blank=True, default="", null=True)),
46 | ],
47 | ),
48 | ]
49 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0002_batchllmjob.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-09-26 18:50
2 |
3 | from django.db import migrations, models
4 |
5 |
6 | class Migration(migrations.Migration):
7 | dependencies = [
8 | ("llmevaluator", "0001_initial"),
9 | ]
10 |
11 | operations = [
12 | migrations.CreateModel(
13 | name="BatchLLMJob",
14 | fields=[
15 | (
16 | "id",
17 | models.BigAutoField(
18 | auto_created=True,
19 | primary_key=True,
20 | serialize=False,
21 | verbose_name="ID",
22 | ),
23 | ),
24 | ("created_at", models.DateTimeField(auto_now_add=True)),
25 | ("updated_at", models.DateTimeField(auto_now=True)),
26 | ("title", models.TextField(blank=True, default="", null=True)),
27 | ("message_collection_id", models.IntegerField()),
28 | ("user_message", models.TextField(blank=True, default="", null=True)),
29 | (
30 | "status",
31 | models.TextField(blank=True, default="scheduled", null=True),
32 | ),
33 | ],
34 | ),
35 | ]
36 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0003_chatbotmessagearray_source_batch_job_id_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-09-28 14:54
2 |
3 | from django.db import migrations, models
4 |
5 |
6 | class Migration(migrations.Migration):
7 | dependencies = [
8 | ("llmevaluator", "0002_batchllmjob"),
9 | ]
10 |
11 | operations = [
12 | migrations.AddField(
13 | model_name="chatbotmessagearray",
14 | name="source_batch_job_id",
15 | field=models.IntegerField(null=True),
16 | ),
17 | migrations.AddField(
18 | model_name="messagecollection",
19 | name="source_batch_job_id",
20 | field=models.IntegerField(null=True),
21 | ),
22 | migrations.AddField(
23 | model_name="messagecollection",
24 | name="source_collection_id",
25 | field=models.IntegerField(null=True),
26 | ),
27 | ]
28 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0004_alter_chatbotmessagearray_message_array.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-09-28 19:42
2 |
3 | import django.core.serializers.json
4 | from django.db import migrations, models
5 |
6 |
7 | class Migration(migrations.Migration):
8 | dependencies = [
9 | ("llmevaluator", "0003_chatbotmessagearray_source_batch_job_id_and_more"),
10 | ]
11 |
12 | operations = [
13 | migrations.AlterField(
14 | model_name="chatbotmessagearray",
15 | name="message_array",
16 | field=models.JSONField(
17 | default=dict, encoder=django.core.serializers.json.DjangoJSONEncoder
18 | ),
19 | ),
20 | ]
21 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0005_alter_chatbotmessagearray_source_batch_job_id_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-09-29 15:22
2 |
3 | from django.db import migrations, models
4 |
5 |
6 | class Migration(migrations.Migration):
7 | dependencies = [
8 | ("llmevaluator", "0004_alter_chatbotmessagearray_message_array"),
9 | ]
10 |
11 | operations = [
12 | migrations.AlterField(
13 | model_name="chatbotmessagearray",
14 | name="source_batch_job_id",
15 | field=models.IntegerField(blank=True, null=True),
16 | ),
17 | migrations.AlterField(
18 | model_name="messagecollection",
19 | name="source_batch_job_id",
20 | field=models.IntegerField(blank=True, null=True),
21 | ),
22 | migrations.AlterField(
23 | model_name="messagecollection",
24 | name="source_collection_id",
25 | field=models.IntegerField(blank=True, null=True),
26 | ),
27 | ]
28 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0006_batchllmjob_tags_chatbotmessagearray_tags_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-09-29 18:52
2 |
3 | from django.db import migrations, models
4 |
5 |
6 | class Migration(migrations.Migration):
7 | dependencies = [
8 | ("llmevaluator", "0005_alter_chatbotmessagearray_source_batch_job_id_and_more"),
9 | ]
10 |
11 | operations = [
12 | migrations.AddField(
13 | model_name="batchllmjob",
14 | name="tags",
15 | field=models.TextField(blank=True, default="", null=True),
16 | ),
17 | migrations.AddField(
18 | model_name="chatbotmessagearray",
19 | name="tags",
20 | field=models.TextField(blank=True, default="", null=True),
21 | ),
22 | migrations.AddField(
23 | model_name="messagecollection",
24 | name="tags",
25 | field=models.TextField(blank=True, default="", null=True),
26 | ),
27 | ]
28 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0007_chatbotmessagearray_title.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-09-30 16:34
2 |
3 | from django.db import migrations, models
4 |
5 |
6 | class Migration(migrations.Migration):
7 | dependencies = [
8 | ("llmevaluator", "0006_batchllmjob_tags_chatbotmessagearray_tags_and_more"),
9 | ]
10 |
11 | operations = [
12 | migrations.AddField(
13 | model_name="chatbotmessagearray",
14 | name="title",
15 | field=models.TextField(blank=True, default="Untitled"),
16 | ),
17 | ]
18 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-10-09 13:37
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    # Adds batch-job run settings: which models to include (GPT-4/GPT-3.5),
    # how many repetitions, and whether to sweep a temperature range.
    dependencies = [
        ("llmevaluator", "0007_chatbotmessagearray_title"),
    ]

    operations = [
        migrations.AddField(
            model_name="batchllmjob",
            name="include_gpt_35",
            field=models.BooleanField(default=False),
        ),
        migrations.AddField(
            model_name="batchllmjob",
            name="include_gpt_4",
            field=models.BooleanField(default=True),
        ),
        migrations.AddField(
            model_name="batchllmjob",
            name="run_n_times",
            field=models.IntegerField(default=1),
        ),
        migrations.AddField(
            model_name="batchllmjob",
            name="temperature_range",
            field=models.BooleanField(default=False),
        ),
    ]
33 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0009_batchllmjob_new_system_prompt_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-10-10 16:23
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    # Adds an optional system-prompt override to BatchLLMJob, and records
    # which model/temperature produced each ChatBotMessageArray.
    dependencies = [
        (
            "llmevaluator",
            "0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more",
        ),
    ]

    operations = [
        migrations.AddField(
            model_name="batchllmjob",
            name="new_system_prompt",
            field=models.TextField(blank=True, default="", null=True),
        ),
        migrations.AddField(
            model_name="chatbotmessagearray",
            name="llm_model",
            field=models.TextField(blank=True, default="None", null=True),
        ),
        migrations.AddField(
            model_name="chatbotmessagearray",
            name="llm_temperature",
            field=models.FloatField(blank=True, null=True),
        ),
    ]
31 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0010_batchllmjob_resend_last_user_message.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-10-11 06:30
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    # Adds a flag telling a batch job to resend the chat's last user
    # message instead of (or in addition to) a new one.
    dependencies = [
        ("llmevaluator", "0009_batchllmjob_new_system_prompt_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="batchllmjob",
            name="resend_last_user_message",
            field=models.BooleanField(default=False),
        ),
    ]
18 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0011_batchllmjob_description.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-10-11 10:44
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    # Adds an optional free-text description to BatchLLMJob.
    dependencies = [
        ("llmevaluator", "0010_batchllmjob_resend_last_user_message"),
    ]

    operations = [
        migrations.AddField(
            model_name="batchllmjob",
            name="description",
            field=models.TextField(blank=True, null=True),
        ),
    ]
18 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0012_batchllmjob_message_collection_ref.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-10-11 10:52
2 |
3 | from django.db import migrations, models
4 | import django.db.models.deletion
5 |
6 |
class Migration(migrations.Migration):
    # Adds a proper ForeignKey from BatchLLMJob to its source
    # MessageCollection (alongside the legacy message_collection_id int).
    dependencies = [
        ("llmevaluator", "0011_batchllmjob_description"),
    ]

    operations = [
        migrations.AddField(
            model_name="batchllmjob",
            name="message_collection_ref",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                to="llmevaluator.messagecollection",
            ),
        ),
    ]
23 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0013_batchllmjob_results_array_and_more.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-10-11 11:05
2 |
3 | from django.db import migrations, models
4 | import django.db.models.deletion
5 |
6 |
class Migration(migrations.Migration):
    # Adds a ForeignKey for the job's results collection, and gives both
    # MessageCollection FKs explicit related_names so the reverse accessors
    # do not clash.
    dependencies = [
        ("llmevaluator", "0012_batchllmjob_message_collection_ref"),
    ]

    operations = [
        migrations.AddField(
            model_name="batchllmjob",
            name="results_array",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="results_collection",
                to="llmevaluator.messagecollection",
            ),
        ),
        migrations.AlterField(
            model_name="batchllmjob",
            name="message_collection_ref",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="source_messages_collection",
                to="llmevaluator.messagecollection",
            ),
        ),
    ]
34 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0014_messagecollection_chats.py:
--------------------------------------------------------------------------------
1 | # Generated by Django 4.2 on 2023-10-11 13:03
2 |
3 | from django.db import migrations, models
4 |
5 |
class Migration(migrations.Migration):
    # Adds a ManyToMany from MessageCollection to ChatBotMessageArray,
    # superseding the comma-separated chat_ids text field.
    dependencies = [
        ("llmevaluator", "0013_batchllmjob_results_array_and_more"),
    ]

    operations = [
        migrations.AddField(
            model_name="messagecollection",
            name="chats",
            # NOTE(review): null=True has no effect on ManyToManyField (Django
            # ignores it, fields.W340) and the current models.py declares this
            # field without it. Left as-is because this migration has already
            # been generated/applied.
            field=models.ManyToManyField(
                blank=True, null=True, to="llmevaluator.chatbotmessagearray"
            ),
        ),
    ]
20 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/migrations/__init__.py
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 | from django.core.serializers.json import DjangoJSONEncoder
3 |
4 |
def object_has_tag(model_object, tag_string):
    """Return True if ``model_object.tags`` contains ``tag_string``.

    ``tags`` is a comma-separated string on several models in this app;
    each tag is compared after stripping surrounding whitespace.

    Args:
        model_object: Any object with a ``tags`` attribute (e.g.
            ChatBotMessageArray, MessageCollection, BatchLLMJob).
        tag_string: The exact (already-stripped) tag to look for.

    Returns:
        True if the tag is present, False otherwise. A ``None`` tags value
        is treated as "no tags" -- the ``tags`` fields are declared
        ``null=True``, so the attribute may legitimately be None, and the
        previous implementation crashed with AttributeError in that case.
    """
    tags = model_object.tags
    if tags is None:
        return False
    return any(tag.strip() == tag_string for tag in tags.split(","))
11 |
12 |
class ChatBotMessageArray(models.Model):
    """One stored chat transcript plus review metadata."""

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    # Raw chat history as JSON. NOTE(review): presumably a list of
    # {"role": ..., "content": ...} message dicts -- confirm against the
    # views/runjobs code that writes it.
    message_array = models.JSONField(default=dict, encoder=DjangoJSONEncoder)
    # Free-form reviewer notes.
    comments = models.TextField(default="", null=True, blank=True)
    # Plain integer reference (not a ForeignKey) to the BatchLLMJob that
    # generated this chat, if any.
    source_batch_job_id = models.IntegerField(null=True, blank=True)
    # Comma-separated tag list; queried via object_has_tag().
    tags = models.TextField(default="", null=True, blank=True)
    title = models.TextField(default="Untitled", blank=True)

    # LLM settings for review, later
    llm_model = models.TextField(default="None", blank=True, null=True)
    llm_temperature = models.FloatField(null=True, blank=True)

    def __str__(self):
        return f"ChatBotMessage (ID {self.id}), {self.title}"
28 |
29 |
class MessageCollection(models.Model):
    """A named group of chats that a batch job reads from or writes to."""

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    title = models.TextField(default="", null=True, blank=True)

    # Note: we should use an ArrayField or JSONField or a ManyToManyField if we scale this up.
    # However, to keep things very simple and supportable in SQLite, we'll assume the chat_ids are in a comma-separated string for now. We'll do some basic validation when saving via the front-end.
    chat_ids = models.TextField(default="", null=True, blank=True)
    # Newer ManyToMany relation (added in migration 0014) that supersedes the
    # chat_ids text field above.
    chats = models.ManyToManyField(ChatBotMessageArray, blank=True)

    # We can save source collections in cases where we have batch jobs run.
    # Plain integer references, not ForeignKeys.
    source_collection_id = models.IntegerField(null=True, blank=True)
    source_batch_job_id = models.IntegerField(null=True, blank=True)
    # Comma-separated tag list; queried via object_has_tag().
    tags = models.TextField(default="", null=True, blank=True)

    def __str__(self):
        return f"MessageCollection (ID {self.id}), {self.title}"
47 |
48 |
class BatchLLMJob(models.Model):
    """A scheduled batch run of LLM calls over a MessageCollection.

    Jobs are created via the front-end and executed by the ``runjobs``
    management command; results are collected into a new MessageCollection
    referenced by ``results_array``.
    """

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    title = models.TextField(default="", null=True, blank=True)
    description = models.TextField(null=True, blank=True)
    # Legacy plain-integer reference to the source collection; superseded by
    # message_collection_ref below but still required.
    message_collection_id = models.IntegerField()
    message_collection_ref = models.ForeignKey(
        MessageCollection,
        on_delete=models.SET_NULL,
        null=True,
        related_name="source_messages_collection",
    )
    # Collection holding the chats produced by this job.
    results_array = models.ForeignKey(
        MessageCollection,
        on_delete=models.SET_NULL,
        null=True,
        related_name="results_collection",
    )

    # scheduled, complete
    status = models.TextField(default="scheduled", null=True, blank=True)
    # Comma-separated tag list; queried via object_has_tag().
    tags = models.TextField(default="", null=True, blank=True)

    # settings
    # By default we only run the LLM on GPT-4 with a user message. The
    # settings below let you do other things.

    # Messages
    user_message = models.TextField(default="", null=True, blank=True)
    # If set, replaces each chat's system prompt before the run.
    new_system_prompt = models.TextField(default="", null=True, blank=True)
    resend_last_user_message = models.BooleanField(default=False)

    # Repeat the run 'n' times
    run_n_times = models.IntegerField(default=1)

    # Which LLM models to run
    include_gpt_4 = models.BooleanField(default=True)
    include_gpt_35 = models.BooleanField(default=False)

    # Run temperature tests; True = run across 0.25 to 1.75 with 0.5 increments
    temperature_range = models.BooleanField(default=False)

    def __str__(self):
        return f"Batch LLM Job (ID {self.id}), {self.title}"
93 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 |
3 | # Create your tests here.
4 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Django's command-line utility for administrative tasks."""
3 | import os
4 | import sys
5 |
6 |
def main():
    """Entry point for Django's command-line utility (manage.py)."""
    # Point Django at this project's settings before anything imports them.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        message = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(message) from exc
    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()
23 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/readme.md:
--------------------------------------------------------------------------------
1 | # PhaseLLM Evaluation
2 |
3 | *PhaseLLM Evaluation* helps you run batch jobs across LLMs. Think of it as a playground where you can easily run multiple LLM calls across different models.
4 |
5 | Example use cases:
6 | - Run the same set of messages `n` times to see how responses differ.
7 | - Run messages across different models (e.g., GPT-4 and GPT-3.5) to see performance differences.
8 | - Replace or update system prompts across multiple chats to see if they have an impact on responses.
9 |
10 | [5-minute demo below:](https://www.youtube.com/watch?v=Ycu2eKkCO7Y)
11 | [](https://www.youtube.com/watch?v=Ycu2eKkCO7Y)
12 |
13 | ## Installation and Running
14 |
Please follow the steps below to run *PhaseLLM Evaluation*.
16 |
17 | Run the code below in the `eval_platform` directory.
18 |
19 | ```bash
20 | pip3 install -r requirements.txt
21 | python3 manage.py migrate
22 | ```
23 |
24 | The code above will install `phasellm` and `Django`, and set up the relevant SQLite database.
25 |
Update the `env-template.txt` file with your OpenAI API key and save it to `.env`.
27 |
28 | Finally, to run the server, type the following:
29 | ```bash
30 | python3 manage.py runserver
31 | ```
32 |
33 | You'll then be able to navigate to `http://localhost:8000` and run your evaluations.
34 |
35 | ## Running Batch Jobs
36 |
37 | Once you've created the proper chats, chat groups, and jobs, open a second terminal window and type the following in your `eval_platform` directory:
38 |
39 | ```bash
40 | python3 manage.py runjobs
41 | ```
42 |
This is a custom Django job that will run your jobs. The outputs will be printed in the terminal, but will also be saved in the front-end.
44 |
45 | ## Hosting
46 |
47 | Want us to host the *Evaluation* demo product for you? Please reach out to us at w [at] phaseai [dot] com
48 |
49 | ## Feedback?
50 |
51 | Any feedback is welcome. Please reach out to w [at] phaseai [dot] com and we'll get back to you as soon as we can!
52 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/requirements.txt:
--------------------------------------------------------------------------------
1 | Django==4.2
2 | phasellm>=0.0.17,<0.1.0
3 |
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/screenshot.png
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/static/main.css:
--------------------------------------------------------------------------------
1 | /**
2 | * DEFAULT AND UNIVERSAL VALUES
3 | */
4 |
5 | * {
6 | margin: 0;
7 | padding: 0;
8 | font-family: 'Open Sans', sans-serif;
9 | font-weight: 200;
10 | font-size: 15px;
11 | box-sizing: border-box;
12 | }
13 |
14 | :root {
15 | --standard-margin-spacing-text: 20px;
16 | --internal-standard-padding: 5px;
17 | --standard-border-radius: 5px;
18 | --lightgray-borders-backgrounds: rgb(235, 235, 235);
19 | }
20 |
21 | b {
22 | font-weight: 900;
23 | }
24 |
25 | ul {
26 | margin-left: var(--standard-margin-spacing-text);
27 | }
28 |
29 | a {
30 | text-decoration: none;
31 | color: #4682B4;
32 | font-weight: 500;
33 | }
34 |
35 | /**
36 | * TWO COLUMN CONTAINER TEST
37 | */
38 |
39 | .two-col-content-container {
40 | display: grid;
41 | grid-template-columns: 50% 1fr;
42 | column-gap: calc(3*var(--internal-standard-padding));
43 | height: 100%;
44 | overflow: hidden;
45 | }
46 |
47 | .two-col-content-left {
48 | background-color: white;
49 | overflow: auto;
50 | padding: calc(2*var(--internal-standard-padding));
51 | }
52 |
53 | .two-col-content-right {
54 | background-color: white;
55 | overflow: auto;
56 | padding: calc(2*var(--internal-standard-padding));
57 | }
58 |
59 | /**
60 | * TWO ROW CONTAINER TEST
61 | */
62 |
63 | .two-row-container {
64 | display: grid;
65 | grid-template-rows: auto auto;
66 | row-gap: var(--internal-standard-padding);
67 | height: 100%;
68 | }
69 |
70 | .two-row-top-row {
71 | background-color: white;
72 | padding-bottom: calc(3*var(--internal-standard-padding));
73 | border-bottom: 1px solid var(--lightgray-borders-backgrounds);
74 | }
75 |
76 | .two-row-bottom-row {
77 | background-color: white;
78 | }
79 |
80 | /**
81 | * EVERYTHING ELSE
82 | */
83 |
84 | .two-col-container {
85 | display: grid;
86 | grid-template-columns: 200px 1fr;
87 | }
88 |
89 | #left-menu {
90 | height: 100vh;
91 | background-color: white;
92 | overflow: hidden;
93 | padding: 15px;
94 | border-right: 1px solid lightgray;
95 | }
96 |
97 | #navlogo {
98 | font-family: 'Playfair Display', serif;
99 | font-size: 25px;
100 | font-weight: 300;
101 | letter-spacing: 1px;
102 | display: block;
103 | }
104 |
105 | #navlogo_sub {
106 | font-family: 'Playfair Display', serif;
107 | font-size: 15px;
108 | font-weight: 600;
109 | letter-spacing: 2px;
110 | display: block;
111 | color: gray;
112 | }
113 |
114 | .navlink {
115 | display: block;
116 | margin-top: 10px;
117 | cursor: pointer;
118 | text-decoration: none;
119 | color: black;
120 | font-weight: 100;
121 | }
122 |
123 | .navlink:first-of-type {
124 | margin-top: 25px;
125 | }
126 |
127 | .navlink .navicon {
128 | margin-right: 10px;
129 | }
130 |
131 | #main-content {
132 | height: 100vh;
133 | background-color: white;
134 | overflow: auto;
135 | padding: 15px;
136 | }
137 |
138 | .content-title {
139 | font-weight: 200;
140 | font-size: 25px;
141 | padding-bottom: var(--standard-margin-spacing-text);
142 | }
143 |
144 | input,
145 | textarea,
146 | .formfield {
147 | margin: 0 0 var(--standard-margin-spacing-text) 0;
148 | padding: var(--internal-standard-padding);
149 | border-radius: var(--standard-border-radius);
150 | border: 1px solid lightgray;
151 | }
152 |
153 | .formfield-hover {
154 | cursor: pointer;
155 | }
156 |
157 | .formfield-hover:hover {
158 | background-color: var(--lightgray-borders-backgrounds);
159 | }
160 |
161 | .error_message {
162 | color: crimson;
163 | }
164 |
165 | .job_info_container {
166 | padding: calc(2*var(--internal-standard-padding));
167 | border-radius: var(--standard-border-radius);
168 | background-color: var(--lightgray-borders-backgrounds);
169 | margin: var(--internal-standard-padding) 0 var(--internal-standard-padding) 0;
170 | display: grid;
171 | grid-template-columns: 33% 33% 1fr;
172 | }
173 |
174 | .job_info_container div {
175 | margin-right: var(--internal-standard-padding);
176 | }
177 |
178 | .jobtitle {
179 | font-weight: 900;
180 | margin: calc(2*var(--internal-standard-padding)) 0 calc(2*var(--internal-standard-padding)) 0;
181 | }
182 |
183 | .general-list-container {
184 | padding: calc(2*var(--internal-standard-padding));
185 | border-radius: var(--standard-border-radius);
186 | background-color: var(--lightgray-borders-backgrounds);
187 | margin: var(--internal-standard-padding) 0 var(--internal-standard-padding) 0;
188 | }
189 |
190 | .tag-label-green {
191 | display: inline-block;
192 | padding: var(--internal-standard-padding);
193 | border-radius: var(--standard-border-radius);
194 | background-color: #2E8B57;
195 | color: white;
196 | font-weight: 600;
197 | font-size: 12px;
198 | }
199 |
200 | .tag-label-blue {
201 | display: inline-block;
202 | padding: var(--internal-standard-padding);
203 | border-radius: var(--standard-border-radius);
204 | background-color: #4682B4;
205 | color: white;
206 | font-weight: 600;
207 | font-size: 12px;
208 | }
209 |
210 | .delete-icon {
211 | margin: 0 var(--internal-standard-padding) 0 var(--internal-standard-padding);
212 | cursor: pointer;
213 | }
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/aboutus.html:
--------------------------------------------------------------------------------
1 | {% extends 'base-navigation.html' %}
2 |
3 | {% block bodycontent %}
4 |
5 |
The PhaseLLM Evaluation project is built by Phase AI. You can learn about the PhaseLLM package by visiting phasellm.com. Learn more about Phase AI at phaseai.com.
8 |
9 |
10 |
11 |
If you have questions about this LLM evaluation project, you can also email w [at] phaseai [dot] com.
Advanced Options
14 | Resend Last User Message
15 | Run GPT-4
16 | Run GPT-3.5
17 | Run across temperature = 0.25, 0.75, and 1.25
18 | Number of times to run:
19 |
20 |
21 |
23 |
24 |
25 |
Queue Job
26 |
27 |
76 |
77 | {% endblock %}
--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/batch_review.html:
--------------------------------------------------------------------------------
1 | {% extends 'base-navigation.html' %}
2 |
3 | {% block bodycontent %}
4 |
5 | {% if jobs %}
6 |
7 | {% for job in jobs %}
8 |
9 |