├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── demos-and-products
    ├── README.md
    ├── arxiv-assistant
    │   └── arxiv_assistant.py
    ├── basic-chatbot
    │   ├── demo.py
    │   └── templates
    │   │   └── index.html
    ├── chaining-workshop
    │   ├── README.md
    │   ├── apps.py
    │   ├── demo.py
    │   ├── templates
    │   │   ├── app.html
    │   │   └── applist.html
    │   └── tests.py
    ├── cot-analytics-frontend
    │   ├── README.md
    │   ├── cot-scr-1.png
    │   ├── cot-scr-2.png
    │   ├── frontend.py
    │   ├── incomes.csv
    │   ├── researchllm.py
    │   ├── static
    │   │   ├── data.js
    │   │   ├── interface01.css
    │   │   ├── output.json
    │   │   └── results.js
    │   └── templates
    │   │   └── interface01.html
    ├── cot-analytics
    │   ├── README.md
    │   ├── cot.py
    │   ├── requirements.txt
    │   └── sample_output.md
    ├── eval_platform
    │   ├── env-template.txt
    │   ├── eval_platform
    │   │   ├── __init__.py
    │   │   ├── asgi.py
    │   │   ├── settings.py
    │   │   ├── urls.py
    │   │   └── wsgi.py
    │   ├── llmevaluator
    │   │   ├── __init__.py
    │   │   ├── admin.py
    │   │   ├── apps.py
    │   │   ├── management
    │   │   │   ├── __init__.py
    │   │   │   └── commands
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── runjobs.py
    │   │   ├── migrations
    │   │   │   ├── 0001_initial.py
    │   │   │   ├── 0002_batchllmjob.py
    │   │   │   ├── 0003_chatbotmessagearray_source_batch_job_id_and_more.py
    │   │   │   ├── 0004_alter_chatbotmessagearray_message_array.py
    │   │   │   ├── 0005_alter_chatbotmessagearray_source_batch_job_id_and_more.py
    │   │   │   ├── 0006_batchllmjob_tags_chatbotmessagearray_tags_and_more.py
    │   │   │   ├── 0007_chatbotmessagearray_title.py
    │   │   │   ├── 0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more.py
    │   │   │   ├── 0009_batchllmjob_new_system_prompt_and_more.py
    │   │   │   ├── 0010_batchllmjob_resend_last_user_message.py
    │   │   │   ├── 0011_batchllmjob_description.py
    │   │   │   ├── 0012_batchllmjob_message_collection_ref.py
    │   │   │   ├── 0013_batchllmjob_results_array_and_more.py
    │   │   │   ├── 0014_messagecollection_chats.py
    │   │   │   └── __init__.py
    │   │   ├── models.py
    │   │   ├── tests.py
    │   │   └── views.py
    │   ├── manage.py
    │   ├── readme.md
    │   ├── requirements.txt
    │   ├── screenshot.png
    │   ├── static
    │   │   └── main.css
    │   └── templates
    │   │   ├── aboutus.html
    │   │   ├── base-navigation-two-cols.html
    │   │   ├── base-navigation-two-rows.html
    │   │   ├── base-navigation.html
    │   │   ├── base.html
    │   │   ├── batch.html
    │   │   ├── batch_review.html
    │   │   ├── chats.html
    │   │   ├── create-group.html
    │   │   ├── create.html
    │   │   └── view-chat.html
    ├── newsbot
    │   ├── README.md
    │   ├── news_articles.json
    │   ├── newsbot.py
    │   ├── newsbot_create.py
    │   ├── newsbot_evaluate.py
    │   └── notes.md
    ├── researchllm
    │   ├── README.md
    │   ├── frontend.py
    │   ├── incomes.csv
    │   ├── requirements.txt
    │   ├── researchllm.py
    │   ├── screenshot.png
    │   └── templates
    │   │   └── index.html
    └── web-search-chatbot
    │   ├── demo.py
    │   └── templates
    │       └── index.html
├── docs
    ├── Makefile
    ├── README.md
    ├── make.bat
    └── source
    │   ├── conf.py
    │   └── index.md
├── phasellm
    ├── __init__.py
    ├── agents.py
    ├── configurations.py
    ├── configurations_utils.py
    ├── eval.py
    ├── exceptions.py
    ├── html.py
    ├── llms.py
    ├── llms_utils.py
    ├── logging.py
    └── types.py
├── project_metadata.py
├── readthedocs.yaml
├── release_checklist.md
├── requirements-dev.txt
├── requirements.txt
├── setup.py
├── tests-non-deterministic
    ├── README.md
    ├── __init__.py
    └── llms
    │   ├── __init__.py
    │   └── test_llms.py
└── tests
    ├── README.MD
    ├── __init__.py
    ├── e2e
        ├── __init__.py
        ├── agents
        │   ├── __init__.py
        │   └── test_e2e_agents.py
        ├── llms
        │   ├── __init__.py
        │   ├── test_e2e_llms.py
        │   └── utils.py
        └── sse
        │   ├── __init__.py
        │   └── test_e2e_sse.py
    ├── release_checklist_code.py
    ├── unit
        ├── __init__.py
        ├── agents
        │   ├── __init__.py
        │   └── test_agents.py
        └── llms
        │   ├── __init__.py
        │   └── test_llms.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # MacOS
 10 | .DS_Store
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | pip-wheel-metadata/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Jetbrains IDEs
117 | .idea
118 | 
119 | # Spyder project settings
120 | .spyderproject
121 | .spyproject
122 | 
123 | # Rope project settings
124 | .ropeproject
125 | 
126 | # mkdocs documentation
127 | /site
128 | 
129 | # mypy
130 | .mypy_cache/
131 | .dmypy.json
132 | dmypy.json
133 | 
134 | # Pyre type checker
135 | .pyre/
136 | 
137 | # Workspaces
138 | /workspace
139 | 
140 | # Scratch directories
141 | .tmp


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Phase AI Technologies Inc.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # PhaseLLM
  2 | 
  3 | Large language model evaluation and workflow framework from [Phase AI](https://phaseai.com/).
  4 | 
  5 | - [Follow us on Twitter](https://twitter.com/phasellm) for updates.
  6 | - [Star us on GitHub](https://github.com/wgryc/phasellm).
  7 | - [Read the Docs](https://phasellm.readthedocs.io/en/latest/autoapi/phasellm/index.html) -- Module reference. Tutorials and code examples are below.
  8 | 
  9 | ## Installation
 10 | 
 11 | You can install PhaseLLM via pip:
 12 | 
 13 | ```
 14 | pip install phasellm
 15 | ```
 16 | 
 17 | Installing from PyPI does not include libraries for running LLMs locally. Please run `pip install phasellm[complete]` if you plan on using LLMs locally (e.g., our `DollyWrapper`).
 18 | 
 19 | Sample demos and products are in the `demos-and-products` folder. Clone this repository and follow instructions in the `README.md` file in each product folder to run those.
 20 | 
 21 | ## Introduction
 22 | 
 23 | The coming months and years will bring thousands of new products and experienced powered by large language models (LLMs) like ChatGPT or its increasing number of variants. Whether you're using OpenAI's ChatGPT, Anthropic's Claude, or something else all together, you'll want to test how well your models and prompts perform against user needs. As more models are launched, you'll also have a bigger range of options.
 24 | 
 25 | PhaseLLM is a framework designed to help manage and test LLM-driven experiences -- products, content, or other experiences that product and brand managers might be driving for their users.
 26 | 
 27 | Here's what PhaseLLM does:
 28 | 1. We standardize API calls so you can plug and play models from OpenAI, Cohere, Anthropic, or other providers.
 29 | 2. We've built evaluation frameworks so you can compare outputs and decide which ones are driving the best experiences for users.
 30 | 3. We're adding automations so you can use advanced models (e.g., GPT-4) to evaluate simpler models (e.g., GPT-3) to determine what combination of prompts yield the best experiences, especially when taking into account costs and speed of model execution.
 31 | 
 32 | PhaseLLM is open source and we envision building more features to help with model understanding. We want to help developers, data scientists, and others launch new, robust products as easily as possible.
 33 | 
 34 | If you're working on an LLM product, please reach out. We'd love to help out.
 35 | 
 36 | ## Example: Evaluating Travel Chatbot Prompts with GPT-3.5, Claude, and more
 37 | 
 38 | PhaseLLM makes it incredibly easy to plug and play LLMs and evaluate them, in some cases with *other* LLMs. Suppose you're building a travel chatbot, and you want to test Claude and Cohere against each other, using GPT-3.5. 
 39 | 
 40 | What's awesome with this approach is that (1) you can plug and play models and prompts as needed, and (2) the entire workflow takes a small amount of code. This simple example can easily be scaled to much more complex workflows.
 41 | 
 42 | So, time for the code... First, load your API keys.
 43 | 
 44 | ```python
 45 | import os
 46 | from dotenv import load_dotenv
 47 | 
 48 | load_dotenv()
 49 | openai_api_key = os.getenv("OPENAI_API_KEY")
 50 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 51 | cohere_api_key = os.getenv("COHERE_API_KEY")
 52 | ```
 53 | 
 54 | We're going to set up the *Evaluator*, which takes two LLM model outputs and decides which one is better for the objective at hand.
 55 | 
 56 | ```python
 57 | from phasellm.eval import GPTEvaluator
 58 | 
 59 | # We'll use GPT-3.5 as the evaluator (default for GPTEvaluator).
 60 | e = GPTEvaluator(openai_api_key)
 61 | ```
 62 | 
 63 | Now it's time to set up the experiment. In this case, we'll set up an `objective` which describes what we're trying to achieve with our chatbot. We'll also provide 5 examples of starting chats that we've seen with our users.
 64 | 
 65 | ```python
 66 | # Our objective.
 67 | objective = "We're building a chatbot to discuss a user's travel preferences and provide advice."
 68 | 
 69 | # Chats that have been launched by users.
 70 | travel_chat_starts = [
 71 |     "I'm planning to visit Poland in spring.",
 72 |     "I'm looking for the cheapest flight to Europe next week.",
 73 |     "I am trying to decide between Prague and Paris for a 5-day trip",
 74 |     "I want to visit Europe but can't decide if spring, summer, or fall would be better.",
 75 |     "I'm unsure I should visit Spain by flying via the UK or via France."
 76 | ]
 77 | ```
 78 | 
 79 | Now we set up our Cohere and Claude models.
 80 | 
 81 | ```python
 82 | from phasellm.llms import CohereWrapper, ClaudeWrapper
 83 | cohere_model = CohereWrapper(cohere_api_key)
 84 | claude_model = ClaudeWrapper(anthropic_api_key)
 85 | ```
 86 | 
 87 | Finally, we launch our test. We run an experiments where both models generate a chat response and then we have GPT-3.5 evaluate the response.
 88 | 
 89 | ```python
 90 | print("Running test. 1 = Cohere, and 2 = Claude.")
 91 | for tcs in travel_chat_starts:
 92 | 
 93 |     messages = [{"role":"system", "content":objective},
 94 |             {"role":"user", "content":tcs}]
 95 | 
 96 |     response_cohere = cohere_model.complete_chat(messages, "assistant")
 97 |     response_claude = claude_model.complete_chat(messages, "assistant")
 98 | 
 99 |     pref = e.choose(objective, tcs, response_cohere, response_claude)
100 |     print(f"{pref}")
101 | ```
102 | 
103 | In this case, we simply print which of the two models was preferred.
104 | 
105 | Voila! You've got a suite to test your models and can plug-and-play three major LLMs.
106 | 
107 | ## Contact Us
108 | 
109 | If you have questions, requests, ideas, etc. please reach out at w (at) phaseai (dot) com.
110 | 


--------------------------------------------------------------------------------
/demos-and-products/README.md:
--------------------------------------------------------------------------------
1 | # Demos and Products (/demos-and-products)
2 | 
3 | This folder contains various products and demos built using PhaseLLM.
4 | 
5 | Every fold contains a self-contained product or demo. Each one also contains a README.md file that includes installation instructions.
6 | 
7 | All products require the `phasellm` package to be installed.
8 | 


--------------------------------------------------------------------------------
/demos-and-products/arxiv-assistant/arxiv_assistant.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | 
  4 | from dotenv import load_dotenv
  5 | 
  6 | from feedparser import FeedParserDict
  7 | 
  8 | from phasellm.llms import ClaudeWrapper
  9 | 
 10 | from phasellm.agents import EmailSenderAgent, RSSAgent
 11 | 
 12 | load_dotenv()
 13 | 
 14 | # Load OpenAI and newsapi.org API keys.
 15 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 16 | 
 17 | # Load Gmail credentials.
 18 | gmail_email = os.getenv("GMAIL_EMAIL")
 19 | gmail_password = os.getenv("GMAIL_PASSWORD")  # https://myaccount.google.com/u/1/apppasswords
 20 | 
 21 | # Set up the LLM
 22 | llm = ClaudeWrapper(anthropic_api_key)
 23 | 
 24 | 
 25 | def interest_analysis(title: str, abstract: str, interests: str):
 26 |     interest_analysis_prompt = \
 27 |         f"""
 28 |         I want to determine if an academic paper is relevant to my interests. I am interested in: {interests}. The paper 
 29 |         is titled: {title}. It has the following abstract: {abstract}. Is this paper relevant to my interests? Respond 
 30 |         with either 'yes' or 'no'. Do not explain your reasoning.
 31 |         
 32 |         Example responses are given between the ### ### symbols. Respond exactly as shown in the examples.
 33 |         
 34 |         ###yes###
 35 |         or
 36 |         ###no###
 37 |         """
 38 |     return llm.text_completion(prompt=interest_analysis_prompt)
 39 | 
 40 | 
 41 | def summarize(title: str, abstract: str, interests: str):
 42 |     """
 43 |     This function summarizes why the paper might be relevant to the user's interests.
 44 |     Args:
 45 |         title: The title of the paper.
 46 |         abstract: The abstract of the paper.
 47 |         interests: The user's interests.
 48 | 
 49 |     Returns: The summary of why the paper might be relevant to the user's interests.
 50 | 
 51 |     """
 52 |     # Summarize why the paper might be relevant to the user's interests.
 53 |     summary_prompt = \
 54 |         f"""
 55 |         Summarize why the the following paper is relevant to my interests. My interests are: {interests}. The paper is 
 56 |         titled: {title}. It has the following abstract: {abstract}.
 57 |         """
 58 |     return llm.text_completion(prompt=summary_prompt)
 59 | 
 60 | 
 61 | def send_email(title: str, abstract: str, link: str, summary: str) -> None:
 62 |     """
 63 |     This function sends an email to the user with the title of the paper and the summary.
 64 |     Args:
 65 |         title: The title of the paper.
 66 |         abstract: The abstract of the paper.
 67 |         link: The link to the paper.
 68 |         summary: The summary of the paper.
 69 | 
 70 |     Returns:
 71 | 
 72 |     """
 73 |     # Send email
 74 |     print('Sending email...')
 75 | 
 76 |     content = f"Title: {title}\n\nSummary:\n{summary}\n\nAbstract:\n{abstract}\n\nLink: {link}"
 77 | 
 78 |     email_agent = EmailSenderAgent(
 79 |         sender_name='arXiv Assistant',
 80 |         smtp='smtp.gmail.com',
 81 |         sender_address=gmail_email,
 82 |         password=gmail_password,
 83 |         port=587
 84 |     )
 85 |     email_agent.send_plain_email(recipient_email=gmail_email, subject=title, content=content)
 86 | 
 87 | 
 88 | def analyze_and_email(paper: FeedParserDict, interests: str, retries: int = 0) -> None:
 89 |     """
 90 |     This function analyzes the latest papers from arXiv and emails the user if any of them are relevant to their
 91 |     interests.
 92 |     Args:
 93 |         paper: The paper to analyze.
 94 |         interests: The user's interests.
 95 |         retries: The number of retry attempts made so far.
 96 |     Returns:
 97 | 
 98 |     """
 99 |     # Allow for a maximum of 1 retry.
100 |     max_retries = 1
101 | 
102 |     title = paper['title']
103 |     abstract = paper['summary']
104 |     link = paper['link']
105 |     interested = interest_analysis(title=title, abstract=abstract, interests=interests)
106 | 
107 |     # Find the answer within the response.
108 |     answer = re.search(r'###(yes|no)###', interested)
109 |     if not answer:
110 |         if retries < max_retries:
111 |             analyze_and_email(paper=paper, interests=interests, retries=retries + 1)
112 |     else:
113 |         interested = answer.group(0)
114 | 
115 |     # Send email if the user is interested.
116 |     if interested == '###yes###':
117 |         summary = summarize(title=title, abstract=abstract, interests=interests)
118 |         send_email(title=title, abstract=abstract, link=link, summary=summary)
119 |     elif interested == '###no###':
120 |         pass
121 |     else:
122 |         print(f'LLM did not respond in the expected format after {max_retries}. Skipping paper:\n{title}')
123 | 
124 | 
125 | def main():
126 |     """
127 |     Entry point for the arXiv assistant.
128 |     Returns:
129 | 
130 |     """
131 |     # Ask user what they want to read about.
132 |     interests = input("What kinds of papers do you want to be notified about?")
133 | 
134 |     papers_processed = 0
135 | 
136 |     rss_agent = RSSAgent(url='https://arxiv.org/rss/cs')
137 |     with rss_agent.poll(60) as poller:
138 |         for papers in poller():
139 |             print(f'Found {len(papers)} new paper(s).')
140 |             for paper in papers:
141 |                 analyze_and_email(
142 |                     paper=paper,
143 |                     interests=interests
144 |                 )
145 |                 papers_processed += 1
146 |                 print(f'Processed {papers_processed} paper(s).')
147 | 
148 | 
149 | if __name__ == '__main__':
150 |     main()
151 | 


--------------------------------------------------------------------------------
/demos-and-products/basic-chatbot/demo.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | from dotenv import load_dotenv
 4 | 
 5 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
 6 | 
 7 | load_dotenv()
 8 | MODEL_LLM = OpenAIGPTWrapper
 9 | MODEL_STRING = "gpt-4"
10 | MODEL_API_KEY = os.getenv("OPENAI_API_KEY")
11 | llm = MODEL_LLM(MODEL_API_KEY, MODEL_STRING)
12 | 
13 | CHATBOT = None 
14 | 
15 | from flask import Flask, request, render_template, jsonify
16 | 
17 | APP = Flask(__name__)
18 | 
19 | # We have a function because we'll eventually add other things, like system prompts, variables, etc.
20 | # Returns True if successful, False otherwise
21 | def resetChatBot():
22 |     global CHATBOT 
23 |     CHATBOT = ChatBot(llm)
24 |     return True 
25 | 
26 | resetChatBot()
27 | 
28 | @APP.route('/submit_chat_message', methods = ['POST'])
29 | def sendchat():
30 |     global CHATBOT
31 |     message = request.json["input"]
32 |     response = CHATBOT.chat(message)
33 |     return {"status":"ok", "content":response,}
34 | 
35 | @APP.route('/resetchatbot')
36 | def resetchatbot():
37 |     if resetChatBot():
38 |         return jsonify({"status":"ok", "message":"ChatBot has been restarted."})
39 |     else:
40 |         return jsonify({"status":"error", "message":"ChatBot could not be restarted."})
41 | 
42 | @APP.route('/')
43 | def index():
44 | 
45 |     # Loop and print all args...
46 |     #for key, value in request.args.items():
47 |     #    print(f"{key} :: {value}")
48 |     #print(request.args)
49 | 
50 |     if "reset" in request.args:
51 |         if request.qrgs['reset'] == 'true':
52 |             resetChatBot()
53 | 
54 |     return render_template('index.html')
55 | 
56 | def run(host="127.0.0.1", port=5000):
57 |     """
58 |     Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only.
59 |     """
60 |     APP.run(host=host, port=port)
61 | 
62 | MAIN_HOST = "127.0.0.1"
63 | MAIN_PORT = 8000
64 | if __name__ == '__main__':
65 |     run(MAIN_HOST, MAIN_PORT)


--------------------------------------------------------------------------------
/demos-and-products/basic-chatbot/templates/index.html:
--------------------------------------------------------------------------------
  1 | <html>
  2 | <head>
  3 | <title>WorkshopLLM</title>
  4 | 
  5 | <link rel="preconnect" href="https://fonts.googleapis.com">
  6 | <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  7 | <link href="https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,300;1,400;1,500;1,600;1,700;1,800&display=swap" rel="stylesheet">
  8 | 
  9 | <style>
 10 | * {
 11 |     font-family: 'Open Sans', sans-serif;	
 12 |     box-sizing: border-box;	
 13 | }
 14 | 
 15 | body {
 16 | 	margin:0;
 17 | 	padding:0;
 18 | 	overflow:hidden;
 19 | }
 20 | 
 21 | #chatstream {
 22 |     padding:20px;
 23 |     height:calc(100vh - 200px);
 24 |     margin:20px;
 25 |     border:1px solid lightgray;
 26 |     border-radius:10px;
 27 |     overflow-y:auto;
 28 | }
 29 | 
 30 | #chatinputdiv {
 31 |     padding:20px;
 32 |     height:150px;
 33 |     margin:20px;
 34 |     border:1px solid lightgray;
 35 |     border-radius:10px;
 36 | }
 37 | 
 38 | .interface-element {
 39 | 	font-size:15px;
 40 | 	font-family: 'Open Sans', sans-serif;	
 41 | }
 42 | 	
 43 | #chat-input {
 44 | 	padding:10px;
 45 | 	border-radius:8px;
 46 | 	border:1px solid rgb(230,230,230);
 47 | 	width:100%;
 48 | }
 49 | 
 50 | #chat-button {
 51 | 	padding:10px;
 52 | 	border-radius:8px;
 53 | 	border:1px solid rgb(230,230,230);
 54 | 	width:100%;
 55 | 	background:rgb(83, 145, 101);
 56 | 	color:rgb(248, 245, 228);
 57 | 	font-weight:800px;
 58 | 	margin-top:10px;
 59 | 	width:100px;
 60 | 	cursor:pointer;
 61 | }
 62 | 
 63 | .chat-msg-user {
 64 | 	background-color:rgb(63, 73, 127);
 65 | 	color:white;
 66 | 	border-radius:8px;
 67 | 	margin-left:50px;
 68 | 	padding:10px;
 69 | 	margin-top:10px;
 70 | }
 71 | 
 72 | .chat-msg-chatbot {
 73 | 	background-color:rgb(248, 245, 228);
 74 | 	border-radius:8px;
 75 | 	margin-right:50px;
 76 | 	padding:10px;
 77 | 	margin-top:10px;
 78 | }
 79 | 
 80 | </style>
 81 | 
 82 | </head>
 83 | <body>
 84 | 
 85 | <div id="chatstream">
 86 |     
 87 | </div>
 88 | 
 89 | 
 90 | <div id="chatinputdiv">
 91 |     <form>
 92 |         <input type="text" class="interface-element" id="chat-input" placeholder="Say something! 😀">
 93 |         <button type="button" class="interface-element" id="chat-button" onclick="javascript:send_message();">Submit</button>
 94 |     </form>
 95 | </div>
 96 | 
 97 | <script>
 98 | 
 99 | function append_message(msg, who) {
100 | 	var output_div = document.getElementById("chatstream");
101 | 	output_div.innerHTML = output_div.innerHTML + `<div class="chat-msg-${who}">${msg}</div>`;
102 | }
103 | 
104 | function send_message() {
105 | 	var chat_message_box = document.getElementById("chat-input");
106 | 	var msg_to_send = chat_message_box.value + "";
107 | 	append_message(msg_to_send, "user");
108 | 	chat_message_box.value = "";
109 | 	
110 |     var data = {"input":msg_to_send};
111 |     const response = fetch("submit_chat_message", {
112 |         method: "POST",
113 |         cache: "no-cache",
114 |         credentials: "same-origin",
115 |         headers: {"Content-Type": "application/json"},
116 |         body: JSON.stringify(data),
117 |     })
118 |     .then(response=>response.json())
119 |     .then(data=>{
120 | 		console.log(data);
121 | 		console.log(`STATUS: ${data["status"]}`);
122 | 		console.log(`CONTENT: ${data["content"]}`);
123 | 		var new_content = data["content"].replace(/(?:\r\n|\r|\n)/g, '<br>');
124 | 		append_message(new_content, "chatbot");
125 |     })
126 | }
127 | 
128 | var element = document.getElementById('chat-input');
129 | element.addEventListener('keypress', function(e){
130 |   if (e.keyCode == 13) {
131 |     send_message();
132 | 	e.preventDefault(); 
133 |   }
134 | });
135 | 
136 | </script>
137 | 
138 | </body>
139 | </html>


--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/README.md:
--------------------------------------------------------------------------------
 1 | # Chaining Workshop
 2 | 
 3 | This provides a front-end and a set of prompt templates where you can then begin chaining and structuring "apps" in various ways.
 4 | 
 5 | ## Example Prompt Types
 6 | 
 7 | - System Message: show a message without any logic around what is shown.
 8 | - Linear Order: show a message at a specific time (similar to 'system message' but with order).
 9 | - Logic
10 | 
11 | ## Sample Apps
12 | 
13 | - AmpUp.ai with a "yes/no" from the LLM
14 | - AmpUp.ai with a confidence score
15 | - Newsbot with review of outputs
16 | - Character-focused chatbot
17 | - Travel agent workflow
18 | 
19 | ## Data Structure
20 | 
21 | { prompt_id, prompt}
22 | fallback prompt (i.e., error)
23 | 
24 | { pid_1 -> pid_2, conditions}
25 | 
26 | 
27 | ## Characters
28 | 
29 | ### Socrates
30 | 
31 | { "prompt_id": 1, "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 2}
32 | 
33 | { "prompt_id": 2, "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 2}
34 | 
35 | 
36 | 
37 | variables = user/app provided, LLM-provided
38 | 
39 | ## How to Add Conditional Flows
40 | 
41 | - Output Parser: need to take the output of a model and parse it in some way. This should parse the outputs into specific variables.
42 | - Pass a function to the next prompt? This will be limited, though -- you still need to write functions. Is that bad?
43 | - Prebuilt template functions + custom functions.
44 | 
45 | Output Parser -> Environment Variable -> Function
46 | 
47 | 
48 | OUTPUT PARSER
49 | 
50 | 
51 | 
52 | For all of your responses, please provide them in the following format:
53 | ---MESSAGE
54 | This is where your actual message will go.
55 | ---SENTIMENT-SCORE
56 | A score between 0 and 100 that shows how positive or negative the person's response was when describing their product.
57 | ---END
58 | Include 'yes' or 'no' here. 'Yes' means we've asked 2 follow-up questions or the sentiment score has gotten close to 0 and you think it's safer to end the conversation. 'Yes' will continue the conversation.
59 | 
60 | ## Conditional Flows v2
61 | 
62 | Right now, this is all hard-coded via " ---VAR" which is a poor way of doing things. 


--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/apps.py:
--------------------------------------------------------------------------------
  1 | app_socrates = {
  2 | 
  3 | "code":"socrates",
  4 | "name":"Chat with Socrates",
  5 | 
  6 | "prompts": {
  7 | 
  8 |     0 : {
  9 |         "type":"system_message", "message": "You are chatting with Socrates. Enjoy!", "next_prompt": 1
 10 |         },
 11 | 
 12 |     1 : {
 13 |         "prompt": "REMINDER: you are playing the role of Socrates and you are meant to reply to every message as if you were Socrates using the Socratic method. Please do so with the message below.\nMESSAGE:{message}", "next_prompt": 1
 14 |         }
 15 | 
 16 |     }
 17 | 
 18 | }
 19 | 
 20 | app_yoyo = {
 21 | 
 22 | "code":"yoyo",
 23 | "name":"Chat with 'Yo Yo'",
 24 | 
 25 | "prompts": {
 26 | 
 27 |     0 : {
 28 |         "type":"system_message", "message": "You are chatting with someone that uses 'yo' too much. Enjoy!", "next_prompt": 1
 29 |         },
 30 | 
 31 |     1 : {
 32 |         "prompt": "REMINDER: you are a chatbot that starts every message with 'Yo, yo, yo!' and also includes 'yo' throughout responses. lease do so with the message below.\nMESSAGE:{message}", "next_prompt": 1
 33 |         }
 34 | 
 35 |     }
 36 | 
 37 | }
 38 | 
 39 | app_act = {
 40 | 
 41 | "code":"act",
 42 | "name":"Acceptance and Commitment Therapy",
 43 | 
 44 | "prompts": {
 45 | 
 46 |     0 : {
 47 |         "type":"system_message", "message": "This is an 'Acceptance and Commitment Therapy' (ACT) coach. The responses in this chat model will always focus on different follow-up questions or advice around how you should move forward with your day based on this style of positive psychology.", "next_prompt": 1
 48 |         },
 49 | 
 50 |     1 : {
 51 |         "prompt": "REMINDER: you are an Acceptance and Commitment Therapy' (ACT) coach and every message needs to follow the perspective of an ACT therapist that is also steeped in positive and humanistic psychology with a strong focus on ACT.\nMESSAGE:{message}", "next_prompt": 1
 52 |         }
 53 | 
 54 |     }
 55 | 
 56 | }
 57 | 
 58 | app_random_end = {
 59 | 
 60 | "code": "random",
 61 | "name": "Random End",
 62 | "prompts": {
 63 | 
 64 |     0 : {
 65 |         "type":"system_message", "message": "This is a demo bot that always follows up with ONE question and also randomly ends the conversation. It's being used to show how conditional app flows could work.", "next_prompt": 1
 66 |         },
 67 | 
 68 |     1 : {
 69 |         "prompt": "REMINDER: you only allowed to respond with ONE SHORT QUESTION to the MESSAGE below. Please make sure that your response follows the following format:\n---RESPONSE\nThis is where your response actually goes.\n---NEXT\nPut 'YES' or 'NO' here randomly, with a 50% split.\n\n\nMESSAGE:{message}", "next_prompt": 1
 70 |         }
 71 | 
 72 |     }
 73 | 
 74 | }
 75 | 
 76 | app_danger_demo = {
 77 | 
 78 | "code": "danger",
 79 | "name": "Brand Sentiment",
 80 | "prompts": {
 81 | 
 82 |     0 : {
 83 |         "type":"system_message", "message": "This is a demo bot that interviews you about how you feel about your recent Nike sneaker purchase. If your sentiment goes down quite a bit, then it ends the interview.", "next_prompt": 1
 84 |         },
 85 | 
 86 |     1 : {
 87 |         "prompt": "REMINDER: please always follow up with a question to keep learning about my sentiment around Nike sneakers. Also provide a 'danger' score from 0 to 100, where 100 means the conversation is incredibly negative, and 0 means it's incredibly positive, and 50 means it's neutral. Please make sure that your response follows the following format, always starting with '---RESPONSE':\n\n---RESPONSE\nThis is where your response actually goes.\n---DANGER\nThis is the sentiment score with 100 = negative, 50 = neutral, and 0 = positive.\n\n\nMESSAGE:{message}", "next_prompt": 1
 88 |         }
 89 | 
 90 |     }
 91 | 
 92 | }
 93 | 
 94 | 
 95 | APP_DATA_SETS = {
 96 |     "socrates": app_socrates,
 97 |     "yoyo": app_yoyo,
 98 |     "act": app_act,
 99 |     "random": app_random_end,
100 |     "danger": app_danger_demo
101 | }


--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/demo.py:
--------------------------------------------------------------------------------
  1 | # Import all the data, apps, etc. we have built...
  2 | from apps import *
  3 | 
  4 | import os
  5 | from dotenv import load_dotenv
  6 | 
  7 | from phasellm.llms import OpenAIGPTWrapper, ChatBot, Prompt
  8 | 
  9 | load_dotenv()
 10 | MODEL_LLM = OpenAIGPTWrapper
 11 | MODEL_STRING = "gpt-4"
 12 | #MODEL_STRING = "gpt-3.5-turbo" # Use for speed.
 13 | MODEL_API_KEY = os.getenv("OPENAI_API_KEY")
 14 | llm = MODEL_LLM(MODEL_API_KEY, MODEL_STRING)
 15 | 
 16 | CHATBOT = None 
 17 | 
 18 | APP_PROMPT_STATE = 0
 19 | APP_CODE = None
 20 | 
 21 | from flask import Flask, request, render_template, jsonify
 22 | 
 23 | APP = Flask(__name__)
 24 | 
 25 | # We have a function because we'll eventually add other things, like system prompts, variables, etc.
 26 | # Returns True if successful, False otherwise
 27 | def resetChatBot():
 28 |     global CHATBOT 
 29 |     CHATBOT = ChatBot(llm)
 30 |     return True 
 31 | 
 32 | resetChatBot()
 33 | 
 34 | def parseResponse(r):
 35 |     lines = r.strip().split("\n")
 36 | 
 37 |     # Should eventually throw an error.
 38 |     if r[0:3] != "---":
 39 |         return None
 40 |     #assert r[0:3] == "---"
 41 | 
 42 |     var_name = None 
 43 |     v = ""
 44 | 
 45 |     rdict = {}
 46 | 
 47 |     for line in lines:
 48 |         if line[0:3] == "---":
 49 |             if var_name is not None:
 50 |                 rdict[var_name] = v.strip()
 51 |             var_name = line[3:].strip().upper()
 52 |             v = ""
 53 |         else:
 54 |             v += line
 55 | 
 56 |     rdict[var_name] = v.strip()
 57 | 
 58 |     return rdict
 59 | 
 60 | @APP.route('/submit_chat_message', methods = ['POST'])
 61 | def sendchat():
 62 |     global CHATBOT
 63 |     message = request.json["input"]
 64 |     response = process_message(message)
 65 |     return {"status":"ok", "content":response}
 66 | 
 67 | @APP.route('/resetchatbot')
 68 | def resetchatbot():
 69 |     if resetChatBot():
 70 |         return jsonify({"status":"ok", "message":"ChatBot has been restarted."})
 71 |     else:
 72 |         return jsonify({"status":"error", "message":"ChatBot could not be restarted."})
 73 | 
 74 | def isInt(v):
 75 |     try:
 76 |         int(v)
 77 |     except:
 78 |         return False 
 79 |     return True 
 80 | 
 81 | def process_message(message):
 82 |     global APP_PROMPT_STATE
 83 |     global APP_CODE
 84 |     global CHATBOT
 85 |     prompt = Prompt(APP_CODE["prompts"][APP_PROMPT_STATE]["prompt"])
 86 |     filled_prompt = prompt.fill(message = message)
 87 | 
 88 |     print(f"\n\n{filled_prompt}\n\n")
 89 | 
 90 |     response = CHATBOT.chat(filled_prompt)
 91 | 
 92 |     print(f"\n\n{response}\n\n")
 93 | 
 94 |     response_dict = parseResponse(response)
 95 | 
 96 |     next_prompt = -1
 97 |     if isInt(APP_CODE["prompts"][APP_PROMPT_STATE]["next_prompt"]):
 98 |         next_prompt = APP_CODE["prompts"][APP_PROMPT_STATE]["next_prompt"]
 99 | 
100 |     if response_dict is not None:
101 |         print(response_dict)
102 |         if "NEXT" in response_dict:
103 |             if response_dict["NEXT"].upper() == "NO":
104 |                 response = "Chat is over!"
105 |             else:
106 |                 if "RESPONSE" in response_dict:
107 |                     response = response_dict["RESPONSE"]
108 |         if "DANGER" in response_dict:
109 |             if isInt(response_dict["DANGER"]):
110 |                 danger_score = int(response_dict["DANGER"])
111 |                 if danger_score > 80:
112 |                     response = "Dangerous topic! Chat is over!"
113 |                 else:
114 |                     if "RESPONSE" in response_dict:
115 |                         response = response_dict["RESPONSE"]
116 | 
117 |     APP_PROMPT_STATE = next_prompt 
118 | 
119 |     return response
120 | 
121 | @APP.route("/")
122 | def index():
123 |     applist = ""
124 |     for key in APP_DATA_SETS:
125 |         applist += f"""
126 |         <p><a href='/app?reset=true&app={APP_DATA_SETS[key]["code"]}'>{APP_DATA_SETS[key]["name"]}</a>
127 |         """
128 |     return render_template('applist.html', applist=applist)
129 | 
130 | @APP.route('/app')
131 | def llmapp():
132 | 
133 |     global APP_PROMPT_STATE
134 |     global APP_CODE
135 | 
136 |     # Loop and print all args...
137 |     #for key, value in request.args.items():
138 |     #    print(f"{key} :: {value}")
139 |     #print(request.args)
140 | 
141 |     if "reset" in request.args:
142 |         if request.args['reset'] == 'true':
143 |             resetChatBot()
144 | 
145 |     app_name = ""
146 |     system_message = ""
147 |     if "app" in request.args:
148 |         app_code = request.args['app']
149 |         if app_code in APP_DATA_SETS:
150 |             system_message = APP_DATA_SETS[app_code]["prompts"][0]["message"]
151 |             app_name = app_code
152 |             APP_PROMPT_STATE = 0
153 |             APP_CODE = APP_DATA_SETS[app_code]
154 |             APP_PROMPT_STATE = APP_DATA_SETS[app_code]["prompts"][0]["next_prompt"]
155 | 
156 |     return render_template('app.html', app_name=app_name, sys_msg=system_message)
157 | 
158 | def run(host="127.0.0.1", port=5000):
159 |     """
160 |     Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only.
161 |     """
162 |     APP.run(host=host, port=port)
163 | 
164 | MAIN_HOST = "127.0.0.1"
165 | MAIN_PORT = 8000
166 | if __name__ == '__main__':
167 |     run(MAIN_HOST, MAIN_PORT)


--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/templates/app.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html>
  3 | <head>
  4 | <title>WorkshopLLM</title>
  5 | 
  6 | <link rel="preconnect" href="https://fonts.googleapis.com">
  7 | <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  8 | <link href="https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,300;1,400;1,500;1,600;1,700;1,800&display=swap" rel="stylesheet">
  9 | 
 10 | <style>
 11 | * {
 12 |     font-family: 'Open Sans', sans-serif;	
 13 |     box-sizing: border-box;	
 14 | }
 15 | 
 16 | body {
 17 | 	margin:0;
 18 | 	padding:0;
 19 | 	overflow:hidden;
 20 | }
 21 | 
 22 | #chatstream {
 23 |     padding:20px;
 24 |     height:calc(100vh - 200px);
 25 |     margin:20px;
 26 |     border:1px solid lightgray;
 27 |     border-radius:10px;
 28 |     overflow-y:auto;
 29 | }
 30 | 
 31 | #chatinputdiv {
 32 |     padding:20px;
 33 |     height:150px;
 34 |     margin:20px;
 35 |     border:1px solid lightgray;
 36 |     border-radius:10px;
 37 | }
 38 | 
 39 | .interface-element {
 40 | 	font-size:15px;
 41 | 	font-family: 'Open Sans', sans-serif;	
 42 | }
 43 | 	
 44 | #chat-input {
 45 | 	padding:10px;
 46 | 	border-radius:8px;
 47 | 	border:1px solid rgb(230,230,230);
 48 | 	width:100%;
 49 | }
 50 | 
 51 | #chat-button {
 52 | 	padding:10px;
 53 | 	border-radius:8px;
 54 | 	border:1px solid rgb(230,230,230);
 55 | 	width:100%;
 56 | 	background:rgb(83, 145, 101);
 57 | 	color:rgb(248, 245, 228);
 58 | 	font-weight:800px;
 59 | 	margin-top:10px;
 60 | 	width:100px;
 61 | 	cursor:pointer;
 62 | }
 63 | 
 64 | .chat-msg-system {
 65 | 	background-color:crimson;
 66 | 	color:white;
 67 | 	border-radius:8px;
 68 | 	padding:10px;
 69 | 	margin-top:10px;
 70 | }
 71 | 
 72 | .chat-msg-user {
 73 | 	background-color:rgb(63, 73, 127);
 74 | 	color:white;
 75 | 	border-radius:8px;
 76 | 	margin-left:50px;
 77 | 	padding:10px;
 78 | 	margin-top:10px;
 79 | }
 80 | 
 81 | .chat-msg-chatbot {
 82 | 	background-color:rgb(248, 245, 228);
 83 | 	border-radius:8px;
 84 | 	margin-right:50px;
 85 | 	padding:10px;
 86 | 	margin-top:10px;
 87 | }
 88 | 
 89 | </style>
 90 | 
 91 | </head>
 92 | <body>
 93 | 
 94 | <div id="chatstream">
 95 |     
 96 | </div>
 97 | 
 98 | 
 99 | <div id="chatinputdiv">
100 |     <form>
101 |         <input type="text" class="interface-element" id="chat-input" placeholder="Say something! 😀">
102 |         <button type="button" class="interface-element" id="chat-button" onclick="javascript:send_message();">Submit</button>
103 |     </form>
104 | </div>
105 | 
106 | <script>
107 | 
108 | 
109 | function append_message(msg, who) {
110 | 	var output_div = document.getElementById("chatstream");
111 | 	output_div.innerHTML = output_div.innerHTML + `<div class="chat-msg-${who}">${msg}</div>`;
112 | 	scroll_bottom_output();
113 | }
114 | 
115 | function send_message() {
116 | 	var chat_message_box = document.getElementById("chat-input");
117 | 	var msg_to_send = chat_message_box.value + "";
118 | 	append_message(msg_to_send, "user");
119 | 	chat_message_box.value = "";
120 | 	
121 |     var data = {"input":msg_to_send};
122 |     const response = fetch("submit_chat_message", {
123 |         method: "POST",
124 |         cache: "no-cache",
125 |         credentials: "same-origin",
126 |         headers: {"Content-Type": "application/json"},
127 |         body: JSON.stringify(data),
128 |     })
129 |     .then(response=>response.json())
130 |     .then(data=>{
131 | 		console.log(data);
132 | 		console.log(`STATUS: ${data["status"]}`);
133 | 		console.log(`CONTENT: ${data["content"]}`);
134 | 		var new_content = data["content"].replace(/(?:\r\n|\r|\n)/g, '<br>');
135 | 		append_message(new_content, "chatbot");
136 |     })
137 | }
138 | 
139 | var element = document.getElementById('chat-input');
140 | element.addEventListener('keypress', function(e){
141 |   if (e.keyCode == 13) {
142 |     send_message();
143 | 	e.preventDefault(); 
144 |   }
145 | });
146 | 
147 | function scroll_bottom_output() {
148 | 	var output_div = document.getElementById("chatstream");
149 | 	output_div.scrollTop = output_div.scrollHeight;
150 | }
151 | 
152 | var app_name = "{{ app_name }}";
153 | var system_message = `{{ sys_msg }}`;
154 | if (system_message !== "") {
155 |     append_message(system_message, 'system');
156 | }
157 | 
158 | </script>
159 | 
160 | </body>
161 | </html>


--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/templates/applist.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <title>App List</title>
 5 | </head>
 6 | <body>
 7 | 
 8 | {{ applist | safe }}
 9 | 
10 | </body>
11 | </html>


--------------------------------------------------------------------------------
/demos-and-products/chaining-workshop/tests.py:
--------------------------------------------------------------------------------
 1 | response_1 = """---FIXED:
 2 | 你好！今天是个好日子。 (Your sentence was already correct in grammar and syntax.)
 3 | 
 4 | ---RESPONSE:
 5 | 你好！是的，今天天气很好。
 6 | 
 7 | ---ENGLISH:
 8 | Hello! Yes, the weather is very good today."""
 9 | 
10 | response_2 = """---MESSAGE
11 | I'm sorry to hear about the discomfort you're experiencing. Is there a specific part of the shoe that's causing the blisters or is it more of a general issue? Also, how does the overall comfort and fit compare to other sneakers you've worn in the past?
12 | ---SENTIMENT-SCORE
13 | 40
14 | ---END
15 | No"""
16 | 
17 | from demo import *
18 | 
19 | print(parseResponse(response_1))
20 | print(parseResponse(response_2))


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics-frontend/README.md:
--------------------------------------------------------------------------------
 1 | # Chain of Thought (COT) Analytics -- Frontend Version
 2 | 
 3 | This is still a work in progress. We need to figure out how to improve the code quality and make it consistent across each step.
 4 | 
 5 | ## Running
 6 | 
 7 | See installation instructions in `cot-analytics`. All the same warnings apply as `researchllm`. Run from this folder with...
 8 | ```python
 9 | from frontend import *
10 | run()
11 | ```
12 | 
13 | ## Examples of how this works
14 | 
15 | The screenshot below shows the first code block being executed. There are no outputs (or errors!) because the code works.
16 | ![Screenshot showing Step 1 running.](cot-scr-1.png)
17 | 
18 | Correlation matrix generated via GPT-4 coding.
19 | ![Screenshot showing Step 2 running.](cot-scr-2.png)
20 | 
21 | ## Issues
22 | 
23 | The `cot-analytics` folder generates a great research plan, but the code isn't consisten across steps. Some thinking below.
24 | 
25 | ### How to fix the code generator...
26 | 
27 | 1. Provide DF printout every time.
28 | 2. Run the code blocks before moving to the next ones.
29 | 3. Add line #s.
30 | 4. Plugin for Jupyter.
31 | 
32 | Note that this is where logging and evaluation become critical.
33 | 
34 | ### Want to help?
35 | 
36 | Email me: w (at) phaseai (dot) com
37 | 


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics-frontend/cot-scr-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/cot-analytics-frontend/cot-scr-1.png


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics-frontend/cot-scr-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/cot-analytics-frontend/cot-scr-2.png


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics-frontend/frontend.py:
--------------------------------------------------------------------------------
  1 | """
  2 | A Flask frontend for the COT demo
  3 | 
  4 | To run, start a Python REPL and in the same directory as this file and run the following:
  5 | > from frontend import *
  6 | > run() # Or, run('0.0.0.0', 80)
  7 | 
  8 | """
  9 | 
 10 | from flask import Flask, request, render_template
 11 | import pandas as pd
 12 | import numpy as np
 13 | 
 14 | from researchllm import *
 15 | 
 16 | APP = Flask(__name__)
 17 | 
 18 | ##########################################################################
 19 | #
 20 | # DATA SET SETUP (START)
 21 | # Please review the code below to set up your own data set for analysis.
 22 | #
 23 | 
 24 | # Data set to load and analyze.
 25 | DATA_SETUP_INTRO = "I am researching the relationship between income and sociodemographic census info."
 26 | DATA_FILE_LOC = "incomes.csv"
 27 | 
 28 | # Another sample we explored.
 29 | #DATA_SETUP_INTRO = "I am researching car crashes in NYC."
 30 | #DATA_FILE_LOC = "nypd-motor-vehicle-collisions.csv"
 31 | 
 32 | # Want to analyze your own data set? Simply replace the two variables above:
 33 | # DATA_SETUP_INTRO = "What are you researching? Please provide a short description.
 34 | # DATA_FILE_LOC = "The location of the CSV file."
 35 | # Note that you DO NOT have to provide metadata about the CSV file. This gets generated automatically.
 36 | 
 37 | # Loads the CSV file.
 38 | # If you want to load another file (e.g., Excel file), replace the code below with the relevant function (e.g., read_excel()).
 39 | df = pd.read_csv(DATA_FILE_LOC)
 40 | 
 41 | #
 42 | # DATA SET SETUP (END)
 43 | #
 44 | ##########################################################################
 45 | 
 46 | def generateOverview(df):
 47 |     """
 48 |     Generates a prompt providing an overview of a data set. This should only be used to generate the initial data prompt for now.
 49 |     """
 50 |     description = ""
 51 |     for column in df:
 52 |         col_name = df[column].name
 53 |         col_type = df[column].dtype
 54 |         col_description = f"Column Name: {col_name}\nColumn Type: {col_type}"
 55 |         if col_type == "object":
 56 |             column_values = df[col_name].values
 57 |             uniques = np.unique(column_values)
 58 |             col_description += f"\nSample Values: {str(uniques)}"
 59 |         description += col_description + "\n\n"
 60 |     return description.strip()
 61 | 
 62 | # The prompt used to set up the entire chat session. This prompt is used regularly for analysis.
 63 | base_prompt = f"{DATA_SETUP_INTRO} I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`, and have a dataframe called `df` loaded into Python. `df` contains the following variables and variable types:\n\n" + generateOverview(df) 
 64 | 
 65 | # Calls the researchllm.py function to set the current dataframe as the main one for analysis.
 66 | set_df(df)
 67 | start_bi_session()
 68 | 
 69 | ##########################################################################
 70 | #
 71 | # FLASK FUNCTIONS
 72 | # Everything below manages the frontend.
 73 | #
 74 | ##########################################################################
 75 | 
 76 | @APP.route('/get_prompt')
 77 | def get_prompt():
 78 |     """
 79 |     Returns a JSON object with the prompt being passed on to the language model.
 80 |     """
 81 |     return {"status":"ok", "prompt":base_prompt}
 82 | 
 83 | @APP.route('/')
 84 | def index():
 85 |     """
 86 |     Displays the index page accessible at '/'
 87 |     """
 88 |     return render_template('interface01.html')
 89 | 
 90 | @APP.route("/runcode", methods = ['POST'])
 91 | def runcode():
 92 |     """
 93 |     Runs code in the POST request.
 94 |     """
 95 |     code_to_run = request.json['code']
 96 |     response, code_output, is_error = ask_interpret_clean(code_to_run)
 97 |     return {"response":response, "code_output":code_output, "is_error":is_error}
 98 | 
 99 | @APP.route("/text_completion", methods = ['POST'])
100 | def analysis():
101 |     """
102 |     Calls the researchllm.py code to request analysis and interpretation thereof.
103 |     
104 |     See run_analysis(message) in researchllm.py for more information.
105 |     """
106 |     text_to_complete = request.json["input"]
107 |     new_request = base_prompt + text_to_complete
108 |     response_object = run_analysis(new_request)
109 |     return {"status":"ok", "content":response_object["interpretation"], "code":response_object["code"], "code_output":response_object["code_output"], "error":response_object["error"]}
110 | 
111 | def run(host="127.0.0.1", port=5000):
112 |     APP.run(host=host, port=port)
113 | 


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics-frontend/static/interface01.css:
--------------------------------------------------------------------------------
  1 | * {
  2 | 	font-family: 'Open Sans', sans-serif;	
  3 | 	box-sizing: border-box;	
  4 | }
  5 | 
  6 | body {
  7 |     margin:0;
  8 |     padding:0;
  9 | }
 10 | 
 11 | #everything-everywhere-all-at-once {
 12 | 	width:100vw;
 13 | 	height:100vh;
 14 | 	display: grid;
 15 | 	grid-template-columns: 33% 66%;	
 16 | 	overflow:hidden;
 17 | }
 18 | 
 19 | #col1 {
 20 |     grid-column:1;
 21 | 	grid-row:1;
 22 | 	padding:25px;
 23 | 	background-color:rgb(248, 245, 228);
 24 | }
 25 | 
 26 | #col2 {
 27 | 	grid-column:2;
 28 | 	grid-row: 1;
 29 | 	padding:25px;
 30 | }
 31 | 
 32 | #data-overview {
 33 | 	margin-top:30px;
 34 | }
 35 | 
 36 | h1 {
 37 | 	font-size:20px;
 38 | 	font-weight:600;
 39 | }
 40 | 
 41 | #prompt-info {
 42 | 	max-height:calc(100vh - 150px);
 43 | 	font-size:11px;
 44 | 	font-weight:300;
 45 | 	font-family: 'Open Sans', sans-serif;	
 46 | 	overflow-x:auto;
 47 | 	overflow-y:auto;
 48 | }
 49 | 
 50 | #cot-output {
 51 | 	overflow-x:auto;
 52 | 	overflow-y:auto;
 53 | 	max-height:calc(100vh - 150px);
 54 | }
 55 | 
 56 | .cot-output-cell {
 57 | 	border:1px solid rgd(235,235,235);
 58 | 	border-radius:7px;
 59 | 	padding:15px;
 60 | 	margin:10px;
 61 | }
 62 | 
 63 | .cot-output-cell .notes {
 64 | 	white-space: pre-wrap;
 65 | }
 66 | 
 67 | .cot-output-cell .code {
 68 | 	width:100%;
 69 | 	white-space: pre;
 70 | 	font-family: monospace;
 71 | }
 72 | 
 73 | .run-button {
 74 | 	padding:10px;
 75 | 	border-radius:8px;
 76 | 	border:1px solid rgb(230,230,230);
 77 | 	width:100%;
 78 | 	background:rgb(83, 145, 101);
 79 | 	color:rgb(248, 245, 228);
 80 | 	font-weight:800px;
 81 | 	margin-top:10px;
 82 | 	width:100px;
 83 | 	cursor:pointer;
 84 | }
 85 | 
 86 | .code-output-after-run {
 87 | 	width:100%;
 88 | 	white-space: pre;
 89 | 	font-family: monospace;
 90 | 	background:rgb(248, 245, 228);
 91 | 	border-radius:7px;
 92 | 	padding:15px;
 93 | 	margin:10px;
 94 | }
 95 | 
 96 | .heading-code-output {
 97 | 	font-weight:600;
 98 | }
 99 | 
100 | .heading-error {
101 | 	color:crimson;
102 | 	font-weight:600;
103 | }


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics-frontend/static/results.js:
--------------------------------------------------------------------------------
 1 | function show_prompt() {
 2 |     const response = fetch("get_prompt", {
 3 |         method: "GET",
 4 |         cache: "no-cache",
 5 |         credentials: "same-origin",
 6 |         headers: {"Content-Type": "application/json"},
 7 |     })
 8 |     .then(response=>response.json())
 9 |     .then(data=>{ 
10 | 		var p = data["prompt"].replace(/(?:\r\n|\r|\n)/g, '<br>');
11 | 		var prompt_pre = document.getElementById("prompt-info");
12 | 		prompt_pre.innerHTML = p;
13 |     })
14 | }
15 | 
16 | show_prompt();
17 | 
18 | function resize_textarea(dom_id) {
19 |     var ta = document.getElementById(dom_id);
20 |     ta.style.height = ta.scrollHeight;
21 | }
22 | 
23 | function add_box(header, notes, code, css_id) {
24 |     var cot_div = document.getElementById('cot-output');
25 |     var notes_clean = notes.replace(/(?:\r\n|\r|\n)/g, '<br>');
26 | 
27 |     var code_clean = code.replace("```python", "").replace("```", ""); // This needs to be changed to just deleting the first and last line.
28 | 
29 |     var new_html = `<div class="cot-output-cell" id="cot-cell-code${css_id}">
30 |         <h3>${header}</h3>
31 |         <p class='notes'>${notes_clean}</p>
32 |         <p><textarea class='code' id='code${css_id}'>${code_clean}</textarea></p>
33 |         <p><button class='run-button' onclick="javascript:run('code${css_id}');">Run Code</button></p>
34 |     </div>`;
35 | 
36 |     cot_div.innerHTML += new_html;
37 |     resize_textarea(`code${css_id}`);
38 | }
39 | 
40 | function add_code_output(code_output, div_id, is_error) {
41 |     var cot_div = document.getElementById(div_id);
42 |     var new_html = "";
43 |     if (is_error) {
44 |         var new_html = `<span class='heading-error'>Error</span><div class='code-output-after-run'>${code_output}</div>`;
45 |     } else {
46 |         var new_html = `<span class='heading-code-output'>Code Output</span><div class='code-output-after-run'>${code_output}</div>`;
47 |     }
48 |     cot_div.innerHTML += new_html;
49 | }
50 | 
51 | for (var i = 1; i <= 7; i++) {
52 |     add_box(`Step #${i}`, COT_DATA[i]['objective'], COT_DATA[i]['code_block'], `_step_${i}`);
53 | }
54 | 
55 | function run(block_id) {
56 |     var code = document.getElementById(block_id).value;
57 |     data = {"code":code};
58 |     console.log(data);
59 |     const response = fetch("runcode", {
60 |         method: "POST",
61 |         cache: "no-cache",
62 |         credentials: "same-origin",
63 |         headers: {"Content-Type": "application/json"},
64 |         body: JSON.stringify(data)
65 |     })
66 |     .then(response=>response.json())
67 |     .then(data=>{ 
68 |         var response = data['response'];
69 |         var code_output = data['code_output'];
70 |         var is_error = data['is_error'];
71 |         //console.log(response);
72 |         //console.log(code_output);
73 |         if (response === "*No outputs.*") {
74 |             code_output = "*No outputs.*"
75 |         }
76 |         //console.log(is_error);
77 |         add_code_output(code_output, `cot-cell-${block_id}`, is_error);
78 |     })
79 | }


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics-frontend/templates/interface01.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | 
 4 | <link rel="preconnect" href="https://fonts.googleapis.com">
 5 | <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
 6 | <link href="https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,300;1,400;1,500;1,600;1,700;1,800&family=Playfair+Display:ital,wght@0,400;0,500;0,600;0,700;0,800;0,900;1,400;1,500;1,600;1,700;1,800;1,900&display=swap" rel="stylesheet">
 7 | 
 8 | <link href="static/interface01.css" rel="stylesheet">
 9 | 
10 | </head>
11 | <body>
12 | 
13 | <div id="everything-everywhere-all-at-once">
14 | 
15 |     <div id="col1">
16 | 
17 | 		<h1>ResearchLLM: Chain of Thought Analysis</h1>
18 | 		<div id="data-overview">
19 | 			<h1 style="font-weight:300;">Data Overview (Prompt)</h1>
20 | 			<div id="prompt-info"></div>
21 | 		</div>
22 | 
23 |     </div>
24 | 
25 |     <div id="col2">
26 |         <h1>Analysis Output</h1>
27 | 
28 |         <div id="cot-output"></div>
29 | 
30 |     </div>
31 | 
32 | </div>
33 |     
34 | <script src="static/data.js"></script>
35 | <script src="static/results.js"></script>
36 | 
37 | </body>
38 | </html>


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics/README.md:
--------------------------------------------------------------------------------
 1 | # Chain of Thought (CoT) Analytics
 2 | 
 3 | This generates a Chain of Thought (CoT) plan for a data set, and then asks the LLM to generate code for each step in the CoT analysis.
 4 | 
 5 | Curious about the output? Please see `sample_output.md` for an example of an analysis plan for the demo data set. This was not edited!
 6 | 
 7 | ## Installation and Setup
 8 | 
 9 | ### Installation
10 | 
11 | Clone the GitHub repository and navigate to the folder containing this README.md file. Install the relevant packages (including PhaseLLM):
12 | 
13 | ```
14 | pip install -r requirements.txt
15 | ```
16 | 
17 | Next, make sure you edit the `cot.py` file to include the proper API keys. You'll find these around line 115:
18 | ```python
19 | openai_api_key = os.getenv("OPENAI_API_KEY") 
20 | o = OpenAIGPTWrapper(openai_api_key, 'gpt-4') # We highly recommend using GPT-4 or Claude v1.3 for this.
21 | ```
22 | 
23 | ### Running With Sample Data
24 | 
25 | Simply run `cot.py` directly in your command line. This will take a while to run, and will make 10-20 requests to the OpenAI or Anthropic API.
26 | 
27 | ## Running on Your Own Data
28 | 
29 | This demo is based on the data in `incomes.csv` in the `researchllm` folder. We are working on making this easier to apply to other data sets *and* to actually execute the code generated by the LLM.
30 | 
31 | Until then, please don't hesitate to reach out -- hello (at) phaseai (dot) com -- and we'll help you apply this to your data.
32 | 


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics/cot.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Chain of Thought (CoT) analysis for a data set. Please see the README.md for more information.
  3 | """
  4 | 
  5 | import os
  6 | from dotenv import load_dotenv
  7 | 
  8 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
  9 | 
 10 | # Where we write the output of this analysis.
 11 | OUT_FILE = "output.md"
 12 | 
 13 | def save_output(output, header):
 14 |     """
 15 |     Appends model outputs to a markdown file. Includes a header ("# header") and then the output itself.
 16 |     """
 17 |     with open(OUT_FILE, 'a') as writer:
 18 |         writer.write(f"# {header}\n\n{output}\n\n")
 19 | 
 20 | # This prompt is basically a copy/paste of what is generated by ResearchLLM for the 'incomes.csv' data set via the generateOverview() function.
 21 | messages = [{"role":"system", "content": """You are a data science research assistant. We will ask you about a big data set and would like you to break down the analysis you suggest into specific tasks that we can then write code for."""},
 22 | {"role":"user", "content":"""I am researching the relationship between income and sociodemographic census info. I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`, and have a dataframe called `df` loaded into Python. `df` contains the following variables and variable types:
 23 | 
 24 | Column Name: age
 25 | Column Type: int64
 26 | 
 27 | Column Name: workclass
 28 | Column Type: object
 29 | Sample Values: ['?' 'Federal-gov' 'Local-gov' 'Never-worked' 'Private' 'Self-emp-inc'
 30 | 'Self-emp-not-inc' 'State-gov' 'Without-pay']
 31 | 
 32 | Column Name: fnlwgt
 33 | Column Type: int64
 34 | 
 35 | Column Name: education
 36 | Column Type: object
 37 | Sample Values: ['10th' '11th' '12th' '1st-4th' '5th-6th' '7th-8th' '9th' 'Assoc-acdm'
 38 | 'Assoc-voc' 'Bachelors' 'Doctorate' 'HS-grad' 'Masters' 'Preschool'
 39 | 'Prof-school' 'Some-college']
 40 | 
 41 | Column Name: education.num
 42 | Column Type: int64
 43 | 
 44 | Column Name: marital.status
 45 | Column Type: object
 46 | Sample Values: ['Divorced' 'Married-AF-spouse' 'Married-civ-spouse'
 47 | 'Married-spouse-absent' 'Never-married' 'Separated' 'Widowed']
 48 | 
 49 | Column Name: occupation
 50 | Column Type: object
 51 | Sample Values: ['?' 'Adm-clerical' 'Armed-Forces' 'Craft-repair' 'Exec-managerial'
 52 | 'Farming-fishing' 'Handlers-cleaners' 'Machine-op-inspct' 'Other-service'
 53 | 'Priv-house-serv' 'Prof-specialty' 'Protective-serv' 'Sales'
 54 | 'Tech-support' 'Transport-moving']
 55 | 
 56 | Column Name: relationship
 57 | Column Type: object
 58 | Sample Values: ['Husband' 'Not-in-family' 'Other-relative' 'Own-child' 'Unmarried' 'Wife']
 59 | 
 60 | Column Name: race
 61 | Column Type: object
 62 | Sample Values: ['Amer-Indian-Eskimo' 'Asian-Pac-Islander' 'Black' 'Other' 'White']
 63 | 
 64 | Column Name: sex
 65 | Column Type: object
 66 | Sample Values: ['Female' 'Male']
 67 | 
 68 | Column Name: capital.gain
 69 | Column Type: int64
 70 | 
 71 | Column Name: capital.loss
 72 | Column Type: int64
 73 | 
 74 | Column Name: hours.per.week
 75 | Column Type: int64
 76 | 
 77 | Column Name: native.country
 78 | Column Type: object
 79 | Sample Values: ['?' 'Cambodia' 'Canada' 'China' 'Columbia' 'Cuba' 'Dominican-Republic'
 80 | 'Ecuador' 'El-Salvador' 'England' 'France' 'Germany' 'Greece' 'Guatemala'
 81 | 'Haiti' 'Holand-Netherlands' 'Honduras' 'Hong' 'Hungary' 'India' 'Iran'
 82 | 'Ireland' 'Italy' 'Jamaica' 'Japan' 'Laos' 'Mexico' 'Nicaragua'
 83 | 'Outlying-US(Guam-USVI-etc)' 'Peru' 'Philippines' 'Poland' 'Portugal'
 84 | 'Puerto-Rico' 'Scotland' 'South' 'Taiwan' 'Thailand' 'Trinadad&Tobago'
 85 | 'United-States' 'Vietnam' 'Yugoslavia']
 86 | 
 87 | Column Name: income
 88 | Column Type: object
 89 | Sample Values: ['<=50K' '>50K']
 90 | 
 91 | ````````
 92 | 
 93 | With all of the above in mind, could you please provide me with a set of analysis steps you would recommend I run on the data to better understand what drives income inequality? Please provide a numbered list where each number is a specific analytical step. For each step, include the hypothesis you would test, what variables you'd look at, and what you'd be hoping to find.
 94 | 
 95 | Do not worry about visualizing the data, as I'd like to ensure the outputs are all things that you are able to interpret afterwards. """}
 96 | ]
 97 | 
 98 | def split_cot(cot):
 99 |     """
100 |     Takes a numbered list generated by an LLM and splits it into an array.
101 |     """
102 |     lines = cot.split("\n")
103 |     cot_steps = []
104 | 
105 |     step_text = ""
106 |     for i in range(0, len(lines)):
107 |         line = lines[i]
108 |         if len(line.strip()) > 0:
109 |             step_text += line + "\n"
110 |         else:
111 |             cot_steps.append(step_text.strip())
112 |             step_text = ""
113 | 
114 |     return cot_steps
115 | 
116 | load_dotenv()
117 | 
118 | print("Setting up chat...")
119 | 
120 | openai_api_key = os.getenv("OPENAI_API_KEY")
121 | o = OpenAIGPTWrapper(openai_api_key, 'gpt-4')
122 | c = ChatBot(o, messages[0]['content'])
123 | 
124 | print("Getting CoT...")
125 | 
126 | # Step 1, let's get a chain of thought (COT) approach to understanding the data set.
127 | response = c.chat(messages[1]['content'])
128 | save_output(response, "Chain of Thought Plan for Data Analysis")
129 | cot_steps = split_cot(response)
130 | 
131 | # Step 2, go through each COT step and ask GPT-4 to generate code.
132 | step_num = 1
133 | for step in cot_steps:
134 | 
135 |     print(f"Generating code for step {step_num}.")
136 | 
137 |     prompt = f"""You wrote the following instructions for a step:
138 | {step}
139 | 
140 | Please write the Python code for the step above. Assume the following:
141 | 1. Start your response with ```python
142 | 2. End your response with ```
143 | 3. Do not add any text outside the code. For anything that requires comment, simply add Python comments.
144 | 4. Assume the data was imported into a dataframe called `df`
145 | 5. I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`. You can use those libraries and no others.
146 | """
147 | 
148 |     response = c.chat(prompt)
149 | 
150 |     save_output(step + "\n\n" + response, f"Code for Step #{step_num}")
151 | 
152 |     step_num += 1
153 | 
154 | print("Done!")


--------------------------------------------------------------------------------
/demos-and-products/cot-analytics/requirements.txt:
--------------------------------------------------------------------------------
1 | phasellm
2 | scikit-learn
3 | pandas
4 | numpy
5 | scipy
6 | statsmodels


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/env-template.txt:
--------------------------------------------------------------------------------
1 | # LLM APIs
2 | OPENAI_API_KEY=...your OpenAI API key...


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/eval_platform/__init__.py


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/asgi.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ASGI config for eval_platform project.
 3 | 
 4 | It exposes the ASGI callable as a module-level variable named ``application``.
 5 | 
 6 | For more information on this file, see
 7 | https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
 8 | """
 9 | 
10 | import os
11 | 
12 | from django.core.asgi import get_asgi_application
13 | 
14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings")
15 | 
16 | application = get_asgi_application()
17 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/settings.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Django settings for eval_platform project.
  3 | 
  4 | Generated by 'django-admin startproject' using Django 4.2.
  5 | 
  6 | For more information on this file, see
  7 | https://docs.djangoproject.com/en/4.2/topics/settings/
  8 | 
  9 | For the full list of settings and their values, see
 10 | https://docs.djangoproject.com/en/4.2/ref/settings/
 11 | """
 12 | 
 13 | import os
 14 | from dotenv import load_dotenv
 15 | 
 16 | load_dotenv()
 17 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 18 | 
 19 | from pathlib import Path
 20 | 
 21 | # Build paths inside the project like this: BASE_DIR / 'subdir'.
 22 | BASE_DIR = Path(__file__).resolve().parent.parent
 23 | 
 24 | 
 25 | # Quick-start development settings - unsuitable for production
 26 | # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/
 27 | 
 28 | # SECURITY WARNING: keep the secret key used in production secret!
 29 | SECRET_KEY = "django-insecure-qhwo&d2q3@p2ov)-6e8il37squqh0ji&3qvqmtciforvkekr+^"
 30 | 
 31 | # SECURITY WARNING: don't run with debug turned on in production!
 32 | DEBUG = True
 33 | 
 34 | ALLOWED_HOSTS = []
 35 | 
 36 | 
 37 | # Application definition
 38 | 
 39 | INSTALLED_APPS = [
 40 |     "django.contrib.admin",
 41 |     "django.contrib.auth",
 42 |     "django.contrib.contenttypes",
 43 |     "django.contrib.sessions",
 44 |     "django.contrib.messages",
 45 |     "django.contrib.staticfiles",
 46 |     "llmevaluator",
 47 | ]
 48 | 
 49 | MIDDLEWARE = [
 50 |     "django.middleware.security.SecurityMiddleware",
 51 |     "django.contrib.sessions.middleware.SessionMiddleware",
 52 |     "django.middleware.common.CommonMiddleware",
 53 |     "django.middleware.csrf.CsrfViewMiddleware",
 54 |     "django.contrib.auth.middleware.AuthenticationMiddleware",
 55 |     "django.contrib.messages.middleware.MessageMiddleware",
 56 |     "django.middleware.clickjacking.XFrameOptionsMiddleware",
 57 | ]
 58 | 
 59 | ROOT_URLCONF = "eval_platform.urls"
 60 | 
 61 | TEMPLATES = [
 62 |     {
 63 |         "BACKEND": "django.template.backends.django.DjangoTemplates",
 64 |         "DIRS": ["templates"],
 65 |         "APP_DIRS": True,
 66 |         "OPTIONS": {
 67 |             "context_processors": [
 68 |                 "django.template.context_processors.debug",
 69 |                 "django.template.context_processors.request",
 70 |                 "django.contrib.auth.context_processors.auth",
 71 |                 "django.contrib.messages.context_processors.messages",
 72 |             ],
 73 |         },
 74 |     },
 75 | ]
 76 | 
 77 | WSGI_APPLICATION = "eval_platform.wsgi.application"
 78 | 
 79 | 
 80 | # Database
 81 | # https://docs.djangoproject.com/en/4.2/ref/settings/#databases
 82 | 
 83 | DATABASES = {
 84 |     "default": {
 85 |         "ENGINE": "django.db.backends.sqlite3",
 86 |         "NAME": BASE_DIR / "db.sqlite3",
 87 |     }
 88 | }
 89 | 
 90 | 
 91 | # Password validation
 92 | # https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
 93 | 
 94 | AUTH_PASSWORD_VALIDATORS = [
 95 |     {
 96 |         "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
 97 |     },
 98 |     {
 99 |         "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
100 |     },
101 |     {
102 |         "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
103 |     },
104 |     {
105 |         "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
106 |     },
107 | ]
108 | 
109 | 
110 | # Internationalization
111 | # https://docs.djangoproject.com/en/4.2/topics/i18n/
112 | 
113 | LANGUAGE_CODE = "en-us"
114 | 
115 | TIME_ZONE = "UTC"
116 | 
117 | USE_I18N = True
118 | 
119 | USE_TZ = True
120 | 
121 | 
122 | # Static files (CSS, JavaScript, Images)
123 | # https://docs.djangoproject.com/en/4.2/howto/static-files/
124 | 
125 | STATIC_URL = "static/"
126 | 
127 | STATICFILES_DIRS = [
128 |     BASE_DIR / "static",
129 | ]
130 | 
131 | # Default primary key field type
132 | # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
133 | 
134 | DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
135 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/urls.py:
--------------------------------------------------------------------------------
 1 | """
 2 | URL configuration for eval_platform project.
 3 | 
 4 | The `urlpatterns` list routes URLs to views. For more information please see:
 5 |     https://docs.djangoproject.com/en/4.2/topics/http/urls/
 6 | Examples:
 7 | Function views
 8 |     1. Add an import:  from my_app import views
 9 |     2. Add a URL to urlpatterns:  path('', views.home, name='home')
10 | Class-based views
11 |     1. Add an import:  from other_app.views import Home
12 |     2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
13 | Including another URLconf
14 |     1. Import the include() function: from django.urls import include, path
15 |     2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
16 | """
17 | from django.contrib import admin
18 | from django.urls import path
19 | from django.views.generic import TemplateView
20 | 
21 | import llmevaluator.views as lv
22 | 
23 | urlpatterns = [
24 |     path("admin/", admin.site.urls),
25 |     path("", lv.review_jobs),
26 |     path(
27 |         "import",
28 |         TemplateView.as_view(
29 |             template_name="create.html",
30 |             extra_context={"contenttitle": "Import Chat via JSON"},
31 |         ),
32 |     ),
33 |     path(
34 |         "about",
35 |         TemplateView.as_view(
36 |             template_name="aboutus.html",
37 |             extra_context={"contenttitle": "About Us"},
38 |         ),
39 |     ),
40 |     path("create_save_ma", lv.createMessageArray),
41 |     path("create_save_ma_json", lv.createMessageArrayJson),
42 |     path("groups", lv.list_groups),
43 |     path("create_group_csv", lv.createGroupFromCSV),
44 |     path("jobs", lv.list_jobs),
45 |     path("create_job", lv.createJob),
46 |     path("chats", lv.get_chats, name="list_chats"),
47 |     path("view_chat/<int:chat_id>", lv.view_chat, name="view_chat"),
48 |     path("view_chat", lv.view_chat_new),
49 |     path("update_title_via_post", lv.update_title_via_post),
50 |     path("overwrite_chat", lv.overwrite_chat),
51 |     path("delete_chat/<int:chat_id>", lv.delete_chat),
52 | ]
53 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/eval_platform/wsgi.py:
--------------------------------------------------------------------------------
 1 | """
 2 | WSGI config for eval_platform project.
 3 | 
 4 | It exposes the WSGI callable as a module-level variable named ``application``.
 5 | 
 6 | For more information on this file, see
 7 | https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
 8 | """
 9 | 
10 | import os
11 | 
12 | from django.core.wsgi import get_wsgi_application
13 | 
14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings")
15 | 
16 | application = get_wsgi_application()
17 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/__init__.py


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 | 
3 | from .models import ChatBotMessageArray, MessageCollection, BatchLLMJob
4 | 
5 | admin.site.register(ChatBotMessageArray)
6 | admin.site.register(MessageCollection)
7 | admin.site.register(BatchLLMJob)
8 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 | 
3 | 
4 | class LlmevaluatorConfig(AppConfig):
5 |     default_auto_field = "django.db.models.BigAutoField"
6 |     name = "llmevaluator"
7 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/management/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/management/__init__.py


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/management/commands/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/management/commands/__init__.py


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/management/commands/runjobs.py:
--------------------------------------------------------------------------------
  1 | from django.core.management.base import BaseCommand
  2 | 
  3 | from llmevaluator.models import *
  4 | 
  5 | from django.conf import settings
  6 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
  7 | 
  8 | 
  9 | # Returns the new ChatBotMessageArray ID
 10 | def run_llm_task_and_save(
 11 |     message_array,
 12 |     user_message,
 13 |     job_id,
 14 |     original_title="Untitled",
 15 |     model="gpt-4",
 16 |     temperature=0.7,
 17 |     print_response=True,
 18 |     new_system_prompt=None,
 19 |     resend_last_user_message=False,
 20 | ):
 21 |     o = OpenAIGPTWrapper(settings.OPENAI_API_KEY, model=model, temperature=temperature)
 22 |     cb = ChatBot(o, "")
 23 | 
 24 |     # If we want to resend the last user message *and* provide a new user message, then we'll have to ignore one of those options
 25 |     assert not (resend_last_user_message == True and len(user_message) > 0)
 26 | 
 27 |     ma_copy = message_array.copy()
 28 |     if new_system_prompt is not None:
 29 |         if len(new_system_prompt.strip()) > 0:
 30 |             # If the first message is not a system prompt, then error out.
 31 |             assert ma_copy[0]["role"] == "system"
 32 |             ma_copy[0]["content"] = new_system_prompt
 33 | 
 34 |     cb.messages = ma_copy
 35 | 
 36 |     if resend_last_user_message:
 37 |         response = cb.resend()
 38 |     else:
 39 |         response = cb.chat(user_message)
 40 | 
 41 |     new_cbma = ChatBotMessageArray(
 42 |         message_array=cb.messages,
 43 |         source_batch_job_id=job_id,
 44 |         title=f"{original_title} w/ T={temperature}, model={model}",
 45 |     )
 46 | 
 47 |     new_cbma.llm_temperature = temperature
 48 |     new_cbma.llm_model = model
 49 | 
 50 |     new_cbma.save()
 51 | 
 52 |     if print_response:
 53 |         print(response)
 54 | 
 55 |     return new_cbma
 56 | 
 57 | 
 58 | def run_job(job):
 59 |     print(f"Starting job: {job.title}")
 60 | 
 61 |     mc = MessageCollection.objects.get(id=job.message_collection_id)
 62 |     chat_ids_string = mc.chat_ids
 63 |     chat_ids = chat_ids_string.strip().split(",")
 64 | 
 65 |     results_ids = []
 66 |     results_to_append = []
 67 | 
 68 |     for _cid in chat_ids:
 69 |         print(f"Analyzing chat ID: {_cid}")
 70 | 
 71 |         cid = int(_cid)
 72 |         cbma = ChatBotMessageArray.objects.get(id=cid)
 73 | 
 74 |         # SETTING: run_n_times
 75 |         run_n_times = job.run_n_times
 76 |         for i in range(0, run_n_times):
 77 |             # SETTING: include_gpt_4
 78 |             if job.include_gpt_4:
 79 |                 if job.temperature_range:
 80 |                     for t in [0.25, 0.75, 1.25]:
 81 |                         nc = run_llm_task_and_save(
 82 |                             cbma.message_array.copy(),
 83 |                             job.user_message,
 84 |                             job.id,
 85 |                             cbma.title,
 86 |                             model="gpt-4",
 87 |                             temperature=t,
 88 |                             new_system_prompt=job.new_system_prompt,
 89 |                             resend_last_user_message=job.resend_last_user_message,
 90 |                         )
 91 |                         results_ids.append(str(nc.id))
 92 |                         results_to_append.append(nc)
 93 |                 else:
 94 |                     nc = run_llm_task_and_save(
 95 |                         cbma.message_array.copy(),
 96 |                         job.user_message,
 97 |                         job.id,
 98 |                         cbma.title,
 99 |                         "gpt-4",
100 |                         new_system_prompt=job.new_system_prompt,
101 |                         resend_last_user_message=job.resend_last_user_message,
102 |                     )
103 |                     results_ids.append(str(nc.id))
104 |                     results_to_append.append(nc)
105 | 
106 |             # SETTING: include_gpt_35
107 |             if job.include_gpt_35:
108 |                 if job.temperature_range:
109 |                     for t in [0.25, 0.75, 1.25]:
110 |                         nc = run_llm_task_and_save(
111 |                             cbma.message_array.copy(),
112 |                             job.user_message,
113 |                             job.id,
114 |                             cbma.title,
115 |                             model="gpt-3.5-turbo",
116 |                             temperature=t,
117 |                             new_system_prompt=job.new_system_prompt,
118 |                             resend_last_user_message=job.resend_last_user_message,
119 |                         )
120 |                         results_ids.append(str(nc.id))
121 |                         results_to_append.append(nc)
122 |                 else:
123 |                     nc = run_llm_task_and_save(
124 |                         cbma.message_array.copy(),
125 |                         job.user_message,
126 |                         job.id,
127 |                         cbma.title,
128 |                         "gpt-3.5-turbo",
129 |                         new_system_prompt=job.new_system_prompt,
130 |                         resend_last_user_message=job.resend_last_user_message,
131 |                     )
132 |                     results_ids.append(str(nc.id))
133 |                     results_to_append.append(nc)
134 | 
135 |     new_chats_str = ",".join(results_ids)
136 |     results_mc = MessageCollection(
137 |         title=f"Results from '{job.title}' job",
138 |         chat_ids=new_chats_str,
139 |         source_collection_id=mc.id,
140 |         source_batch_job_id=job.id,
141 |     )
142 |     results_mc.save()
143 | 
144 |     for r in results_to_append:
145 |         results_mc.chats.add(r)
146 |     results_mc.save()
147 | 
148 |     job.status = "complete"
149 |     job.results_array = results_mc
150 |     job.save()
151 | 
152 |     print("Done!")
153 | 
154 | 
155 | class Command(BaseCommand):
156 |     help = "Runs all scheduled batch jobs."
157 | 
158 |     def handle(self, *args, **options):
159 |         jobs = BatchLLMJob.objects.filter(status="scheduled")
160 |         for job in jobs:
161 |             run_job(job)
162 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0001_initial.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-09-24 16:39
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     initial = True
 8 | 
 9 |     dependencies = []
10 | 
11 |     operations = [
12 |         migrations.CreateModel(
13 |             name="ChatBotMessageArray",
14 |             fields=[
15 |                 (
16 |                     "id",
17 |                     models.BigAutoField(
18 |                         auto_created=True,
19 |                         primary_key=True,
20 |                         serialize=False,
21 |                         verbose_name="ID",
22 |                     ),
23 |                 ),
24 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
25 |                 ("updated_at", models.DateTimeField(auto_now=True)),
26 |                 ("message_array", models.JSONField(default=dict)),
27 |                 ("comments", models.TextField(blank=True, default="", null=True)),
28 |             ],
29 |         ),
30 |         migrations.CreateModel(
31 |             name="MessageCollection",
32 |             fields=[
33 |                 (
34 |                     "id",
35 |                     models.BigAutoField(
36 |                         auto_created=True,
37 |                         primary_key=True,
38 |                         serialize=False,
39 |                         verbose_name="ID",
40 |                     ),
41 |                 ),
42 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
43 |                 ("updated_at", models.DateTimeField(auto_now=True)),
44 |                 ("title", models.TextField(blank=True, default="", null=True)),
45 |                 ("chat_ids", models.TextField(blank=True, default="", null=True)),
46 |             ],
47 |         ),
48 |     ]
49 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0002_batchllmjob.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-09-26 18:50
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0001_initial"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.CreateModel(
13 |             name="BatchLLMJob",
14 |             fields=[
15 |                 (
16 |                     "id",
17 |                     models.BigAutoField(
18 |                         auto_created=True,
19 |                         primary_key=True,
20 |                         serialize=False,
21 |                         verbose_name="ID",
22 |                     ),
23 |                 ),
24 |                 ("created_at", models.DateTimeField(auto_now_add=True)),
25 |                 ("updated_at", models.DateTimeField(auto_now=True)),
26 |                 ("title", models.TextField(blank=True, default="", null=True)),
27 |                 ("message_collection_id", models.IntegerField()),
28 |                 ("user_message", models.TextField(blank=True, default="", null=True)),
29 |                 (
30 |                     "status",
31 |                     models.TextField(blank=True, default="scheduled", null=True),
32 |                 ),
33 |             ],
34 |         ),
35 |     ]
36 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0003_chatbotmessagearray_source_batch_job_id_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-09-28 14:54
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0002_batchllmjob"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="chatbotmessagearray",
14 |             name="source_batch_job_id",
15 |             field=models.IntegerField(null=True),
16 |         ),
17 |         migrations.AddField(
18 |             model_name="messagecollection",
19 |             name="source_batch_job_id",
20 |             field=models.IntegerField(null=True),
21 |         ),
22 |         migrations.AddField(
23 |             model_name="messagecollection",
24 |             name="source_collection_id",
25 |             field=models.IntegerField(null=True),
26 |         ),
27 |     ]
28 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0004_alter_chatbotmessagearray_message_array.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-09-28 19:42
 2 | 
 3 | import django.core.serializers.json
 4 | from django.db import migrations, models
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 |     dependencies = [
 9 |         ("llmevaluator", "0003_chatbotmessagearray_source_batch_job_id_and_more"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AlterField(
14 |             model_name="chatbotmessagearray",
15 |             name="message_array",
16 |             field=models.JSONField(
17 |                 default=dict, encoder=django.core.serializers.json.DjangoJSONEncoder
18 |             ),
19 |         ),
20 |     ]
21 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0005_alter_chatbotmessagearray_source_batch_job_id_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-09-29 15:22
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0004_alter_chatbotmessagearray_message_array"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AlterField(
13 |             model_name="chatbotmessagearray",
14 |             name="source_batch_job_id",
15 |             field=models.IntegerField(blank=True, null=True),
16 |         ),
17 |         migrations.AlterField(
18 |             model_name="messagecollection",
19 |             name="source_batch_job_id",
20 |             field=models.IntegerField(blank=True, null=True),
21 |         ),
22 |         migrations.AlterField(
23 |             model_name="messagecollection",
24 |             name="source_collection_id",
25 |             field=models.IntegerField(blank=True, null=True),
26 |         ),
27 |     ]
28 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0006_batchllmjob_tags_chatbotmessagearray_tags_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-09-29 18:52
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0005_alter_chatbotmessagearray_source_batch_job_id_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="batchllmjob",
14 |             name="tags",
15 |             field=models.TextField(blank=True, default="", null=True),
16 |         ),
17 |         migrations.AddField(
18 |             model_name="chatbotmessagearray",
19 |             name="tags",
20 |             field=models.TextField(blank=True, default="", null=True),
21 |         ),
22 |         migrations.AddField(
23 |             model_name="messagecollection",
24 |             name="tags",
25 |             field=models.TextField(blank=True, default="", null=True),
26 |         ),
27 |     ]
28 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0007_chatbotmessagearray_title.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-09-30 16:34
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0006_batchllmjob_tags_chatbotmessagearray_tags_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="chatbotmessagearray",
14 |             name="title",
15 |             field=models.TextField(blank=True, default="Untitled"),
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-10-09 13:37
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0007_chatbotmessagearray_title"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="batchllmjob",
14 |             name="include_gpt_35",
15 |             field=models.BooleanField(default=False),
16 |         ),
17 |         migrations.AddField(
18 |             model_name="batchllmjob",
19 |             name="include_gpt_4",
20 |             field=models.BooleanField(default=True),
21 |         ),
22 |         migrations.AddField(
23 |             model_name="batchllmjob",
24 |             name="run_n_times",
25 |             field=models.IntegerField(default=1),
26 |         ),
27 |         migrations.AddField(
28 |             model_name="batchllmjob",
29 |             name="temperature_range",
30 |             field=models.BooleanField(default=False),
31 |         ),
32 |     ]
33 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0009_batchllmjob_new_system_prompt_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-10-10 16:23
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         (
 9 |             "llmevaluator",
10 |             "0008_batchllmjob_include_gpt_35_batchllmjob_include_gpt_4_and_more",
11 |         ),
12 |     ]
13 | 
14 |     operations = [
15 |         migrations.AddField(
16 |             model_name="batchllmjob",
17 |             name="new_system_prompt",
18 |             field=models.TextField(blank=True, default="", null=True),
19 |         ),
20 |         migrations.AddField(
21 |             model_name="chatbotmessagearray",
22 |             name="llm_model",
23 |             field=models.TextField(blank=True, default="None", null=True),
24 |         ),
25 |         migrations.AddField(
26 |             model_name="chatbotmessagearray",
27 |             name="llm_temperature",
28 |             field=models.FloatField(blank=True, null=True),
29 |         ),
30 |     ]
31 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0010_batchllmjob_resend_last_user_message.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-10-11 06:30
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0009_batchllmjob_new_system_prompt_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="batchllmjob",
14 |             name="resend_last_user_message",
15 |             field=models.BooleanField(default=False),
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0011_batchllmjob_description.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-10-11 10:44
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0010_batchllmjob_resend_last_user_message"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="batchllmjob",
14 |             name="description",
15 |             field=models.TextField(blank=True, null=True),
16 |         ),
17 |     ]
18 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0012_batchllmjob_message_collection_ref.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-10-11 10:52
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 |     dependencies = [
 9 |         ("llmevaluator", "0011_batchllmjob_description"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name="batchllmjob",
15 |             name="message_collection_ref",
16 |             field=models.ForeignKey(
17 |                 null=True,
18 |                 on_delete=django.db.models.deletion.SET_NULL,
19 |                 to="llmevaluator.messagecollection",
20 |             ),
21 |         ),
22 |     ]
23 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0013_batchllmjob_results_array_and_more.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-10-11 11:05
 2 | 
 3 | from django.db import migrations, models
 4 | import django.db.models.deletion
 5 | 
 6 | 
 7 | class Migration(migrations.Migration):
 8 |     dependencies = [
 9 |         ("llmevaluator", "0012_batchllmjob_message_collection_ref"),
10 |     ]
11 | 
12 |     operations = [
13 |         migrations.AddField(
14 |             model_name="batchllmjob",
15 |             name="results_array",
16 |             field=models.ForeignKey(
17 |                 null=True,
18 |                 on_delete=django.db.models.deletion.SET_NULL,
19 |                 related_name="results_collection",
20 |                 to="llmevaluator.messagecollection",
21 |             ),
22 |         ),
23 |         migrations.AlterField(
24 |             model_name="batchllmjob",
25 |             name="message_collection_ref",
26 |             field=models.ForeignKey(
27 |                 null=True,
28 |                 on_delete=django.db.models.deletion.SET_NULL,
29 |                 related_name="source_messages_collection",
30 |                 to="llmevaluator.messagecollection",
31 |             ),
32 |         ),
33 |     ]
34 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/0014_messagecollection_chats.py:
--------------------------------------------------------------------------------
 1 | # Generated by Django 4.2 on 2023-10-11 13:03
 2 | 
 3 | from django.db import migrations, models
 4 | 
 5 | 
 6 | class Migration(migrations.Migration):
 7 |     dependencies = [
 8 |         ("llmevaluator", "0013_batchllmjob_results_array_and_more"),
 9 |     ]
10 | 
11 |     operations = [
12 |         migrations.AddField(
13 |             model_name="messagecollection",
14 |             name="chats",
15 |             field=models.ManyToManyField(
16 |                 blank=True, null=True, to="llmevaluator.chatbotmessagearray"
17 |             ),
18 |         ),
19 |     ]
20 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/llmevaluator/migrations/__init__.py


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/models.py:
--------------------------------------------------------------------------------
 1 | from django.db import models
 2 | from django.core.serializers.json import DjangoJSONEncoder
 3 | 
 4 | 
 5 | def object_has_tag(model_object, tag_string):
 6 |     tags = model_object.tags.split(",")
 7 |     for tag in tags:
 8 |         if tag.strip() == tag_string:
 9 |             return True
10 |     return False
11 | 
12 | 
13 | class ChatBotMessageArray(models.Model):
14 |     created_at = models.DateTimeField(auto_now_add=True)
15 |     updated_at = models.DateTimeField(auto_now=True)
16 |     message_array = models.JSONField(default=dict, encoder=DjangoJSONEncoder)
17 |     comments = models.TextField(default="", null=True, blank=True)
18 |     source_batch_job_id = models.IntegerField(null=True, blank=True)
19 |     tags = models.TextField(default="", null=True, blank=True)
20 |     title = models.TextField(default="Untitled", blank=True)
21 | 
22 |     # LLM settings for review, later
23 |     llm_model = models.TextField(default="None", blank=True, null=True)
24 |     llm_temperature = models.FloatField(null=True, blank=True)
25 | 
26 |     def __str__(self):
27 |         return f"ChatBotMessage (ID {self.id}), {self.title}"
28 | 
29 | 
30 | class MessageCollection(models.Model):
31 |     created_at = models.DateTimeField(auto_now_add=True)
32 |     updated_at = models.DateTimeField(auto_now=True)
33 |     title = models.TextField(default="", null=True, blank=True)
34 | 
35 |     # Note: we should use an ArrayField or JSONField or a ManyToManyField if we scale this up.
36 |     # However, to keep things very simple and supportable in SQLite, we'll assume the chat_ids are in a comma-separated string for now. We'll do some basic validation when saving via the front-end.
37 |     chat_ids = models.TextField(default="", null=True, blank=True)
38 |     chats = models.ManyToManyField(ChatBotMessageArray, blank=True)
39 | 
40 |     # We can save source collections in cases where we have batch jobs run.
41 |     source_collection_id = models.IntegerField(null=True, blank=True)
42 |     source_batch_job_id = models.IntegerField(null=True, blank=True)
43 |     tags = models.TextField(default="", null=True, blank=True)
44 | 
45 |     def __str__(self):
46 |         return f"MessageCollection (ID {self.id}), {self.title}"
47 | 
48 | 
49 | class BatchLLMJob(models.Model):
50 |     created_at = models.DateTimeField(auto_now_add=True)
51 |     updated_at = models.DateTimeField(auto_now=True)
52 |     title = models.TextField(default="", null=True, blank=True)
53 |     description = models.TextField(null=True, blank=True)
54 |     message_collection_id = models.IntegerField()
55 |     message_collection_ref = models.ForeignKey(
56 |         MessageCollection,
57 |         on_delete=models.SET_NULL,
58 |         null=True,
59 |         related_name="source_messages_collection",
60 |     )
61 |     results_array = models.ForeignKey(
62 |         MessageCollection,
63 |         on_delete=models.SET_NULL,
64 |         null=True,
65 |         related_name="results_collection",
66 |     )
67 | 
68 |     # scheduled, complete
69 |     status = models.TextField(default="scheduled", null=True, blank=True)
70 |     tags = models.TextField(default="", null=True, blank=True)
71 | 
72 |     # settings
73 |     # By default we only run the LLM on GPT-4 with a user message. The
74 |     # settings below let you do other things.
75 | 
76 |     # Messages
77 |     user_message = models.TextField(default="", null=True, blank=True)
78 |     new_system_prompt = models.TextField(default="", null=True, blank=True)
79 |     resend_last_user_message = models.BooleanField(default=False)
80 | 
81 |     # Repeat the run 'n' times
82 |     run_n_times = models.IntegerField(default=1)
83 | 
84 |     # Which LLM models to run
85 |     include_gpt_4 = models.BooleanField(default=True)
86 |     include_gpt_35 = models.BooleanField(default=False)
87 | 
88 |     # Run temperature tests; True = run across 0.25 to 1.75 with 0.5 increments
89 |     temperature_range = models.BooleanField(default=False)
90 | 
91 |     def __str__(self):
92 |         return f"Batch LLM Job (ID {self.id}), {self.title}"
93 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/llmevaluator/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 | 
3 | # Create your tests here.
4 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """Django's command-line utility for administrative tasks."""
 3 | import os
 4 | import sys
 5 | 
 6 | 
 7 | def main():
 8 |     """Run administrative tasks."""
 9 |     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "eval_platform.settings")
10 |     try:
11 |         from django.core.management import execute_from_command_line
12 |     except ImportError as exc:
13 |         raise ImportError(
14 |             "Couldn't import Django. Are you sure it's installed and "
15 |             "available on your PYTHONPATH environment variable? Did you "
16 |             "forget to activate a virtual environment?"
17 |         ) from exc
18 |     execute_from_command_line(sys.argv)
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     main()
23 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/readme.md:
--------------------------------------------------------------------------------
 1 | # PhaseLLM Evaluation
 2 | 
 3 | *PhaseLLM Evaluation* helps you run batch jobs across LLMs. Think of it as a playground where you can easily run multiple LLM calls across different models.
 4 | 
 5 | Example use cases:
 6 | - Run the same set of messages `n` times to see how responses differ.
 7 | - Run messages across different models (e.g., GPT-4 and GPT-3.5) to see performance differences.
 8 | - Replace or update system prompts across multiple chats to see if they have an impact on responses.
 9 | 
10 | [5-minute demo below:](https://www.youtube.com/watch?v=Ycu2eKkCO7Y)
11 | [![PhaseLLM Evaluation screenshot](screenshot.png)](https://www.youtube.com/watch?v=Ycu2eKkCO7Y)
12 | 
13 | ## Installation and Running
14 | 
15 | Please follow the step below to run *PhaseLLM Evaluation*.
16 | 
17 | Run the code below in the `eval_platform` directory.
18 | 
19 | ```bash
20 | pip3 install -r requirements.txt
21 | python3 manage.py migrate
22 | ```
23 | 
24 | The code above will install `phasellm` and `Django`, and set up the relevant SQLite database.
25 | 
26 | Update the `env_template.txt` file with your OpenAI API key and save it to `.env`.
27 | 
28 | Finally, to run the server, type the following:
29 | ```bash
30 | python3 manage.py runserver
31 | ```
32 | 
33 | You'll then be able to navigate to `http://localhost:8000` and run your evaluations.
34 | 
35 | ## Running Batch Jobs
36 | 
37 | Once you've created the proper chats, chat groups, and jobs, open a second terminal window and type the following in your `eval_platform` directory:
38 | 
39 | ```bash
40 | python3 manage.py runjobs
41 | ```
42 | 
43 | This is a custom Django job that will run your jobs. The outputs will be printed in the terminal, btu will also be saved in the front-end.
44 | 
45 | ## Hosting
46 | 
47 | Want us to host the *Evaluation* demo product for you? Please reach out to us at w [at] phaseai [dot] com
48 | 
49 | ## Feedback?
50 | 
51 | Any feedback is welcome. Please reach out to w [at] phaseai [dot] com and we'll get back to you as soon as we can!
52 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/requirements.txt:
--------------------------------------------------------------------------------
1 | Django==4.2
2 | phasellm>=0.0.17,<0.1.0
3 | 


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/eval_platform/screenshot.png


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/static/main.css:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * DEFAULT AND UNIVERSAL VALUES
  3 |  */
  4 | 
  5 | * {
  6 |     margin: 0;
  7 |     padding: 0;
  8 |     font-family: 'Open Sans', sans-serif;
  9 |     font-weight: 200;
 10 |     font-size: 15px;
 11 |     box-sizing: border-box;
 12 | }
 13 | 
 14 | :root {
 15 |     --standard-margin-spacing-text: 20px;
 16 |     --internal-standard-padding: 5px;
 17 |     --standard-border-radius: 5px;
 18 |     --lightgray-borders-backgrounds: rgb(235, 235, 235);
 19 | }
 20 | 
 21 | b {
 22 |     font-weight: 900;
 23 | }
 24 | 
 25 | ul {
 26 |     margin-left: var(--standard-margin-spacing-text);
 27 | }
 28 | 
 29 | a {
 30 |     text-decoration: none;
 31 |     color: #4682B4;
 32 |     font-weight: 500;
 33 | }
 34 | 
 35 | /**
 36 |  * TWO COLUMN CONTAINER TEST
 37 |  */
 38 | 
 39 | .two-col-content-container {
 40 |     display: grid;
 41 |     grid-template-columns: 50% 1fr;
 42 |     column-gap: calc(3*var(--internal-standard-padding));
 43 |     height: 100%;
 44 |     overflow: hidden;
 45 | }
 46 | 
 47 | .two-col-content-left {
 48 |     background-color: white;
 49 |     overflow: auto;
 50 |     padding: calc(2*var(--internal-standard-padding));
 51 | }
 52 | 
 53 | .two-col-content-right {
 54 |     background-color: white;
 55 |     overflow: auto;
 56 |     padding: calc(2*var(--internal-standard-padding));
 57 | }
 58 | 
 59 | /**
 60 |  * TWO ROW CONTAINER TEST
 61 |  */
 62 | 
 63 | .two-row-container {
 64 |     display: grid;
 65 |     grid-template-rows: auto auto;
 66 |     row-gap: var(--internal-standard-padding);
 67 |     height: 100%;
 68 | }
 69 | 
 70 | .two-row-top-row {
 71 |     background-color: white;
 72 |     padding-bottom: calc(3*var(--internal-standard-padding));
 73 |     border-bottom: 1px solid var(--lightgray-borders-backgrounds);
 74 | }
 75 | 
 76 | .two-row-bottom-row {
 77 |     background-color: white;
 78 | }
 79 | 
 80 | /**
 81 |  * EVERYTHING ELSE
 82 |  */
 83 | 
 84 | .two-col-container {
 85 |     display: grid;
 86 |     grid-template-columns: 200px 1fr;
 87 | }
 88 | 
 89 | #left-menu {
 90 |     height: 100vh;
 91 |     background-color: white;
 92 |     overflow: hidden;
 93 |     padding: 15px;
 94 |     border-right: 1px solid lightgray;
 95 | }
 96 | 
 97 | #navlogo {
 98 |     font-family: 'Playfair Display', serif;
 99 |     font-size: 25px;
100 |     font-weight: 300;
101 |     letter-spacing: 1px;
102 |     display: block;
103 | }
104 | 
105 | #navlogo_sub {
106 |     font-family: 'Playfair Display', serif;
107 |     font-size: 15px;
108 |     font-weight: 600;
109 |     letter-spacing: 2px;
110 |     display: block;
111 |     color: gray;
112 | }
113 | 
114 | .navlink {
115 |     display: block;
116 |     margin-top: 10px;
117 |     cursor: pointer;
118 |     text-decoration: none;
119 |     color: black;
120 |     font-weight: 100;
121 | }
122 | 
123 | .navlink:first-of-type {
124 |     margin-top: 25px;
125 | }
126 | 
127 | .navlink .navicon {
128 |     margin-right: 10px;
129 | }
130 | 
131 | #main-content {
132 |     height: 100vh;
133 |     background-color: white;
134 |     overflow: auto;
135 |     padding: 15px;
136 | }
137 | 
138 | .content-title {
139 |     font-weight: 200;
140 |     font-size: 25px;
141 |     padding-bottom: var(--standard-margin-spacing-text);
142 | }
143 | 
144 | input,
145 | textarea,
146 | .formfield {
147 |     margin: 0 0 var(--standard-margin-spacing-text) 0;
148 |     padding: var(--internal-standard-padding);
149 |     border-radius: var(--standard-border-radius);
150 |     border: 1px solid lightgray;
151 | }
152 | 
153 | .formfield-hover {
154 |     cursor: pointer;
155 | }
156 | 
157 | .formfield-hover:hover {
158 |     background-color: var(--lightgray-borders-backgrounds);
159 | }
160 | 
161 | .error_message {
162 |     color: crimson;
163 | }
164 | 
165 | .job_info_container {
166 |     padding: calc(2*var(--internal-standard-padding));
167 |     border-radius: var(--standard-border-radius);
168 |     background-color: var(--lightgray-borders-backgrounds);
169 |     margin: var(--internal-standard-padding) 0 var(--internal-standard-padding) 0;
170 |     display: grid;
171 |     grid-template-columns: 33% 33% 1fr;
172 | }
173 | 
174 | .job_info_container div {
175 |     margin-right: var(--internal-standard-padding);
176 | }
177 | 
178 | .jobtitle {
179 |     font-weight: 900;
180 |     margin: calc(2*var(--internal-standard-padding)) 0 calc(2*var(--internal-standard-padding)) 0;
181 | }
182 | 
183 | .general-list-container {
184 |     padding: calc(2*var(--internal-standard-padding));
185 |     border-radius: var(--standard-border-radius);
186 |     background-color: var(--lightgray-borders-backgrounds);
187 |     margin: var(--internal-standard-padding) 0 var(--internal-standard-padding) 0;
188 | }
189 | 
190 | .tag-label-green {
191 |     display: inline-block;
192 |     padding: var(--internal-standard-padding);
193 |     border-radius: var(--standard-border-radius);
194 |     background-color: #2E8B57;
195 |     color: white;
196 |     font-weight: 600;
197 |     font-size: 12px;
198 | }
199 | 
200 | .tag-label-blue {
201 |     display: inline-block;
202 |     padding: var(--internal-standard-padding);
203 |     border-radius: var(--standard-border-radius);
204 |     background-color: #4682B4;
205 |     color: white;
206 |     font-weight: 600;
207 |     font-size: 12px;
208 | }
209 | 
210 | .delete-icon {
211 |     margin: 0 var(--internal-standard-padding) 0 var(--internal-standard-padding);
212 |     cursor: pointer;
213 | }


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/aboutus.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base-navigation.html' %}
 2 | 
 3 | {% block bodycontent %}
 4 | 
 5 | <p>The <i>PhaseLLM Evaluation</i> project is built by Phase AI. You can learn about the PhaseLLM package by visiting <a
 6 |         href="https://phasellm.com">phasellm.com</a>. Learn more about Phase AI at <a
 7 |         href="https://phaseai.com">phaseai.com</a>.</p>
 8 | 
 9 | <p>&nbsp;</p>
10 | 
11 | <p>If you have questions about this LLM evaluation project, you can also email w [at] phaseai [dot] com.</p>
12 | 
13 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/base-navigation-two-cols.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 | 
 5 | <div class="two-col-container">
 6 | 
 7 |     <div id="left-menu">
 8 |         <span id="navlogo">PhaseLLM</span>
 9 |         <span id="navlogo_sub">EVALUATION</span>
10 |         <a class="navlink" href="/"><span class="navicon">🏠</span>Home</a>
11 |         <a class="navlink" href="/chats"><span class="navicon">💬</span>Chat Reviews</a>
12 |         <a class="navlink" href="/view_chat"><span class="navicon">📖</span>New Chat</a>
13 |         <a class="navlink" href="/import"><span class="navicon">📋</span>Paste JSON</a>
14 |         <a class="navlink" href="/groups"><span class="navicon">📚</span>Chat Groups</a>
15 |         <a class="navlink" href="/jobs"><span class="navicon">🤖</span>Create Job</a>
16 |         <a class="navlink" href="/about"><span class="navicon">👋</span>About Us</a>
17 |     </div>
18 |     <div id="main-content">
19 |         <div class="two-col-content-container">
20 |             <div class="two-col-content-left">
21 |                 <p class='content-title'>{{ contenttitle|default:"LLM Evaluator" }}</p>
22 |                 {% block bodycontent %}
23 |                 {% endblock %}
24 |             </div>
25 |             <div class="two-col-content-right">
26 |                 <p class='content-title'>{{ contenttitle2 }}</p>
27 |                 {% block bodycontent2 %}
28 |                 {% endblock %}
29 |             </div>
30 |         </div>
31 |     </div>
32 | 
33 | </div>
34 | 
35 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/base-navigation-two-rows.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 | 
 5 | <div class="two-col-container">
 6 | 
 7 |     <div id="left-menu">
 8 |         <span id="navlogo">PhaseLLM</span>
 9 |         <span id="navlogo_sub">EVALUATION</span>
10 |         <a class="navlink" href="/"><span class="navicon">🏠</span>Home</a>
11 |         <a class="navlink" href="/chats"><span class="navicon">💬</span>Chat Reviews</a>
12 |         <a class="navlink" href="/view_chat"><span class="navicon">📖</span>New Chat</a>
13 |         <a class="navlink" href="/import"><span class="navicon">📋</span>Paste JSON</a>
14 |         <a class="navlink" href="/groups"><span class="navicon">📚</span>Chat Groups</a>
15 |         <a class="navlink" href="/jobs"><span class="navicon">🤖</span>Create Job</a>
16 |         <a class="navlink" href="/about"><span class="navicon">👋</span>About Us</a>
17 |     </div>
18 |     <div id="main-content">
19 |         <div class="two-row-container">
20 |             <div class="two-row-top-row">
21 |                 <p class='content-title'>{{ contenttitle|default:"LLM Evaluator" }}</p>
22 |                 {% block bodycontent %}
23 |                 {% endblock %}
24 |             </div>
25 |             <div class="two-row-bottom-row">
26 |                 <p class='content-title'>{{ contenttitle2 }}</p>
27 |                 {% block bodycontent2 %}
28 |                 {% endblock %}
29 |             </div>
30 |         </div>
31 |     </div>
32 | 
33 | </div>
34 | 
35 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/base-navigation.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base.html' %}
 2 | 
 3 | {% block content %}
 4 | 
 5 | <div class="two-col-container">
 6 | 
 7 |     <div id="left-menu">
 8 |         <span id="navlogo">PhaseLLM</span>
 9 |         <span id="navlogo_sub">EVALUATION</span>
10 |         <a class="navlink" href="/"><span class="navicon">🏠</span>Home</a>
11 |         <a class="navlink" href="/chats"><span class="navicon">💬</span>Chat Reviews</a>
12 |         <a class="navlink" href="/view_chat"><span class="navicon">📖</span>New Chat</a>
13 |         <a class="navlink" href="/import"><span class="navicon">📋</span>Paste JSON</a>
14 |         <a class="navlink" href="/groups"><span class="navicon">📚</span>Chat Groups</a>
15 |         <a class="navlink" href="/jobs"><span class="navicon">🤖</span>Create Job</a>
16 |         <a class="navlink" href="/about"><span class="navicon">👋</span>About Us</a>
17 |     </div>
18 |     <div id="main-content">
19 |         <p class='content-title'>{{ contenttitle|default:"LLM Evaluator" }}</p>
20 |         {% block bodycontent %}
21 |         {% endblock %}
22 |     </div>
23 | 
24 | </div>
25 | 
26 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/base.html:
--------------------------------------------------------------------------------
 1 | {% load static %}
 2 | 
 3 | <!DOCTYPE html>
 4 | <html lang="en">
 5 | 
 6 | <head>
 7 |     <meta charset="utf-8">
 8 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 9 | 
10 |     <link rel="preconnect" href="https://fonts.googleapis.com">
11 |     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
12 |     <link
13 |         href="https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,300;1,400;1,500;1,600;1,700;1,800&family=Playfair+Display:ital,wght@0,400;0,500;0,600;0,700;0,800;0,900;1,400;1,500;1,600;1,700;1,800;1,900&display=swap"
14 |         rel="stylesheet">
15 | 
16 |     <link rel="icon"
17 |         href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>👋</text></svg>">
18 | 
19 |     <link href="{% static 'main.css' %}" rel="stylesheet">
20 | 
21 |     <title>
22 |         {% block title %}
23 |         {{ contenttitle|default:"LLM Evaluator" }}
24 |         {% endblock %}
25 |     </title>
26 | </head>
27 | 
28 | <body>
29 | 
30 |     {% block content %}
31 | 
32 |     {% endblock %}
33 | 
34 | </body>
35 | 
36 | </html>


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/batch.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base-navigation.html' %}
 2 | 
 3 | {% block bodycontent %}
 4 | 
 5 | <p><input type="text" id="form_title" name="form_title" placeholder="Title" style="width:100%;"></p>
 6 | 
 7 | <p><input type="text" id="form_groupid" name="form_groupid" placeholder="Group ID" style="width:100%;"></p>
 8 | 
 9 | <p><textarea id="form_description" style="width:100%;height:200px" placeholder="Batch Job Description"></textarea></p>
10 | 
11 | <p><textarea id="form_usermsg" style="width:100%;height:200px" placeholder="Additional user message"></textarea></p>
12 | 
13 | <p><b>Advanced Options</b><br />
14 |     <input type="checkbox" id="check_resend_user_msg" /> Resend Last User Message<br />
15 |     <input type="checkbox" id="check_gpt_4" checked /> Run GPT-4<br />
16 |     <input type="checkbox" id="check_gpt_35" /> Run GPT-3.5<br />
17 |     <input type="checkbox" id="check_temperature_scan" /> Run across temperature = 0.25, 0.75, and 1.25<br />
18 |     Number of times to run: <input type="number" id="num_runs" min="1" max="5" value="1" />
19 | </p>
20 | 
21 | <p><textarea id="form_sytem_prompt" style="width:100%;height:200px"
22 |         placeholder="(Optional) New System Prompt"></textarea></p>
23 | 
24 | 
25 | <p><span class="formfield formfield-hover" onclick="javascript:savejob()">Queue Job</span></p>
26 | 
27 | <script>
28 |     function savejob() {
29 |         var user_message = document.getElementById("form_usermsg").value;
30 |         var desc = document.getElementById("form_description").value;
31 |         var new_system_prompt = document.getElementById("form_sytem_prompt").value;
32 |         var group_id = document.getElementById("form_groupid").value;
33 |         var title = document.getElementById("form_title").value;
34 | 
35 |         var opt_gpt_4 = document.getElementById("check_gpt_4").checked;
36 |         var opt_gpt_35 = document.getElementById("check_gpt_35").checked;
37 |         var opt_temperature_scan = document.getElementById("check_temperature_scan").checked;
38 |         var opt_num_runs = document.getElementById("num_runs").value;
39 |         var opt_resend_user_msg = document.getElementById("check_resend_user_msg").checked;
40 | 
41 |         var data = {
42 |             csrfmiddlewaretoken: '{{ csrf_token }}',
43 |             user_message: user_message,
44 |             description: desc,
45 |             message_collection_id: group_id,
46 |             title: title,
47 |             opt_gpt_4: opt_gpt_4,
48 |             opt_gpt_35: opt_gpt_35,
49 |             opt_temperature_scan: opt_temperature_scan,
50 |             opt_num_runs: opt_num_runs,
51 |             new_system_prompt: new_system_prompt,
52 |             opt_resend_user_msg: opt_resend_user_msg,
53 |         };
54 |         const response = fetch(`create_job`, {
55 |             method: 'POST',
56 |             cache: 'no-cache',
57 |             mode: 'same-origin',
58 |             headers: {
59 |                 'Accept': 'application/json',
60 |                 'Content-Type': 'application/json',
61 |                 'X-CSRFToken': '{{ csrf_token }}'
62 |             },
63 |             body: JSON.stringify(data)
64 |         })
65 |             .then(response => response.json())
66 |             .then(data => {
67 |                 if (data['status'] === "ok") {
68 |                     alert("Saved!");
69 |                 } else {
70 |                     alert("Error! " + data['message']);
71 |                 }
72 |             })
73 |     }
74 | 
75 | </script>
76 | 
77 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/batch_review.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base-navigation.html' %}
 2 | 
 3 | {% block bodycontent %}
 4 | 
 5 | {% if jobs %}
 6 | 
 7 | {% for job in jobs %}
 8 | 
 9 | <div class="job_info_container">
10 |     <div>
11 |         <p class="jobtitle">{{ job.title }}
12 |             {% if job.status == "complete" %}
13 |             <span class="tag-label-green">complete</span>
14 |             {% elif job.status == "scheduled" %}
15 |             <span class="tag-label-blue">scheduled</span>
16 |             {% endif %}
17 |         </p>
18 |         {% if job.description %}
19 |         <p class="jobdescription">{{ job.description }}</p>
20 |         {% else %}
21 |         <p class="jobdescription"><i>No Description Provided</i></p>
22 |         {% endif %}
23 |     </div>
24 |     <div>
25 |         <p class="jobtitle">Input Chats</p>
26 |         {% if job.message_collection_ref %}
27 |         <ul>
28 |             {% for chat in job.message_collection_ref.chats.all %}
29 |             <li><a href='/view_chat/{{ chat.id }}'>{{ chat.title }}</a></li>
30 |             {% endfor %}
31 |         </ul>
32 |         {% else %}
33 |         <p><i>No input chats.</i></p>
34 |         {% endif %}
35 |     </div>
36 |     <div>
37 |         <p class="jobtitle">Generated Chats</p>
38 |         {% if job.results_array %}
39 |         <ul>
40 |             {% for chat in job.results_array.chats.all %}
41 |             <li><a href='/view_chat/{{ chat.id }}'>{{ chat.title }}</a></li>
42 |             {% endfor %}
43 |         </ul>
44 |         {% else %}
45 |         <p><i>No output chats (yet).</i></p>
46 |         {% endif %}
47 |     </div>
48 | </div>
49 | 
50 | {% endfor %}
51 | 
52 | {% else %}
53 | <p><i>No jobs created yet.</i></p>
54 | 
55 | <p>&nbsp;</p>
56 | 
57 | <p>If this is your first time using the <i>Evaluation</i> platform, try doing the following!<br />&nbsp;
58 | <ul>
59 |     <li>Create a New Chat &mdash; <a href="/view_chat">click here</a> to launch the editor.</li>
60 |     <li>Create a <i>group</i> of chats &mdash; this will be used for batch jobs. <a href="/groups">Click here</a> to
61 |         give it a shot.</li>
62 |     <li>Finally, create a batch job &mdash; <a href="/jobs">click here</a> to do so.</li>
63 |     <li>Once you've done the above, make sure to run the Django command 'runjobs' and you're good to go!</li>
64 | </ul>
65 | </p>
66 | {% endif %}
67 | 
68 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/chats.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base-navigation.html' %}
 2 | 
 3 | {% block bodycontent %}
 4 | 
 5 | <p>Please visit /view_chat/chat_id where chat_id is the ID of the chat you want to view.</p>
 6 | 
 7 | {% if all_chats %}
 8 | <ul>
 9 |     {% for chat in all_chats %}
10 |     <li><a href='/view_chat/{{ chat.id }}'>ID: {{chat.id}}, TITLE: {{ chat.title }}</a></li>
11 |     {% endfor %}
12 | </ul>
13 | {% endif %}
14 | 
15 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/create-group.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base-navigation-two-cols.html' %}
 2 | 
 3 | {% block bodycontent2 %}
 4 | 
 5 | <p><input type="text" id="form_title" name="form_title" placeholder="Title" , style="width:100%;"></p>
 6 | <p><textarea id="messagelist" style="width:100%;height:200px" placeholder="Comma-separate IDs here"></textarea></p>
 7 | <p><span class="formfield formfield-hover" onclick="javascript:savegroup()">Save</span></p>
 8 | 
 9 | <script>
10 |     function savegroup() {
11 |         var content = document.getElementById("messagelist").value;
12 |         var title = document.getElementById("form_title").value;
13 |         var data = { csrfmiddlewaretoken: '{{ csrf_token }}', messagelist: content, title: title };
14 |         const response = fetch(`create_group_csv`, {
15 |             method: 'POST',
16 |             cache: 'no-cache',
17 |             mode: 'same-origin',
18 |             headers: {
19 |                 'Accept': 'application/json',
20 |                 'Content-Type': 'application/json',
21 |                 'X-CSRFToken': '{{ csrf_token }}'
22 |             },
23 |             body: JSON.stringify(data)
24 |         })
25 |             .then(response => response.json())
26 |             .then(data => {
27 |                 if (data['status'] === "ok") {
28 |                     alert("Saved!");
29 |                 } else {
30 |                     alert("Error! " + data['message']);
31 |                 }
32 |             })
33 |     }
34 | 
35 | </script>
36 | </body>
37 | 
38 | {% endblock %}
39 | 
40 | {% block bodycontent %}
41 | {% if all_groups %}
42 | {% for g in all_groups %}
43 | <div class="general-list-container">
44 |     <span style="width:50px;display:inline-block;">ID: {{g.id}}</span> <b>{{ g.title }}</b>
45 | </div>
46 | {% endfor %}
47 | {% else %}
48 | <p><i>No groups created yet.</i></p>
49 | {% endif %}
50 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/create.html:
--------------------------------------------------------------------------------
 1 | {% extends 'base-navigation.html' %}
 2 | 
 3 | {% block bodycontent %}
 4 | 
 5 | <p><input type="text" id="form_title" name="form_title" placeholder="Title" style="width:100%;"></p>
 6 | <p><textarea id="messages" style="width:100%;height:200px" placeholder="Put your messages here"></textarea></p>
 7 | <p><span class="formfield formfield-hover" onclick="javascript:savemessages()">Import</span></p>
 8 | 
 9 | <script>
10 |     function savemessages() {
11 |         var content = document.getElementById("messages").value;
12 |         var title = document.getElementById("form_title").value;
13 |         var data = { csrfmiddlewaretoken: '{{ csrf_token }}', messages: content, title: title };
14 |         const response = fetch(`create_save_ma`, {
15 |             method: 'POST',
16 |             cache: 'no-cache',
17 |             mode: 'same-origin',
18 |             headers: {
19 |                 'Accept': 'application/json',
20 |                 'Content-Type': 'application/json',
21 |                 'X-CSRFToken': '{{ csrf_token }}'
22 |             },
23 |             body: JSON.stringify(data)
24 |         })
25 |             .then(response => response.json())
26 |             .then(data => {
27 |                 if (data['status'] === "ok") {
28 |                     alert("Saved!");
29 |                 } else {
30 |                     alert("Error! " + data['message']);
31 |                 }
32 |             })
33 |     }
34 | 
35 | </script>
36 | 
37 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/eval_platform/templates/view-chat.html:
--------------------------------------------------------------------------------
  1 | {% extends 'base-navigation-two-cols.html' %}
  2 | 
  3 | {% block bodycontent %}
  4 | {% if all_chats %}
  5 | {% for chat in all_chats %}
  6 | <div class="general-list-container">
  7 |     <span style="width:50px;display:inline-block;">ID: {{chat.id}}</span> <b><a href='/view_chat/{{ chat.id }}'>
  8 |             {{chat.title }}
  9 |         </a></b>
 10 | </div>
 11 | {% endfor %}
 12 | {% endif %}
 13 | {% endblock %}
 14 | 
 15 | 
 16 | {% block bodycontent2 %}
 17 | 
 18 | {% if chat_id == -1 %}
 19 | <p class='content-title'>{{ chat_title }}</p>
 20 | <p><i>Select a chat to review and edit it.</i></p>
 21 | {% else %}
 22 | 
 23 | {% if error_msg %}
 24 | <p class="error_message"><b>Error!</b> {{ error_msg | safe}}</p>
 25 | {% else %}
 26 | 
 27 | <p style="margin-bottom:25px;">
 28 |     <span class="formfield formfield-hover" onclick="javascript:savemessagesfromarray()">Save to New Chat</span>
 29 |     <!-- <span class="formfield formfield-hover" onclick="javascript:updatetitle()">Update Title</span> -->
 30 |     <span class="formfield formfield-hover" onclick="javascript:overwrite()">Overwrite</span>
 31 |     <span class="formfield formfield-hover" onclick="javascript:deletechat()">Delete</span>
 32 | </p>
 33 | 
 34 | <div><b>Chat Title:</b><br />
 35 |     <span id="input_chat_title" style="width:100%;display:inline-block;" contenteditable=True>{{ chat_title }}
 36 | </div>
 37 | 
 38 | <p>&nbsp;</p>
 39 | 
 40 | <div id="full_messages"></div>
 41 | 
 42 | <p>&nbsp;</p>
 43 | 
 44 | <p>
 45 |     <span class="formfield formfield-hover" onclick="javascript:appendmessage('system')">+ System</span>
 46 |     <span class="formfield formfield-hover" onclick="javascript:appendmessage('assistant')">+ Assistant</span>
 47 |     <span class="formfield formfield-hover" onclick="javascript:appendmessage('user')">+ User</span>
 48 | </p>
 49 | 
 50 | <script>
 51 |     var message_array = {{ json_message_array | safe }};
 52 |     var chat_id = {{ chat_id }};
 53 | 
 54 |     function convert_newlines(str_to_conv) {
 55 |         return str_to_conv.replaceAll("\n", "<br attr='phasellm-br'/>")
 56 |     }
 57 | 
 58 |     function convert_newlines_back(str_to_conv) {
 59 |         var s = str_to_conv.replaceAll(`<br attr="phasellm-br">`, "\n");
 60 |         s = s.replaceAll("<br>", "\n");
 61 |         s = s.replaceAll("</div>", "\n");
 62 |         s = s.replaceAll("<div>", "");
 63 |         s = s.trim();
 64 |         return s;
 65 |     }
 66 | 
 67 |     function display_message_array(message_array) {
 68 |         var new_html = "";
 69 |         for (var i = 0; i < message_array.length; i++) {
 70 |             var new_content = convert_newlines(message_array[i]['content']);
 71 |             new_html += `<div id="message-container-${i}"><div style='margin-top:20px;margin-bottom:10px;'><b>${message_array[i]['role']}</b><span class='delete-icon' onclick="javascript:delete_div(${i});">🗑️</span></div><div id="phasellm-ma-content-${i}" contenteditable=True>${new_content}</div></div>`;
 72 |         }
 73 |         var d = document.getElementById('full_messages');
 74 |         d.innerHTML = new_html;
 75 |     }
 76 | 
 77 |     function rebuild_message_array() {
 78 |         for (var i = 0; i < message_array.length; i++) {
 79 |             var content_div = document.getElementById(`phasellm-ma-content-${i}`);
 80 |             var content = convert_newlines_back(content_div.innerHTML)
 81 |             message_array[i]['content'] = content;
 82 |         }
 83 |     }
 84 | 
 85 |     function delete_div(div_number) {
 86 |         //document.getElementById(`message-container-${div_number}`).outerHTML = "";
 87 |         message_array.splice(div_number, 1);
 88 |         display_message_array(message_array);
 89 |     }
 90 | 
 91 |     function updatetitle() {
 92 |         var new_title = document.getElementById("input_chat_title").innerText;
 93 |         const response = fetch(`/update_title_via_post`, {
 94 |             method: 'POST',
 95 |             cache: 'no-cache',
 96 |             mode: 'same-origin',
 97 |             headers: {
 98 |                 'Accept': 'application/json',
 99 |                 'Content-Type': 'application/json',
100 |                 'X-CSRFToken': '{{ csrf_token }}'
101 |             },
102 |             body: JSON.stringify({ "chat_id": chat_id, "new_title": new_title })
103 |         })
104 |             .then(response => response.json())
105 |             .then(data => {
106 |                 if (data['status'] === "ok") {
107 |                     alert("Title Updated!");
108 |                 } else {
109 |                     alert("Error! " + data['message']);
110 |                 }
111 |             });
112 |     }
113 | 
114 |     function savemessages(content) {
115 |         var new_title = document.getElementById("input_chat_title").innerText;
116 |         var data = { csrfmiddlewaretoken: '{{ csrf_token }}', messages: content, title: new_title };
117 |         const response = fetch(`/create_save_ma_json`, {
118 |             method: 'POST',
119 |             cache: 'no-cache',
120 |             mode: 'same-origin',
121 |             headers: {
122 |                 'Accept': 'application/json',
123 |                 'Content-Type': 'application/json',
124 |                 'X-CSRFToken': '{{ csrf_token }}'
125 |             },
126 |             body: JSON.stringify(data)
127 |         })
128 |             .then(response => response.json())
129 |             .then(data => {
130 |                 if (data['status'] === "ok") {
131 |                     alert("Saved!");
132 |                 } else {
133 |                     alert("Error! " + data['message']);
134 |                 }
135 |             })
136 |     }
137 | 
138 |     function savemessagesfromarray() {
139 |         rebuild_message_array();
140 |         savemessages(message_array);
141 |     }
142 | 
143 |     display_message_array(message_array);
144 | 
145 |     function appendmessage(role) {
146 |         rebuild_message_array();
147 |         message_array.push({ role: role, content: "" });
148 |         display_message_array(message_array);
149 |     }
150 | 
151 |     function deletechat() {
152 |         window.location.href = `/delete_chat/${chat_id}`;
153 |     }
154 | 
155 |     function overwrite() {
156 |         var new_title = document.getElementById("input_chat_title").innerText;
157 |         rebuild_message_array();
158 |         var content = message_array;
159 | 
160 |         var data = { csrfmiddlewaretoken: '{{ csrf_token }}', chat_id: chat_id, messages: content, title: new_title };
161 | 
162 | 
163 |         const response = fetch(`/overwrite_chat`, {
164 |             method: 'POST',
165 |             cache: 'no-cache',
166 |             mode: 'same-origin',
167 |             headers: {
168 |                 'Accept': 'application/json',
169 |                 'Content-Type': 'application/json',
170 |                 'X-CSRFToken': '{{ csrf_token }}'
171 |             },
172 |             body: JSON.stringify(data)
173 |         })
174 |             .then(response => response.json())
175 |             .then(data => {
176 |                 if (data['status'] === "ok") {
177 |                     alert("Chat Data Updated!");
178 |                 } else {
179 |                     alert("Error! " + data['message']);
180 |                 }
181 |             });
182 |     }
183 | 
184 | </script>
185 | {% endif %}
186 | 
187 | {% endif %}
188 | 
189 | {% endblock %}


--------------------------------------------------------------------------------
/demos-and-products/newsbot/README.md:
--------------------------------------------------------------------------------
 1 | # NewsBot
 2 | 
 3 | An autonomous news summarizer. You can set this up to execute regularly and it will email you a summary of news articles for a given period, on specific queries or topics.
 4 | 
 5 | ## Installation and Setup
 6 | 
 7 | You need `phasellm` installed; no additional packages need to be installed. However, you do need to have...
 8 | 
 9 | - An OpenAI API key
10 | - A GMail account (we'll use this to send news summaries)
11 | - A newsapi.org API key
12 | 
13 | Set up a .env file with the above, as follows:
14 | 
15 | ```
16 | OPENAI_API_KEY=<OpenAI API key>
17 | NEWS_API_API_KEY=<newsapi.org API key>
18 | GMAIL_EMAIL=<GMail address that will send emails>
19 | GMAIL_PASSWORD=<Password for the above>
20 | ```
21 | 
22 | Note that you'll likely need to set up an [app password](https://myaccount.google.com/apppasswords) for your GMail account, rather than using your actual password. This is something GMail requires for security purposes (and it's a great idea!). [Learn more here.](https://support.google.com/mail/answer/185833)
23 | 
24 | ## Running
25 | 
26 | Once you've done the above, simply run `python newsbot.py` and you're good to go!
27 | 


--------------------------------------------------------------------------------
/demos-and-products/newsbot/newsbot.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Sample code for getting a list of news articles, having OpenAI summarize them, and then deploying an email with the summaries.
 3 | """
 4 | 
 5 | from phasellm.agents import EmailSenderAgent, NewsSummaryAgent
 6 | from phasellm.llms import OpenAIGPTWrapper, ClaudeWrapper
 7 | 
 8 | queries = ["inflation", "openai", "llm"] # We will generate a summary for each element in the list
 9 | 
10 | ##########################################################################
11 | #
12 | # ENVIRONMENT VARIABLES (Gmail, News API, etc.) (START)
13 | # Update this to customize your newsbot experience.
14 | #
15 | 
16 | import os
17 | from dotenv import load_dotenv
18 | 
19 | load_dotenv()
20 | 
21 | # Load OpenAI and newsapi.org API keys
22 | openai_api_key = os.getenv("OPENAI_API_KEY")
23 | news_api_api_key = os.getenv("NEWS_API_API_KEY")
24 | 
25 | # Load Anthropic API key
26 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
27 | 
28 | # Gmail credentials.
29 | gmail_email = os.getenv("GMAIL_EMAIL")
30 | gmail_password = os.getenv("GMAIL_PASSWORD")
31 | 
32 | RECIPIENT_EMAIL="<who will get the email summary>"
33 | SENDER_NAME="<who is sending the email>"
34 | 
35 | #
36 | # ENVIRONMENT VARIABLES (END) 
37 | #
38 | ##########################################################################
39 | 
40 | def getArticlesAndSummarize(news_agent, llm, query, days_back=1, include_description=True, max_articles=30):
41 |     """
42 |     See NewsSummaryAgent docs for what the above variables mean.
43 |     """
44 | 
45 |     # First, we obtain the news articles for the query We limit this to 30 articles going back 1 day.
46 |     news_articles = news_agent.getQuery(query, days_back=1, include_descriptions=True, max_articles=max_articles)
47 | 
48 |     # Set up messages for summarization.
49 |     system = "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes. For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. The footnote numbers should start at [1] and increase consecutively. In other words, footnotes should start at 1, 2, 3, etc. For the actual paragraph, you can reorder reference articles and choose the ones to include as to make the paragraph as informative, pithy, and concise as possible. You can also have multiple footnotes per sentence if this helps tell the story. While you should avoid adding your own commentary in most cases, feel free to do so if it will help the reader understand the context of the paragraph you are writing."
50 |     user_prompt = f"The articles below are about '{query}'. Please summarize them into a short paragraph with link retained as per the earlier instructions.\n\n{news_articles}"
51 |     messages = [{"role":"system", "content":system}, {"role":"user", "content":user_prompt}]
52 | 
53 |     news_message = llm.complete_chat(messages)
54 | 
55 |     return news_message
56 | 
57 | # News agent
58 | news_agent = NewsSummaryAgent(news_api_api_key, name="tester agent")
59 | 
60 | # OpenAI model, GPT-4. You can use other models, of course.
61 | #llm = OpenAIGPTWrapper(openai_api_key, model="gpt-4")
62 | #MAX_ARTICLES = 30
63 | 
64 | # Claude (Anthropic) with 100K tokens.
65 | llm = ClaudeWrapper(anthropic_api_key, model="claude-v1-100k")
66 | MAX_ARTICLES = 100
67 | 
68 | news_content = ""
69 | for query in queries:
70 |     content = getArticlesAndSummarize(news_agent, llm, query, max_articles=MAX_ARTICLES)
71 |     news_content += f"# News for {query}\n\n{content}\n\n"
72 | 
73 | # Generate subject line.
74 | news_subject = f"News about: {', '.join(queries)}"
75 | 
76 | # Send email.
77 | e = EmailSenderAgent(SENDER_NAME, 'smtp.gmail.com', gmail_email, gmail_password, 587)
78 | e.sendPlainEmail(RECIPIENT_EMAIL, news_subject, news_content)
79 | 


--------------------------------------------------------------------------------
/demos-and-products/newsbot/newsbot_create.py:
--------------------------------------------------------------------------------
 1 | ### IMPORTS 
 2 | 
 3 | from phasellm.llms import OpenAIGPTWrapper, ClaudeWrapper, ChatPrompt
 4 | from phasellm.agents import NewsSummaryAgent
 5 | import json 
 6 | 
 7 | ### ENVIRONMENT VARIABLES
 8 | 
 9 | import os
10 | from dotenv import load_dotenv
11 | 
12 | load_dotenv()
13 | openai_api_key = os.getenv("OPENAI_API_KEY")
14 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
15 | news_api_api_key = os.getenv("NEWS_API_API_KEY")
16 | 
17 | ### SETUP THE EXPERIMENTAL DATA
18 | 
19 | queries = ['spacex', 'federal reserve', 'shopify', 'openai', 'biden', 'trump', 'met gala', 'king charles', 'poland', 'euro']
20 | JSON_FILE = "news_articles.json"
21 | 
22 | llm_1 = OpenAIGPTWrapper(openai_api_key, model="gpt-4")
23 | llm_2 = OpenAIGPTWrapper(openai_api_key, model="gpt-4") # ClaudeWrapper(anthropic_api_key)
24 | 
25 | chat_prompt_raw_1 = [
26 | {"role":"system",
27 |  "content": "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes. For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. Note that the footnotes should be counted as of the summary; you do not need to keep the numbers from the earlier order, just from your summary. In other words, footnotes should start at 1, 2, 3, etc..."},
28 | {"role":"user",
29 |  "content": "The articles below are about '{query}'. Please summarize them into a short paragraph with link retained as per the earlier instructions.\n\n{news_articles}"},
30 | ]
31 | 
32 | chat_prompt_raw_2 = [
33 | {"role":"system",
34 |  "content": "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes. For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. The footnote numbers should start at [1] and increase consecutively. In other words, footnotes should start at 1, 2, 3, etc. For the actual paragraph, you can reorder reference articles and choose the ones to include as to make the paragraph as informative, pithy, and concise as possible. You can also have multiple footnotes per sentence if this helps tell the story. While you should avoid adding your own commentary in most cases, feel free to do so if it will help the reader understand the context of the paragraph you are writing."},
35 | {"role":"user",
36 |  "content": "The articles below are about '{query}'. Please take on the role of an entertaining, successful, AI-driven investigative journalists and summarize them into a short paragraph. Make sure to follow the 'system' instructions.\n\n{news_articles}"},
37 | ]
38 | 
39 | chat_prompt_1 = ChatPrompt(chat_prompt_raw_1)
40 | chat_prompt_2 = ChatPrompt(chat_prompt_raw_2)
41 | 
42 | ### DATA HELPERS
43 | 
44 | def create_data_set(queries, json_file):
45 |     article_dict = {}
46 |     news_agent = NewsSummaryAgent(news_api_api_key, name="tester agent")
47 |     for query in queries:
48 |         news_articles = news_agent.getQuery(query, days_back=1, include_descriptions=True, max_articles=30)
49 |         article_dict[query] = {"articles":news_articles}
50 | 
51 |     update_data_set(article_dict, json_file)
52 | 
53 | def update_data_set(dict_obj, json_file):
54 |     with open(json_file, 'w') as writer:
55 |         writer.write(json.dumps(dict_obj))
56 | 
57 | def load_data_set(json_file):
58 |     articles = None
59 |     with open(json_file, 'r') as reader:
60 |         articles = json.loads(reader.read())
61 |     return articles
62 | 
63 | ### RUNNING DATA SET CREATION
64 | 
65 | create_data_set(queries, JSON_FILE)
66 | 
67 | articles = load_data_set(JSON_FILE)
68 | for query, article_dict in articles.items():
69 | 
70 |     print(f"Generating news summary for '{query}'")
71 | 
72 |     print("... llm_1")
73 |     llm_1_completion = llm_1.complete_chat(chat_prompt_1.fill(query=query, news_articles=article_dict['articles']))
74 | 
75 |     print("... llm_2")
76 |     llm_2_completion = llm_2.complete_chat(chat_prompt_2.fill(query=query, news_articles=article_dict['articles']))
77 |     
78 |     # Saving results...
79 |     article_dict["llm_1"] = llm_1_completion
80 |     article_dict["llm_2"] = llm_2_completion
81 |     articles[query] = article_dict
82 | 
83 | update_data_set(articles, JSON_FILE)


--------------------------------------------------------------------------------
/demos-and-products/newsbot/newsbot_evaluate.py:
--------------------------------------------------------------------------------
 1 | from phasellm.eval import EvaluationStream
 2 | 
 3 | import json 
 4 | 
 5 | JSON_FILE = "news_articles.json"
 6 | 
 7 | def load_data_set(json_file):
 8 |     articles = None
 9 |     with open(json_file, 'r') as reader:
10 |         articles = json.loads(reader.read())
11 |     return articles
12 | 
13 | articles = load_data_set(JSON_FILE)
14 | 
15 | # Note that we don't pass the two LLMs to the Evaluation Stream -- no need to do so in this example.
16 | es = EvaluationStream("Which news summary is higher quality and more engaging?", "You are a helpful news summarizer. We will provide you with a list of news articles and will ask that you summarize them and retain links to source by adding footnotes. For example, if you have a news article describing XYZ and URL to the article, you would discuss XYZ[1] and add '[1] URL' to the bottom of the message. Note that the footnotes should be counted as of the summary; you do not need to keep the numbers from the earlier order, just from your summary. In other words, footnotes should start at 1, 2, 3, etc...", [None, None])
17 | 
18 | for key, article_dict in articles.items():
19 |     r1 = article_dict["llm_1"]
20 |     r2 = article_dict["llm_2"]
21 |     es.evaluate(r1, r2)
22 | 
23 | print(es.prefs)


--------------------------------------------------------------------------------
/demos-and-products/newsbot/notes.md:
--------------------------------------------------------------------------------
 1 | # Notes on Evaluations
 2 | 
 3 | There's a four-step process to testing these applications:
 4 | 1. input data
 5 | 2. prompt
 6 | 3. execute
 7 | 4. evaluate
 8 | 
 9 | We'll begin by very specifically exploring this from the perspective of newsbot.py
10 | 
11 | ## Input Data
12 | 
13 | In this case, we have the following input data for each query:
14 | (a) A purpose for the news bot. This is basically a higher-level prompt (e.g., system prompt) that stays the same within an experiment but might be optimized or changed across models or experiments.
15 | (b) A query. This is the actual news topic we are asking to summarize. We have multiple queries per experiment.
16 | (c) A list of articles with descriptions and links. This is generated by our agent.
17 | 
18 | ## Prompt
19 | 
20 | There are two types of prompts, based on what we're doign so far: (1) text completion prompts, and (2) chat prompts.
21 | 
22 | A text completion prompt is our traditional approach to generating prompts. You have a set of instructions, and varibales will be replaced as needed (e.g., replace {query} with the topic of interest).
23 | 
24 | A chat prompt is different. Since a chat prompt has multiple messages, we might actually need to convert variables across the entire structure of chat. Today, we do not support chat prompts, but will need to do so for the news bot demo.
25 | 
26 | ## Execute
27 | 
28 | This is the actual model execution loop. In this case, we take the input data and insert it into our prompts. Then we take those prompts and execute against models. We get the results and save them.
29 | 
30 | ## Evaluation
31 | 
32 | Once all of the above has taken place, we then go ahead and review all the results. We want to do this in a 'blind peer review' approach where we randomize the order of outputs so we do not know which prompt/model combination is which.


--------------------------------------------------------------------------------
/demos-and-products/researchllm/README.md:
--------------------------------------------------------------------------------
 1 | # ResearchLLM
 2 | 
 3 | An autonomous statistics helper that converts your natural language queries about a data set to insights.
 4 | 
 5 | - Converts natural language questions to Python code
 6 | - Runs code locally without sharing data with third parties (just shares metadata)
 7 | - Interpets results
 8 | - Provide access to underlying Python code for audit and review
 9 | 
10 | [2-minute demo below:](https://www.youtube.com/watch?v=-fzFCii6UoA)
11 | [![ResearchLLM screenshot](screenshot.png)](https://www.youtube.com/watch?v=-fzFCii6UoA)
12 | 
13 | Please note that we originally launched this as *ResearchLLM* and have since renamed the demo to *ResearchLLM*. Apologies for any confusion!
14 | 
15 | ## 🚨🚨 WARNING: Runs LLM-Generated Python Code
16 | 
17 | This product will run LLM-generated Python code on your computer/server. We highly recommend sandboxing the code or running this on a server that doesn't contain any sensitive information or processes.
18 | 
19 | ## Installation and Setup
20 | 
21 | ### Installation
22 | 
23 | Clone the GitHub repository and navigate to the folder containing this README.md file. Install the relevant packages (including PhaseLLM):
24 | 
25 | ```
26 | pip install -r requirements.txt
27 | ```
28 | 
29 | Next, make sure you edit the `researchllm.py` file to include the proper API keys. You'll find these around line 19:
30 | ```python
31 | ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
32 | MODEL = ClaudeWrapper(ANTHROPIC_API_KEY)
33 | ```
34 | 
35 | You can change the model type from ClaudeWrapper to other PhaseLLM wrappers. Make sure to update your API key accordingly, either via an environment variable or directly in the code.
36 | 
37 | ### Running With Sample Data
38 | 
39 | Start a Python REPL (i.e, run `python` in the folder with all the files from this repo) and then type the following:
40 | 
41 | ```
42 | from frontend import *
43 | run() # Or, run('0.0.0.0', 80) for a public server
44 | ```
45 | 
46 | Running `run()` will launch the server on 127.0.0.1:5000 (i.e., the default Flask setting).
47 | 
48 | ### Running With Your Own Custom Data
49 | 
50 | Running this with your own data only requires a few simple changes to `frontend.py`. Around Line 20, you'll see the following comments:
51 | ```python
52 | ##########################################################################
53 | #
54 | # DATA SET SETUP (START)
55 | # Please review the code below to set up your own data set for analysis.
56 | #
57 | ```
58 | 
59 | All the instructions are there, but we repeat them here for your convenience. You will have to update the two variables below:
60 | ```python
61 | DATA_SETUP_INTRO = "I am researching the relationship between income and sociodemographic census info."
62 | DATA_FILE_LOC = "incomes.csv"
63 | ```
64 | 
65 | `DATA_SETUP_INTRO` should be one short sentence on the context of your data, while `DATA_FILE_LOC` is the location of the file you're loading.
66 | 
67 | If you are *not* using a CSV file, you can also load the DataFrame via a few lines down:
68 | ```python
69 | df = pd.read_csv(DATA_FILE_LOC)
70 | ```
71 | 
72 | Replace the line above with your custom loader (e.g., read_excel() or something else). The `df` variable needs to be a Pandas dataframe for this to work.
73 | 
74 | ## Sample Data Files and Credits
75 | 
76 | The sample data set included in this project and in the demo video is from the 1994 US census. It was put together by Ron Kohavi and is [available on Kaggle](https://www.kaggle.com/datasets/uciml/adult-census-income?select=adult.csv).
77 | 
78 | The other data set referenced in our code is [also on Kaggle](https://www.kaggle.com/datasets/new-york-city/nypd-motor-vehicle-collisions), focusing on motor vehicle collisions in New York City. We didn't include it in the repository as it's about 500MB in size. It's a good alternative to the census data above because it contains location data (latitude, longitude pairs), leading to some really interesting analysis options.
79 | 


--------------------------------------------------------------------------------
/demos-and-products/researchllm/frontend.py:
--------------------------------------------------------------------------------
  1 | """
  2 | A Flask frontend for ResearchLLM
  3 | 
  4 | To run, start a Python REPL and in the same directory as this file and run the following:
  5 | > from frontend import *
  6 | > run() # Or, run('0.0.0.0', 80)
  7 | 
  8 | """
  9 | 
 10 | from researchllm import *
 11 | 
 12 | from flask import Flask, request, render_template
 13 | import pandas as pd
 14 | import numpy as np
 15 | 
 16 | APP = Flask(__name__)
 17 | 
 18 | ##########################################################################
 19 | #
 20 | # DATA SET SETUP (START)
 21 | # Please review the code below to set up your own data set for analysis.
 22 | #
 23 | 
 24 | # Data set to load and analyze.
 25 | DATA_SETUP_INTRO = "I am researching the relationship between income and sociodemographic census info."
 26 | DATA_FILE_LOC = "incomes.csv"
 27 | 
 28 | # Another sample we explored.
 29 | #DATA_SETUP_INTRO = "I am researching car crashes in NYC."
 30 | #DATA_FILE_LOC = "nypd-motor-vehicle-collisions.csv"
 31 | 
 32 | # Want to analyze your own data set? Simply replace the two variables above:
 33 | # DATA_SETUP_INTRO = "What are you researching? Please provide a short description.
 34 | # DATA_FILE_LOC = "The location of the CSV file."
 35 | # Note that you DO NOT have to provide metadata about the CSV file. This gets generated automatically.
 36 | 
 37 | # Loads the CSV file.
 38 | # If you want to load another file (e.g., Excel file), replace the code below with the relevant function (e.g., read_excel()).
 39 | df = pd.read_csv(DATA_FILE_LOC)
 40 | 
 41 | # Advanced settings
 42 | INCLUDE_COL_DESCRIPTION_VALS = True # Choose whether to include sample values in the column descriptions (within the prompt)
 43 | MAX_UNIQUES_FOR_DESC = 10 # Number of unique values to show in column description
 44 | 
 45 | #
 46 | # DATA SET SETUP (END)
 47 | #
 48 | ##########################################################################
 49 | 
 50 | def generateOverview(df):
 51 |     """
 52 |     Generates a prompt providing an overview of a data set. This should only be used to generate the initial data prompt for now.
 53 |     """
 54 |     description = ""
 55 |     for column in df:
 56 |         col_name = df[column].name
 57 |         col_type = df[column].dtype
 58 |         col_description = f"Column Name: {col_name}\nColumn Type: {col_type}"
 59 |         if col_type == "object":
 60 | 
 61 |             # Get unique values for column descriptions.
 62 |             column_values = df[col_name].values
 63 |             uniques = list(set(column_values))
 64 |             
 65 |             if INCLUDE_COL_DESCRIPTION_VALS:
 66 |                 if len(uniques) > MAX_UNIQUES_FOR_DESC:
 67 |                     col_description += f"\nSample Values: {str(uniques[0:MAX_UNIQUES_FOR_DESC])}"
 68 |                 else:
 69 |                     col_description += f"\nSample Values: {str(uniques)}"
 70 |         description += col_description + "\n\n"
 71 |     return description.strip()
 72 | 
 73 | # The prompt used to set up the entire chat session. This prompt is used regularly for analysis.
 74 | base_prompt = f"{DATA_SETUP_INTRO} I have imported Pandas as `pd`, Numpy as `np`, `scipy`, and `sklearn`, and have a dataframe called `df` loaded into Python. `df` contains the following variables and variable types:\n\n" + generateOverview(df) 
 75 | 
 76 | # Calls the researchllm.py function to set the current dataframe as the main one for analysis.
 77 | set_df(df)
 78 | 
 79 | ##########################################################################
 80 | #
 81 | # FLASK FUNCTIONS
 82 | # Everything below manages the frontend.
 83 | #
 84 | ##########################################################################
 85 | 
 86 | @APP.route('/get_prompt')
 87 | def get_prompt():
 88 |     """
 89 |     Returns a JSON object with the prompt being passed on to the language model.
 90 |     """
 91 |     return {"status":"ok", "prompt":base_prompt}
 92 | 
 93 | @APP.route('/')
 94 | def index():
 95 |     """
 96 |     Displays the index page accessible at '/'
 97 |     """
 98 |     return render_template('index.html')
 99 | 
100 | @APP.route("/text_completion", methods = ['POST'])
101 | def analysis():
102 |     """
103 |     Calls the researchllm.py code to request analysis and interpretation thereof.
104 |     
105 |     See run_analysis(message) in researchllm.py for more information.
106 |     """
107 |     text_to_complete = request.json["input"]
108 |     new_request = base_prompt + text_to_complete
109 |     response_object = run_analysis(new_request)
110 |     return {"status":"ok", "content":response_object["interpretation"], "code":response_object["code"], "code_output":response_object["code_output"], "error":response_object["error"]}
111 | 
112 | def run(host="127.0.0.1", port=5000):
113 |     """
114 |     Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only.
115 |     """
116 |     start_bi_session()
117 |     APP.run(host=host, port=port)
118 | 


--------------------------------------------------------------------------------
/demos-and-products/researchllm/requirements.txt:
--------------------------------------------------------------------------------
1 | phasellm
2 | scikit-learn
3 | pandas
4 | numpy
5 | scipy
6 | statsmodels


--------------------------------------------------------------------------------
/demos-and-products/researchllm/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/demos-and-products/researchllm/screenshot.png


--------------------------------------------------------------------------------
/demos-and-products/web-search-chatbot/demo.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from dotenv import load_dotenv
  3 | 
  4 | from phasellm.llms import ClaudeWrapper, ChatBot
  5 | from phasellm.agents import WebSearchAgent
  6 | 
  7 | from flask import Flask, request, render_template, jsonify
  8 | 
  9 | load_dotenv()
 10 | llm = ClaudeWrapper(os.getenv("ANTHROPIC_API_KEY"), model='claude-2')
 11 | web_search_agent = WebSearchAgent(
 12 |     api_key=os.getenv("GOOGLE_SEARCH_API_KEY")
 13 | )
 14 | 
 15 | CHATBOT: ChatBot
 16 | 
 17 | APP = Flask(__name__)
 18 | 
 19 | 
 20 | def reset_chatbot():
 21 |     """
 22 |     Reset the chatbot state.
 23 |     Returns:
 24 | 
 25 |     """
 26 |     global CHATBOT
 27 |     CHATBOT = ChatBot(llm)
 28 |     return True
 29 | 
 30 | 
 31 | # Call reset_chatbot() to initialize the chatbot.
 32 | reset_chatbot()
 33 | 
 34 | 
 35 | @APP.route('/submit-chat-message', methods=['POST'])
 36 | def route_send_chat():
 37 |     try:
 38 |         global CHATBOT
 39 |         message = request.json["input"]
 40 | 
 41 |         query = CHATBOT.chat(
 42 |             f'Come up with a google search query that will provide more information to help answer the question: '
 43 |             f'"{message}". Respond with only the query.'
 44 |         )
 45 |         print(f'Google search query: {query}')
 46 | 
 47 |         # Submit the query to the Google Search Agent.
 48 |         results = web_search_agent.search_google(
 49 |             query,
 50 |             custom_search_engine_id=os.getenv("GOOGLE_SEARCH_ENGINE_ID"),
 51 |             num=2
 52 |         )
 53 | 
 54 |         sources = []
 55 |         # Add the contents of the top result into the chatbot message queue.
 56 |         if len(results) >= 1:
 57 |             for result in results:
 58 |                 CHATBOT.append_message(
 59 |                     role='search result',
 60 |                     message=result.content
 61 |                 )
 62 |                 sources.append(result.url)
 63 | 
 64 |         # Resubmit the message with the new search result as context.
 65 |         response = CHATBOT.chat(message + '. Answer using the information from the search results above.')
 66 | 
 67 |         return {"status": "ok", "content": response, "sources": sources}
 68 |     except Exception as e:
 69 |         return {"status": "error", "message": e}
 70 | 
 71 | 
 72 | @APP.route('/reset-chatbot')
 73 | def route_reset_chatbot():
 74 |     if reset_chatbot():
 75 |         return jsonify({"status": "ok", "message": "ChatBot has been restarted."})
 76 |     else:
 77 |         return jsonify({"status": "error", "message": "ChatBot could not be restarted."})
 78 | 
 79 | 
 80 | @APP.route('/')
 81 | def route_index():
 82 | 
 83 |     if "reset" in request.args:
 84 |         if request.args['reset'] == 'true':
 85 |             reset_chatbot()
 86 | 
 87 |     return render_template('index.html')
 88 | 
 89 | 
 90 | def run(host="127.0.0.1", port=5000):
 91 |     """
 92 |     Launches a local web server for interfacing with PhaseLLM. This is meant to be for testing purposes only.
 93 |     """
 94 |     APP.run(host=host, port=port)
 95 | 
 96 | 
 97 | MAIN_HOST = "127.0.0.1"
 98 | MAIN_PORT = 8000
 99 | if __name__ == '__main__':
100 |     run(MAIN_HOST, MAIN_PORT)
101 | 


--------------------------------------------------------------------------------
/demos-and-products/web-search-chatbot/templates/index.html:
--------------------------------------------------------------------------------
  1 | <html>
  2 | <head>
  3 | <title>Web Search Chatbot</title>
  4 | 
  5 | <link rel="preconnect" href="https://fonts.googleapis.com">
  6 | <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  7 | <link href="https://fonts.googleapis.com/css2?family=Open+Sans:ital,wght@0,300;0,400;0,500;0,600;0,700;0,800;1,300;1,400;1,500;1,600;1,700;1,800&display=swap" rel="stylesheet">
  8 | 
  9 | <style>
 10 | * {
 11 |     font-family: 'Open Sans', sans-serif;
 12 |     box-sizing: border-box;
 13 | }
 14 | 
 15 | body {
 16 | 	margin:0;
 17 | 	padding:0;
 18 | 	overflow:hidden;
 19 | }
 20 | 
 21 | #chatstream {
 22 |     padding:20px;
 23 |     height:calc(100vh - 200px);
 24 |     margin:20px;
 25 |     border:1px solid lightgray;
 26 |     border-radius:10px;
 27 |     overflow-y:auto;
 28 | }
 29 | 
 30 | #chatinputdiv {
 31 |     padding:20px;
 32 |     height:150px;
 33 |     margin:20px;
 34 |     border:1px solid lightgray;
 35 |     border-radius:10px;
 36 | }
 37 | 
 38 | .interface-element {
 39 | 	font-size:15px;
 40 | 	font-family: 'Open Sans', sans-serif;
 41 | }
 42 | 
 43 | #chat-input {
 44 | 	padding:10px;
 45 | 	border-radius:8px;
 46 | 	border:1px solid rgb(230,230,230);
 47 | 	width:100%;
 48 | }
 49 | 
 50 | #chat-button {
 51 | 	padding:10px;
 52 | 	border-radius:8px;
 53 | 	border:1px solid rgb(230,230,230);
 54 | 	width:100%;
 55 | 	background:rgb(83, 145, 101);
 56 | 	color:rgb(248, 245, 228);
 57 | 	font-weight:800px;
 58 | 	margin-top:10px;
 59 | 	width:100px;
 60 | 	cursor:pointer;
 61 | }
 62 | 
 63 | .chat-msg-user {
 64 | 	background-color:rgb(63, 73, 127);
 65 | 	color:white;
 66 | 	border-radius:8px;
 67 | 	margin-left:50px;
 68 | 	padding:10px;
 69 | 	margin-top:10px;
 70 | }
 71 | 
 72 | .chat-msg-chatbot {
 73 | 	background-color:rgb(248, 245, 228);
 74 | 	border-radius:8px;
 75 | 	margin-right:50px;
 76 | 	padding:10px;
 77 | 	margin-top:10px;
 78 | }
 79 | 
 80 | </style>
 81 | 
 82 | </head>
 83 | <body>
 84 | 
 85 | <div id="chatstream">
 86 | 
 87 | </div>
 88 | 
 89 | 
 90 | <div id="chatinputdiv">
 91 |     <form>
 92 |         <input type="text" class="interface-element" id="chat-input" placeholder="Say something! 😀">
 93 |         <button type="button" class="interface-element" id="chat-button" onclick="javascript:send_message();">Submit</button>
 94 |     </form>
 95 | </div>
 96 | 
 97 | <script>
 98 | 
 99 | function append_message(msg, who, sources = null) {
100 | 	var output_div = document.getElementById("chatstream");
101 | 	let innerHTML = output_div.innerHTML;
102 |     innerHTML = innerHTML + `<div class="chat-msg-${who}">${msg}`;
103 | 	if (sources !== null && sources.length > 0) {
104 |         innerHTML = innerHTML + `</br></br><div>Source(s):</div>`;
105 | 	    for (const source of sources) {
106 |             innerHTML = innerHTML + `<div><a href="${source}" target="_blank">${source}</a></div>`;
107 |         }
108 | 	}
109 | 	innerHTML = innerHTML + `</div>`;
110 | 	output_div.innerHTML = innerHTML;
111 | }
112 | 
113 | function send_message() {
114 | 	var chat_message_box = document.getElementById("chat-input");
115 | 	var msg_to_send = chat_message_box.value + "";
116 | 	append_message(msg_to_send, "user");
117 | 	chat_message_box.value = "";
118 | 
119 |     var data = {"input":msg_to_send};
120 |     const response = fetch("submit-chat-message", {
121 |         method: "POST",
122 |         cache: "no-cache",
123 |         credentials: "same-origin",
124 |         headers: {"Content-Type": "application/json"},
125 |         body: JSON.stringify(data),
126 |     })
127 |     .then(response=>response.json())
128 |     .then(data=>{
129 | 		console.log(data);
130 | 		console.log(`STATUS: ${data["status"]}`);
131 | 		console.log(`CONTENT: ${data["content"]}`);
132 | 		console.log(`SOURCES: ${data["sources"]}`);
133 | 		var new_content = data["content"].replace(/(?:\r\n|\r|\n)/g, '<br>');
134 | 		append_message(new_content, "chatbot", data["sources"]);
135 |     })
136 | }
137 | 
138 | var element = document.getElementById('chat-input');
139 | element.addEventListener('keypress', function(e){
140 |   if (e.keyCode == 13) {
141 |     send_message();
142 | 	e.preventDefault();
143 |   }
144 | });
145 | 
146 | </script>
147 | 
148 | </body>
149 | </html>


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | ### Docs Setup
 2 | 
 3 | 1) Install docs dependencies
 4 |     ```
 5 |     pip install -e .[docs]
 6 |     ```
 7 | 
 8 | 2) Run a local docs server
 9 |    ```
10 |    sphinx-autobuild docs/source/ docs/build/html
11 |    ```
12 | 
13 | ### Manual Build
14 | 
15 | ```
16 | cd docs
17 | make html
18 | ```
19 | 
20 | ### Helpful Tools
21 | 
22 | * Convert reStructuredText (.rst) to Markdown (.md)
23 |     ```
24 |     pip install rst-to-myst[sphinx]
25 |     rst2myst convert docs/**/*.rst
26 |     ```
27 | 
28 | ### Useful Resources
29 | 
30 | * Document Your Scientific Project With Markdown, Sphinx, and Read the Docs | PyData Global 2021
31 |   * https://www.sphinx-doc.org/en/master/usage/quickstart.html
32 |   * https://www.youtube.com/watch?v=qRSb299awB0


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | sys.path.append('../../')
 4 | 
 5 | from project_metadata import NAME, VERSION, AUTHOR  # noqa: E402
 6 | 
 7 | # Configuration file for the Sphinx documentation builder.
 8 | #
 9 | # For the full list of built-in configuration values, see the documentation:
10 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
11 | 
12 | # -- Project information -----------------------------------------------------
13 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
14 | 
15 | project = NAME
16 | copyright = f'2023, {AUTHOR}'
17 | author = AUTHOR
18 | release = VERSION
19 | 
20 | # -- General configuration ---------------------------------------------------
21 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
22 | 
23 | # Add paths to the Python source code.
24 | sys.path.append('../../phasellm')
25 | 
26 | # Allow markdown files to be used.
27 | extensions = [
28 |     'myst_parser',
29 |     'autoapi.extension',
30 |     'sphinx.ext.duration',
31 |     'sphinx.ext.autodoc',
32 |     'sphinx.ext.napoleon'
33 | ]
34 | 
35 | # Configure autoapi.
36 | autoapi_dirs = ['../../phasellm']
37 | autoapi_python_class_content = "init"
38 | 
39 | templates_path = ['_templates']
40 | exclude_patterns = []
41 | 
42 | # -- Options for HTML output -------------------------------------------------
43 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
44 | 
45 | html_theme = 'furo'
46 | html_static_path = ['_static']
47 | 


--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
 1 | % Phasellm documentation master file, created by
 2 | % sphinx-quickstart on Tue Aug  8 15:42:56 2023.
 3 | % You can adapt this file completely to your liking, but it should at least
 4 | % contain the root `toctree` directive.
 5 | 
 6 | ```{include} ../../README.md
 7 | :relative-images:
 8 | ```
 9 | 
10 | ## Contents
11 | ```{toctree}
12 | :maxdepth: 2
13 | 
14 | ```
15 | 


--------------------------------------------------------------------------------
/phasellm/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Welcome to PhaseLLM!
 3 | 
 4 | We are a framework to help you build robust Large Language Model (LLM)-based apps. Please visit our site at phasellm.com for documents, tutorials, and more.
 5 | 
 6 | The module comes with the following submodules:
 7 | - agents: components that can execute specific tasks, such as downloading the latest news, executing code, sending an email, and more.
 8 | - eval: ways to evaluate LLM and app performance.
 9 | - exceptions: classes to track LLM-specific types of exceptions.
10 | - llms: helper classes for dealing with LLMs, including wrappers for popular models, chatbots, and more.
11 | 
12 | Have any questions? Reach out at hello (at) phaseai (dot) com
13 | """


--------------------------------------------------------------------------------
/phasellm/configurations_utils.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from warnings import warn
 4 | 
 5 | 
 6 | def coerce_azure_base_url(url: str) -> str:
 7 |     """
 8 |     This function coerces the base URL to the proper format for the Azure OpenAI API. This is used for backwards
 9 |     compatibility of base_url and api_base arguments.
10 |     Args:
11 |         url: The url to coerce.
12 | 
13 |     Returns:
14 |         The coerced URL.
15 | 
16 |     """
17 |     match = re.match(r'https:\/\/.*\.openai\.azure.com\/openai\/deployments\/.*', url)
18 |     if not match:
19 |         # Ensure proper format of the base URL.
20 |         res = re.search(r'https:\/\/.*\.openai\.azure.com(?!\/openai\/deployments\/)', url)
21 |         if res.group():
22 |             # see https://github.com/openai/openai-python/blob/v1/examples/azure.py
23 |             warn('The base_url argument must be in the format:'
24 |                  'https://{resource}.openai.azure.com/openai/deployments/{model}\n'
25 |                  'Attempting to coerce base_url to the proper format.')
26 |             url = f"{url[:res.end()]}/openai/deployments{url[res.end():]}"
27 |             warn(f'Coerced url to: {url}')
28 |             return url
29 |     return url
30 | 


--------------------------------------------------------------------------------
/phasellm/eval.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Support for LLM evaluation.
  3 | """
  4 | 
  5 | from typing import Optional, List
  6 | 
  7 | from .llms import OpenAIGPTWrapper, ChatBot
  8 | 
  9 | import pandas as pd
 10 | 
 11 | import random
 12 | 
 13 | 
 14 | def simulate_n_chat_simulations(chatbot: ChatBot, n: int, out_path_excel: Optional[str] = None) -> List[str]:
 15 |     """
 16 |     Reruns a chat message n times, returning a list of responses. Note that this will query an external API n times, so
 17 |     please be careful with costs.
 18 | 
 19 |     Args:
 20 |         chatbot: the chat sequence to rerun. The last message will be resent.
 21 |         n: number of times to run the simulation.
 22 |         out_path_excel: if provides, the output will also be written to an Excel file.
 23 | 
 24 |     Returns:
 25 |         A list of messages representing the responses in the chat.
 26 | 
 27 |     """
 28 | 
 29 |     original_chat_messages = chatbot.messages.copy()
 30 |     responses = []
 31 | 
 32 |     for i in range(0, n):
 33 |         r = chatbot.resend()
 34 |         responses.append(r)
 35 |         chatbot.messages = original_chat_messages.copy()
 36 | 
 37 |     if out_path_excel:
 38 |         df = pd.DataFrame({'responses': responses})
 39 |         df.to_excel(out_path_excel, sheet_name='responses', index=False)
 40 | 
 41 |     return responses
 42 | 
 43 | 
 44 | class BinaryPreference:
 45 | 
 46 |     def __init__(self, prompt: str, prompt_vars: str, response1: str, response2: str):
 47 |         """
 48 |         Tracks a prompt, prompt variables, responses, and the calculated preference.
 49 | 
 50 |         Args:
 51 |             prompt: The prompt
 52 |             prompt_vars: The variables to use in the prompt.
 53 |             response1: The first response.
 54 |             response2: The second response.
 55 | 
 56 |         """
 57 |         self.prompt = prompt
 58 |         self.prompt_vars = prompt_vars
 59 |         self.response1 = response1
 60 |         self.response2 = response2
 61 |         self.preference = -1
 62 | 
 63 |     def __repr__(self):
 64 |         return "<BinaryPreference>"
 65 | 
 66 |     def set_preference(self, pref):
 67 |         """
 68 |         Set the preference of the class.
 69 |         """
 70 |         self.preference = pref
 71 | 
 72 |     def get_preference(self):
 73 |         """
 74 |         Get the preference of the class.
 75 |         """
 76 |         return self.preference
 77 | 
 78 | 
 79 | class EvaluationStream:
 80 | 
 81 |     def __init__(self, objective, prompt, models):
 82 |         """
 83 |         Tracks human evaluation on the command line and records results.
 84 | 
 85 |         Args:
 86 |             objective: what you are trying to do.
 87 |             prompt: the prompt you are using. Could be a summary thereof, too. We do not actively use this prompt in
 88 |                 generating data for evaluation.
 89 |             models: an array of two models. These can be referenced later if need be, but are not necessary for running
 90 |                 the evaluation workflow.
 91 | 
 92 |         """
 93 |         self.models = models
 94 |         self.objective = objective
 95 |         self.prompt = prompt
 96 |         self.objective = objective
 97 |         self.evaluator = HumanEvaluatorCommandLine()
 98 |         self.prefs = [0] * len(models)  # This will be a simple counter for now.
 99 | 
100 |     def __repr__(self):
101 |         return f"<EvaluationStream>"
102 | 
103 |     def evaluate(self, response1, response2):
104 |         """
105 |         Shows both sets of options for review and tracks the result.
106 |         """
107 |         pref = self.evaluator.choose(self.objective, self.prompt, response1, response2)
108 |         self.prefs[pref - 1] += 1
109 | 
110 | 
111 | class HumanEvaluatorCommandLine():
112 | 
113 |     def __init__(self):
114 |         """
115 |         Presents an objective, prompt, and two potential responses and has a human choose between the two.
116 |         """
117 |         pass
118 | 
119 |     def __repr__(self):
120 |         return "<HumanEvaluatorCommandLine>"
121 | 
122 |     def choose(self, objective, prompt, response1, response2):
123 |         response_map = {"A": 1, "B": 2}
124 |         response_a = response1
125 |         response_b = response2
126 |         if random.random() <= 0.5:
127 |             response_map = {"A": 2, "B": 1}
128 |             response_a = response2
129 |             response_b = response1
130 | 
131 |         output_string = f"""OBJECTIVE: {objective}
132 | 
133 | PROMPT: {prompt}
134 | 
135 | --------------------        
136 | RESPONSE 'A':
137 | {response_a}
138 | 
139 | --------------------
140 | RESPONSE 'B':
141 | {response_b}
142 | 
143 | --------------------
144 |         """
145 | 
146 |         print(output_string)
147 |         user_input = ""
148 |         user_input = input()
149 |         if user_input not in ["A", "B"]:
150 |             print("Please put in 'A' or 'B' to tell us which is the better response.")
151 |             user_input = input()
152 | 
153 |         return response_map[user_input]
154 | 
155 | 
156 | class GPTEvaluator:
157 | 
158 |     def __init__(self, apikey, model="gpt-3.5-turbo"):
159 |         """
160 |         Passes two model outputs to GPT-3.5 or GPT-4 and has it decide which is the better output.
161 | 
162 |         Args:
163 |             apikey: the OpenAI API key.
164 |             model: the model to use. Defaults to GPT-3.5 Turbo.
165 |         """
166 |         self.model = OpenAIGPTWrapper(apikey, model=model)
167 | 
168 |     def __repr__(self):
169 |         return f"GPT35Evaluator()"
170 | 
171 |     def choose(self, objective, prompt, response1, response2):
172 |         """
173 |         Presents the objective of the evaluation task, a prompt, and then two responses. GPT-3.5/GPT-4 chooses the
174 |         preference.
175 |         Args:
176 |             objective: the objective of the modeling task.
177 |             prompt: the prompt to use.
178 |             response1: the first response.
179 |             response2: the second response.
180 | 
181 |         Returns:
182 |             1 if response1 is preferred, 2 if response2 is preferred.
183 | 
184 |         """
185 | 
186 |         response_map = {"A": 1, "B": 2}
187 |         response_a = response1
188 |         response_b = response2
189 |         if random.random() <= 0.5:
190 |             response_map = {"A": 2, "B": 1}
191 |             response_a = response2
192 |             response_b = response1
193 | 
194 |         prompt = f"""We would like your feedback on a large language model we are building. Specifically, we would like you to compare two different LLM responses and let us know which one is better.
195 | 
196 | Our objective for the LLM is:
197 | {objective}
198 | 
199 | The prompt we are using for the LLM is:
200 | {prompt}
201 | 
202 | Here are the two pieces of generated text.
203 | 
204 | A: `{response_a}`
205 | 
206 | B: `{response_b}`
207 | 
208 | Please simply respond 'A' or 'B' as to which of the texts above address our earlier objective more effectively. Do not add any additional explanations, thoughts, punctuation, or anything; simply write 'A' or 'B'."""
209 | 
210 |         messages = [
211 |             {"role": "system",
212 |              "content": "You are an AI assistant helping with prompt engineering and model evaluation."},
213 |             {"role": "user", "content": prompt},
214 |         ]
215 | 
216 |         response = self.model.complete_chat(messages, ['\n'])
217 | 
218 |         # ChatGPT has a knack for adding "." to the end of the reply.
219 |         if len(response) == 2:
220 |             response = response[0]
221 | 
222 |         choice = response_map[response]
223 | 
224 |         return choice
225 | 


--------------------------------------------------------------------------------
/phasellm/exceptions.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Exception classes and tests for prompts, LLMs, and workflows.
  3 | """
  4 | 
  5 | from typing import List
  6 | 
  7 | from phasellm.llms import ChatPrompt
  8 | 
  9 | 
 10 | def isAcceptableLLMResponse(response_given, acceptable_options) -> bool:
 11 |     """
 12 |     Tests to confirm the response_given is in the list of acceptable_options. acceptable_options can also be a single
 13 |     string.
 14 | 
 15 |     Args:
 16 |         response_given: The response given by the LLM.
 17 |         acceptable_options: The acceptable options.
 18 | 
 19 |     Returns:
 20 |         True if the response is 'acceptable', otherwise throws an LLMResponseException.
 21 |     """
 22 | 
 23 |     compare_to = None
 24 |     if isinstance(acceptable_options, str):
 25 |         compare_to = [acceptable_options]
 26 |     elif isinstance(acceptable_options, list):
 27 |         compare_to = acceptable_options
 28 | 
 29 |     if compare_to is None:
 30 |         raise Exception("testLLMResponse() only accepts a list or string object for acceptable_options.")
 31 | 
 32 |     if response_given not in acceptable_options:
 33 |         raise LLMResponseException(response_given, compare_to)
 34 | 
 35 |     return True
 36 | 
 37 | 
 38 | def isLLMCodeExecutable(llm_code: str) -> bool:
 39 |     """
 40 |     Runs code and checks if any errors occur. Returns True if there are no errors.
 41 | 
 42 |     Args:
 43 |         llm_code: The code to run.
 44 | 
 45 |     Returns:
 46 |         True if the code is executable, otherwise throws an LLMCodeException.
 47 | 
 48 |     """
 49 |     try:
 50 |         exec(llm_code)
 51 |     except Exception as e:
 52 |         raise LLMCodeException(llm_code, e)
 53 | 
 54 |     return True
 55 | 
 56 | 
 57 | def isProperlyStructuredChat(messages, force_roles=False) -> bool:
 58 |     """
 59 |     Checks if messages are an array of dicts with (role, content) keys.
 60 | 
 61 |     force_roles=True also confirms we only have roles of "system", "user", and "assistant" to abide by OpenAI's API.
 62 | 
 63 |     Args:
 64 |         messages: The messages to check.
 65 |         force_roles: If True, checks that the roles are "system", "user", and "assistant".
 66 | 
 67 |     Returns:
 68 |         True if the messages are properly structured, otherwise False.
 69 | 
 70 |     """
 71 | 
 72 |     for m in messages:
 73 |         keys = m.keys()
 74 |         if not (len(keys) == 2 and "role" in keys and "content" in keys):
 75 |             return False
 76 |         if force_roles:
 77 |             role = m["role"]
 78 |             if role not in ["system", "user", "assistant"]:
 79 |                 return False
 80 |     return True
 81 | 
 82 | 
 83 | def reviewOutputWithLLM(text, requirements, llm):
 84 |     """
 85 |     Has an LLM review an output and determines whether the output is OK or not.
 86 |     Args:
 87 |         text: The text to review.
 88 |         requirements: The requirements to review against.
 89 |         llm: The LLM to use for the review.
 90 | 
 91 |     Returns:
 92 |         True if the text meets the requirements, otherwise throws an LLMReviewException.
 93 | 
 94 |     """
 95 |     prompt = ChatPrompt(
 96 |         [{"role": "system",
 97 |           "content": "Follow the user's instructions exactly, and only respond with YES or NO (with additional info)."},
 98 |          {"role": "user",
 99 |           "content": "I'm working with a large language model and hope you can confirm if the following text abides by "
100 |                      "a set of requirements I've provided. Here is the text:\n-----\n{output}\n-----\n\nBelow are the "
101 |                      "requirements the text above is supposed to meet.\n\n-----\n{requirements}\n-----\n\nDoes the "
102 |                      "text meet the requirements? Please only answer YES or NO. If NO, you can provide additional "
103 |                      "information on what the text is missing."}
104 |          ])
105 | 
106 |     result = llm.complete_chat(prompt.fill(text=text, requirements=requirements))
107 |     if result == "YES":
108 |         return True
109 |     else:
110 |         raise LLMReviewException(result)
111 | 
112 | 
113 | class LLMReviewException(Exception):
114 | 
115 |     def __init__(self, message):
116 |         """
117 |         Exception that gets thrown when an LLM review does not meet requirements.
118 | 
119 |         Args:
120 |             message: The error message
121 | 
122 |         """
123 |         super().__init__("LLM Review Exception: text does not meet requirements.\nInfo: " + message)
124 |         self.message = message
125 | 
126 |     def __repr__(self):
127 |         return "LLM Review Exception: text does not meet requirements.\nInfo: " + self.message
128 | 
129 | 
130 | class ChatStructureException(Exception):
131 | 
132 |     def __init__(self):
133 |         """
134 |         Exception that gets thrown when a chat structure isn't correct (i.e., role, content pairs are not pairs)
135 |         """
136 |         super().__init__("Chat Structure Exception: chat messages are not following the proper chat structure.")
137 | 
138 |     def __repr__(self):
139 |         return f"Chat Structure Exception: chat messages are not following the proper chat structure."
140 | 
141 | 
142 | class LLMCodeException(Exception):
143 | 
144 |     def __init__(self, code, exc):
145 |         """
146 |         Exception to track exceptions from code generated by LLMs.
147 | 
148 |         Args:
149 |             code: The code that is raising an error.
150 |             exc: The exception that is being raised.
151 | 
152 |         """
153 |         super().__init__("LLM Code Exception: code is raising an error.")
154 |         self.code = code
155 |         self.exception = exc
156 |         self.exception_string = str(exc)
157 | 
158 |     def __repr__(self):
159 |         return f"LLM Code Exception: code is raising an error."
160 | 
161 | 
162 | class LLMResponseException(Exception):
163 | 
164 |     def __init__(self, response_given: str, acceptable_options: List[str]):
165 |         """
166 |         Exception to track acceptable responses from an LLM.
167 | 
168 |         Args:
169 |             response_given: The response given by the LLM.
170 |             acceptable_options: The acceptable options for the LLM.
171 | 
172 |         """
173 |         super().__init__("LLM Response Exception: response given is not in the list of acceptable options.")
174 |         self.response_given = response_given
175 |         self.acceptable_options = acceptable_options
176 | 
177 |     def __repr__(self):
178 |         return f"LLM Response Exception: response given is not in the list of acceptable options."
179 | 


--------------------------------------------------------------------------------
/phasellm/html.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Support for convering LLM-related classes and objects to HTML and various outputs.
  3 | """
  4 | 
  5 | import re
  6 | 
  7 | # Easier to have this variable than to escape all the "{" and "}" later.
  8 | style = """
  9 | .phasellm_chatbot_stream {
 10 |     margin:5px;
 11 |     box-sizing:content-box;
 12 |     padding:8px;
 13 |     border-radius:8px;
 14 |     border:1px solid black;
 15 |     display:inline-block;
 16 | }
 17 | .phasellm_chatbot_stream .response_container {
 18 |     display:block;
 19 |     display:block;
 20 |     margin:5px;
 21 |     padding:8px;
 22 | }
 23 | 
 24 | .content_user {
 25 |     background-color:green;
 26 |     color:white;
 27 | }
 28 | 
 29 | .content_system {
 30 |     color:gray;
 31 |     background-color:lightgray;
 32 |     font-style:italic;
 33 | }
 34 | 
 35 | .content_assistant {
 36 |     color:white;
 37 |     background-color:crimson;
 38 | }
 39 | 
 40 | .response {
 41 |     padding:8px;
 42 |     border-radius:8px;  
 43 | }
 44 | 
 45 | .phasellm_chatbot_stream .timestamp {
 46 |     margin:5px 5px 5px 15px;
 47 |     font-size:70%;
 48 |     color:gray;
 49 |     font-style:italic;
 50 |     display:inline-block;
 51 | }
 52 | 
 53 | .phasellm_chatbot_stream .time_taken {
 54 |     margin:5px 5px 5px 15px;
 55 |     font-size:70%;
 56 |     color:gray;
 57 |     font-style:italic;
 58 |     display:inline-block;
 59 | }
 60 | 
 61 | .legend {
 62 |     font-size:70%;
 63 |     text-align:right;
 64 |     padding-right:15px;
 65 | }
 66 | 
 67 | .legend_box {
 68 |     width:10px;
 69 |     height:10px;
 70 |     display:inline-block;
 71 |     position:relative;
 72 |     top:2px;
 73 |     margin-left:8px;
 74 |     margin-right:2px;
 75 | }
 76 | """
 77 | 
 78 | 
 79 | def _formatContentToHtml(string) -> str:
 80 |     """
 81 |     Converts a String into an HTML-friendly representation.
 82 | 
 83 |     Args:
 84 |         string: The string to convert.
 85 | 
 86 |     Returns:
 87 |         The HTML formatted string.
 88 | 
 89 |     """
 90 |     new_string = re.sub("<", "&lt;", string)
 91 |     new_string = re.sub(">", "&gt;", new_string)
 92 |     new_string = re.sub("[\r\n]+", "<br>", new_string)
 93 |     return new_string
 94 | 
 95 | 
 96 | def toHtmlFile(html, filepath) -> None:
 97 |     """
 98 |     Takes a html object generated by PhaseLLM and saves it to an HTML file.
 99 | 
100 |     Args:
101 |         html: The HTML object to save.
102 |         filepath: The path to save the HTML file to.
103 | 
104 |     """
105 | 
106 |     html_content = f"""
107 | <!doctype html>
108 | <html lang="en">
109 | <head>
110 | <style>
111 | {style}
112 | </style>
113 | </head>
114 | <body>
115 | {html}
116 | </body>
117 | </html>
118 |     """
119 |     with open(filepath, "w") as w:
120 |         w.write(html_content)
121 | 
122 | 
123 | def chatbotToJson(chatbot, order_field=None) -> str:
124 |     """
125 |     Converts a chatbot's message stack to a JSON array. Optionally, add an order_field key to save the order of the array itself.
126 | 
127 |     Args:
128 |         chatbot: The ChatBot object whose message stack we want.
129 |         order_field: Optional key to include the array order value into the dictionary.
130 | 
131 |     Returns:
132 |         The JSON dictionary representing the mesages from the chatbot object.
133 |     """
134 | 
135 |     messages = chatbot.messages
136 |     json_to_return = []
137 |     ctr = 0
138 | 
139 |     for m in messages:
140 |         new_m = m.copy()
141 |         if order_field is not None:
142 |             new_m[order_field] = ctr
143 |             ctr += 1
144 |         json_to_return.append(new_m)
145 | 
146 |     return json_to_return
147 | 
148 | 
149 | def chatbotToHtml(chatbot) -> str:
150 |     """
151 |     Converts a chatbot's message stack to HTML.
152 | 
153 |     Args:
154 |         chatbot: The chatbot to convert.
155 | 
156 |     Returns:
157 |         The HTML representation of the chatbot message stack.
158 | 
159 |     """
160 | 
161 |     chatbot_html = """<div class='phasellm_chatbot_stream'>
162 | <div class="legend">
163 |     <b>Legend</b><div class="legend_box content_system">&nbsp;</div> System <div class="legend_box content_assistant">&nbsp;</div> Assistant <div class="legend_box content_user">&nbsp;</div> User
164 | </div>"""
165 | 
166 |     messages = chatbot.messages
167 |     for m in messages:
168 |         m_timestamp = ""
169 |         if "timestamp_utc" in m:
170 |             m_timestamp = m["timestamp_utc"].strftime("%d %B %Y at %H:%M:%S")
171 | 
172 |         m_log_time_seconds_string = ""
173 |         if "log_time_seconds" in m:
174 |             m_log_time_seconds_string = f"""<div class='time_taken'>({str(round(m['log_time_seconds'], 3))} seconds)</div>"""
175 | 
176 |         response_html = f"""
177 | <div class='response_container'>
178 |     <div class='response content_{m['role']}'>{_formatContentToHtml(m['content'])}</div>
179 |     <div class='timestamp'>{m_timestamp}</div>
180 |     {m_log_time_seconds_string}
181 | </div>
182 | """
183 | 
184 |         chatbot_html += response_html
185 | 
186 |     chatbot_html += "\n</div>"
187 | 
188 |     return chatbot_html
189 | 


--------------------------------------------------------------------------------
/phasellm/llms_utils.py:
--------------------------------------------------------------------------------
 1 | def extract_vertex_ai_kwargs(kwargs: dict) -> dict:
 2 |     """
 3 |     Extracts the Vertex AI kwargs from the kwargs dictionary.
 4 |     Args:
 5 |         kwargs: The kwargs dictionary.
 6 | 
 7 |     Returns:
 8 |         The Vertex AI kwargs.
 9 | 
10 |     """
11 | 
12 |     return {
13 |         'max_output_tokens': kwargs['max_output_tokens'] if 'max_output_tokens' in kwargs else None,
14 |         'candidate_count': kwargs['candidate_count'] if 'candidate_count' in kwargs else None,
15 |         'top_p': kwargs['top_p'] if 'top_p' in kwargs else None,
16 |         'top_k': kwargs['top_k'] if 'top_k' in kwargs else None,
17 |         'logprobs': kwargs['logprobs'] if 'logprobs' in kwargs else None,
18 |         'presence_penalty': kwargs['presence_penalty'] if 'presence_penalty' in kwargs else None,
19 |         'frequency_penalty': kwargs['frequency_penalty'] if 'frequency_penalty' in kwargs else None,
20 |         'logit_bias': kwargs['logit_bias'] if 'logit_bias' in kwargs else None
21 |     }
22 | 
23 | 
24 | def extract_vertex_ai_response_metadata(response) -> dict:
25 |     last_response_header = {}
26 |     if hasattr(response, '_raw_response'):
27 |         last_response_header = {
28 |             **last_response_header,
29 |             **response._raw_response.PromptFeedback.to_dict(response._raw_response.prompt_feedback),
30 |             **response._raw_response.UsageMetadata.to_dict(response._raw_response.usage_metadata)
31 |         }
32 |     if hasattr(response, '_prediction_response'):
33 |         last_response_header = {
34 |             **last_response_header,
35 |             **response._prediction_response.metadata
36 |         }
37 |     if hasattr(response, 'safety_attributes'):
38 |         last_response_header = {
39 |             **last_response_header,
40 |             **response.safety_attributes
41 |         }
42 |     return last_response_header
43 | 


--------------------------------------------------------------------------------
/phasellm/logging.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Logging support. This allows you to use the phasellm library to send chats to evals.phasellm.com and review them via our hosted front-end.
  3 | """
  4 | 
  5 | import requests
  6 | import json
  7 | 
  8 | from typing import List, Optional
  9 | 
 10 | from .llms import Message, ChatBot
 11 | 
 12 | import hashlib
 13 | import os
 14 | 
 15 | _PHASELLM_EVALS_BASE_URL = "https://evals.phasellm.com/api/v0.1"
 16 | 
 17 | 
 18 | class FileLogger:
 19 |     """
 20 |     This logger will save chats to disk. It will export chats to a flat TXT file.
 21 |     """
 22 | 
 23 |     def __init__(self, folder_path: str, separator: str = "\n\n-----------------\n\n"):
 24 |         """
 25 |         Args:
 26 |             folder_path: The path to the folder where the logs will be saved.
 27 |             separator: The separator between messages in the log file.
 28 |         """
 29 |         self.folder_path = folder_path
 30 |         if not os.path.exists(folder_path):
 31 |             os.makedirs(folder_path)
 32 |         self.separator = separator
 33 | 
 34 |     def log(
 35 |         self,
 36 |         messages: List[Message],
 37 |         chat_id: Optional[int] = None,
 38 |         title: Optional[str] = None,
 39 |         source_id: Optional[str] = None,
 40 |         file_name: str = None,
 41 |     ) -> str:
 42 |         """
 43 |         Saves or updates the relevant chat to a folder.
 44 | 
 45 |         Args:
 46 |             messages: The messages array from the chat.
 47 |             chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them.
 48 |             title: Optional title for the chat.
 49 |             source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals.
 50 |             file_name: Optional String for what to call the file. Otherwise will use chat_id. If chat_id is not given, then will use an MD5 sum of the content.
 51 | 
 52 |         Returns:
 53 |             The chat_id associated with the chat.
 54 |         """
 55 | 
 56 |         file_content = ""
 57 |         for ctr, m in enumerate(messages):
 58 |             file_content += f"ROLE: {m['role']}\n{m['content']}\n\n"
 59 |             if ctr < len(messages) - 1:
 60 |                 file_content += self.separator
 61 | 
 62 |         if file_name is None:
 63 |             if chat_id is not None:
 64 |                 file_name = chat_id + ".txt"
 65 |             else:
 66 |                 chat_id = hashlib.md5(file_content.encode()).hexdigest()
 67 |                 file_name = chat_id + ".txt"
 68 | 
 69 |         with open(self.folder_path + "/" + file_name, "w") as f:
 70 |             f.write(file_content)
 71 | 
 72 |         return chat_id
 73 | 
 74 |     def logChatBot(
 75 |         self,
 76 |         chatbot: ChatBot,
 77 |         chat_id: Optional[int] = None,
 78 |         title: Optional[str] = None,
 79 |         source_id: Optional[str] = None,
 80 |         file_name: str = None,
 81 |     ) -> str:
 82 |         """
 83 |         Logs the message stack for a chatbot to a folder.
 84 | 
 85 |         Args:
 86 |             chatbot: The chatbot object to log.
 87 |             chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them.
 88 |             title: Optional title for the chat.
 89 |             source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals.
 90 |             file_name: Optional String for what to call the file. Otherwise will use chat_id. If chat_id is not given, then will use an MD5 sum of the content.
 91 | 
 92 |         Returns:
 93 |             The chat_id associated with the chat.
 94 |         """
 95 | 
 96 |         message_array = []
 97 |         for m in chatbot.messages:
 98 |             new_m = {"role": m["role"], "content": m["content"]}
 99 |             message_array.append(new_m)
100 |         return self.log(message_array, chat_id, title, source_id, file_name)
101 | 
102 | 
103 | class PhaseLogger:
104 | 
105 |     def __init__(
106 |         self,
107 |         apikey: str,
108 |     ):
109 |         """
110 |         Helper class for logging chats to evals.phasellm.com.
111 | 
112 |         Args:
113 |             apikey: The API key associated with your evals.phasellm.com account.
114 |         """
115 |         super().__init__()
116 |         self.apikey = apikey
117 | 
118 |     def log(
119 |         self,
120 |         messages: List[Message],
121 |         chat_id: Optional[int] = None,
122 |         title: Optional[str] = None,
123 |         source_id: Optional[str] = None,
124 |     ) -> int:
125 |         """
126 |         Saves or updates the relevant chat at evals.phasellm.com
127 | 
128 |         Args:
129 |             messages: The messages array from the chat.
130 |             chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them.
131 |             title: Optional title for the chat.
132 |             source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals.
133 | 
134 |         Returns:
135 |             The chat_id associated with the chat.
136 |         """
137 | 
138 |         save_url = _PHASELLM_EVALS_BASE_URL + "/save_chat"
139 |         headers = {
140 |             "Authorization": f"Bearer {self.apikey}",
141 |             "Content-Type": "application/json",
142 |         }
143 |         payload = {"messages": messages}
144 |         if chat_id is not None:
145 |             payload["chat_id"] = chat_id
146 | 
147 |         if title is not None:
148 |             payload["title"] = title
149 | 
150 |         if source_id is not None:
151 |             payload["source_id"] = source_id
152 | 
153 |         response = requests.post(save_url, json=payload, headers=headers)
154 |         data = json.loads(response.text)
155 |         if data["status"] == "error":
156 |             raise Exception(f"PhaseLLM Evals: an error occured. {data['message']}")
157 | 
158 |         return data["chat_id"]
159 | 
160 |     def logChatBot(
161 |         self,
162 |         chatbot: ChatBot,
163 |         chat_id: Optional[int] = None,
164 |         title: Optional[str] = None,
165 |         source_id: Optional[str] = None,
166 |     ) -> int:
167 |         """
168 |         Logs the message stack for a chatbot to evals.phasellm.com.
169 | 
170 |         Args:
171 |             chatbot: The chatbot object to log.
172 |             chat_id: Optional chat ID. If you provide a chat ID from an earlier log event, the messages will overwrite the original chat. This should be used for updating conversations rather than replacing them.
173 |             title: Optional title for the chat.
174 |             source_id: Optional String representing an ID for the chat. This is to enable easier referencing of chats for end users and is not used by PhaseLLM Evals.
175 | 
176 |         Returns:
177 |             The chat_id associated with the chat.
178 |         """
179 |         message_array = []
180 |         for m in chatbot.messages:
181 |             new_m = {"role": m["role"], "content": m["content"]}
182 |             message_array.append(new_m)
183 |         return self.log(message_array, chat_id, title, source_id)
184 | 


--------------------------------------------------------------------------------
/phasellm/types.py:
--------------------------------------------------------------------------------
 1 | from phasellm.configurations import OpenAIConfiguration, AzureAPIConfiguration, AzureActiveDirectoryConfiguration, \
 2 |     VertexAIConfiguration
 3 | 
 4 | from typing import Union, Literal
 5 | 
 6 | CLAUDE_MODEL = Union[
 7 |     str,
 8 |     Literal["claude-v1"],
 9 |     Literal["claude-instant-1"],
10 |     Literal["claude-instant-1.1"],
11 |     Literal["claude-2"],
12 |     Literal["claude-2.0"],
13 | ]
14 | 
15 | OPENAI_API_CONFIG = Union[
16 |     OpenAIConfiguration,
17 |     AzureAPIConfiguration,
18 |     AzureActiveDirectoryConfiguration
19 | ]
20 | 
21 | VERTEXAI_API_CONFIG = VertexAIConfiguration
22 | 


--------------------------------------------------------------------------------
/project_metadata.py:
--------------------------------------------------------------------------------
 1 | NAME = "phasellm"
 2 | 
 3 | AUTHOR = "Wojciech Gryc"
 4 | 
 5 | VERSION = "0.0.25"
 6 | 
 7 | DESCRIPTION = (
 8 |     "Wrappers for common large language models (LLMs) with support for evaluation."
 9 | )
10 | 
11 | LONG_DESCRIPTION = (
12 |     "PhaseLLM provides wrappers for common large language models and use cases. This makes it easy to "
13 |     "swap models in and out as needed. We also provide support for evaluation of models so you can "
14 |     "choose which models are better to use."
15 | )
16 | 


--------------------------------------------------------------------------------
/readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | 
 3 | build:
 4 |   os: "ubuntu-20.04"
 5 |   tools:
 6 |     python: "3.8"
 7 | 
 8 | sphinx:
 9 |   configuration: docs/source/conf.py
10 | 
11 | python:
12 |   install:
13 |     - method: pip
14 |       path: .
15 |       extra_requirements:
16 |         - docs


--------------------------------------------------------------------------------
/release_checklist.md:
--------------------------------------------------------------------------------
 1 | # Release Checklist
 2 | 
 3 | This checklist is used prior to a new release, to ensure everything works properly and that we have a high quality release.
 4 | 
 5 | - [ ] Update version #
 6 | - [ ] Do a local package install, ensuring all tests run properly
 7 | - [ ] Publish final version to PyPI
 8 | - [ ] Publish release in GitHub
 9 | - [ ] Tweet about it
10 | - [ ] Update *Change Log* on site 
11 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | twine>=4.0.2
2 | wheel>=0.41.3


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Flask>=2.0.0
 2 | requests>=2.24.0
 3 | httpx>=0.25.0
 4 | openai>=1.1.0
 5 | cohere>=4.0.0
 6 | transformers>=4.25.0
 7 | accelerate>=0.16.0
 8 | torch>=1.0.0
 9 | python-dotenv
10 | typing-extensions>=4.6.3
11 | urllib3==2.0.7
12 | sseclient-py>=1.7.2
13 | docker>=6.1.3
14 | pandas>=2.0.0
15 | openpyxl>=3.1.0
16 | beautifulsoup4>=4.12.2
17 | lxml>=4.9.2
18 | fake-useragent>=1.2.1
19 | playwright>=1.35.0
20 | feedparser>=6.0.10
21 | azure-identity>=1.14.0
22 | replicate==0.20.0
23 | google-cloud-aiplatform>=1.42.1
24 | anthropic>=0.30.1


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | from project_metadata import NAME, VERSION, AUTHOR, DESCRIPTION, LONG_DESCRIPTION
 4 | 
 5 | setup(
 6 |     name=NAME,
 7 |     version=VERSION,
 8 |     description=DESCRIPTION,
 9 |     long_description=LONG_DESCRIPTION,
10 |     author=AUTHOR,
11 |     author_email="hello@phaseai.com",
12 |     license="MIT",
13 |     packages=find_packages(),
14 |     install_requires=[
15 |         "Flask>=2.0.0",
16 |         "requests>=2.24.0",
17 |         "httpx>=0.25.0",
18 |         "openai>=1.1.0",
19 |         "cohere>=4.0.0",
20 |         "python-dotenv",
21 |         "pandas>=2.0.0",
22 |         "openpyxl>=3.1.0",
23 |         "typing-extensions>=4.6.3",
24 |         "urllib3==2.0.7",
25 |         "sseclient-py>=1.7.2",
26 |         "docker>=6.1.3",
27 |         "beautifulsoup4>=4.12.2",
28 |         "lxml>=4.9.2",
29 |         "fake-useragent>=1.2.1",
30 |         "playwright>=1.35.0",
31 |         "feedparser>=6.0.10",
32 |         "azure-identity>=1.14.0",
33 |         "replicate==0.20.0",
34 |         "google-cloud-aiplatform>=1.42.1",
35 |         "anthropic>=0.30.1",
36 |     ],
37 |     extras_require={
38 |         "complete": [
39 |             "transformers>=4.25.0",
40 |             "accelerate>=0.16.0",
41 |             "torch>=1.0.0",
42 |         ],
43 |         "docs": [
44 |             "furo",
45 |             "sphinx>=7.1.2",
46 |             "myst_parser>=2.0.0",
47 |             "sphinx-autoapi>=2.1.1",
48 |             "sphinx-autobuild>=2021.3.14",
49 |         ],
50 |     },
51 |     python_requires=">=3.8.0",
52 |     keywords="llm, nlp, evaluation, ai",
53 |     classifiers=[
54 |         "Development Status :: 3 - Alpha",
55 |         "Intended Audience :: Developers",
56 |         "License :: OSI Approved :: MIT License",
57 |         "Programming Language :: Python :: 3",
58 |     ],
59 | )
60 | 


--------------------------------------------------------------------------------
/tests-non-deterministic/README.md:
--------------------------------------------------------------------------------
1 | ### Non Deterministic Tests
2 | 
3 | These tests are non-deterministic in nature, so they should only be run and reviewed by a human.
4 | 
5 | **Do not include these tests in an automated CI pipeline, or you may experience transient 
6 | failures**
7 | 
8 | Note: we may be able to integrate these into CI if we set them up with retries and acceptable
9 | success rate thresholds.


--------------------------------------------------------------------------------
/tests-non-deterministic/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests-non-deterministic/__init__.py


--------------------------------------------------------------------------------
/tests-non-deterministic/llms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests-non-deterministic/llms/__init__.py


--------------------------------------------------------------------------------
/tests-non-deterministic/llms/test_llms.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from unittest import TestCase
 4 | 
 5 | from dotenv import load_dotenv
 6 | 
 7 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
 8 | 
 9 | load_dotenv()
10 | openai_api_key = os.getenv("OPENAI_API_KEY")
11 | 
12 | 
13 | class TestChatBot(TestCase):
14 | 
15 |     def test_openai_gpt_chat_temperature(self):
16 |         prompt = 'What is the capital of Jupiter?'
17 |         verbose = True
18 | 
19 |         # Test low temperature
20 |         llm = OpenAIGPTWrapper(openai_api_key, "gpt-3.5-turbo", temperature=0)
21 |         fixture = ChatBot(llm)
22 |         low_temp_res = fixture.chat(prompt)
23 | 
24 |         # Test high temperature
25 |         llm = OpenAIGPTWrapper(openai_api_key, "gpt-3.5-turbo", temperature=2)
26 |         fixture = ChatBot(llm)
27 |         high_temp_res = fixture.chat(prompt)
28 | 
29 |         if verbose:
30 |             print(f'Low temp response:\n{low_temp_res}')
31 |             print(f'Low temperature len: {len(low_temp_res)}')
32 | 
33 |             print(f'High temp response:\n{high_temp_res}')
34 |             print(f'High temperature len: {len(high_temp_res)}')
35 | 
36 |         # Responses should differ.
37 |         self.assertNotEqual(low_temp_res, high_temp_res)
38 | 
39 |         # High temperature should generally produce longer responses.
40 |         self.assertTrue(len(low_temp_res) < len(high_temp_res))
41 | 


--------------------------------------------------------------------------------
/tests/README.MD:
--------------------------------------------------------------------------------
  1 | # Tests
  2 | 
  3 | ### Structure
  4 | 
  5 | #### Release Checklist
  6 | 
  7 | `release_checklist_code.py` contains manual tests for the release checklist.
  8 | 
  9 | #### E2E Tests
 10 | 
 11 | E2E style tests are used when network communication is required.
 12 | 
 13 | To run the E2E tests, run `python -m unittest discover tests/e2e`.
 14 | 
 15 | ##### NOTE
 16 | 
 17 | You may want to disable testing local models due to resource constraints. If so, set the environment variable
 18 | `SKIP_LOCAL_MODELS` to `True`.
 19 | 
 20 | #### Unit Tests
 21 | 
 22 | Unit tests are for testing individual functions and when network communication is not required.
 23 | 
 24 | Can mock network communication if necessary.
 25 | 
 26 | To run the unit tests, run `python -m unittest discover tests/unit`.
 27 | 
 28 | #### Running tests on a VM
 29 | 
 30 | We use a Paperspace VM to run PhaseLLM tests.
 31 | 
 32 | ##### VM Requirements
 33 | 
 34 | - GPU with >= 30GM VRAM
 35 | - 100GB disk (for model weights + packages + docker)
 36 | 
 37 | ##### Connecting to VM
 38 | 
 39 | You need to create + add an SSH key to the VM.
 40 | 1) Create a SSH key if you haven’t already.
 41 | https://docs.github.com/en/authentication/connecting-to-github-with-ssh/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent 
 42 | 2) Copy it into paperspace
 43 | https://console.paperspace.com/account/settings/ssh-keys 
 44 | 
 45 | ##### Initial VM Setup (Already Done)
 46 | 
 47 | Install Docker
 48 | https://docs.docker.com/engine/install/ubuntu/
 49 | 
 50 | Get base python image.
 51 | ```
 52 | docker pull python:3
 53 | ```
 54 | 
 55 | Create a code directory in the VM.
 56 | ```
 57 | mkdir code
 58 | ```
 59 | 
 60 | ##### New Tester Setup
 61 | 
 62 | Create code folder for repos + forks. Currently, there is a ‘garett’ folder for holding my repo fork. You may want to 
 63 | add a ‘wojciech’ folder for the master repo.
 64 | ```
 65 | cd code
 66 | mkdir myname
 67 | ```
 68 | 
 69 | Clone repo into your folder. Make sure you clone using https, otherwise you need to add an SSH key to the VM to access 
 70 | the repo.
 71 | ```
 72 | cd myname
 73 | git clone https://github.com/...
 74 | cd phasellm
 75 | ```
 76 | 
 77 | Create a virtual environment in the cloned repository.
 78 | ```
 79 | python -m venv .env
 80 | ```
 81 | 
 82 | ##### Test Specific Setup
 83 | 
 84 | Ensure you are on the branch you want to test.
 85 | ```
 86 | cd code/myname/phasellm
 87 | git fetch –all
 88 | git checkout -- track origin/your-branch
 89 | ```
 90 | 
 91 | Create a testing bash script if you don’t already have one. Example below.
 92 | ```
 93 | #!/bin/bash
 94 | export ANTHROPIC_API_KEY=key
 95 | export COHERE_API_KEY=key
 96 | export HUGGING_FACE_API_KEY=key
 97 | export OPENAI_API_KEY=key
 98 | 
 99 | cd phasellm
100 | . .env/bin/activate
101 | pip install -r requirements.txt
102 | nohup python -m unittest discover -s tests -v > ../test.log &
103 | cd ..
104 | ```
105 | 
106 | Run the bootstrap_tests.sh
107 | ```
108 | sudo /bin/sh bootstrap_tests.sh
109 | ```
110 | 
111 | The tests run as a background process, so once you see the nohup message, uou can escape the script and follow the 
112 | output.
113 | `ctrl + c (or command + c on mac)` 
114 | ```
115 | tail -f test.log
116 | ```
117 | 
118 | ##### Useful Commands
119 | Monitor the process resources and find process IDs
120 | ```
121 | top
122 | ```
123 | 
124 | Kill a process
125 | ```
126 | sudo kill process_id
127 | ```
128 | 
129 | Check why process was killed
130 | ```
131 | dmesg | less
132 | ```
133 | 
134 | Check Nvidia GPU usage
135 | ```
136 | nvidia-smi
137 | ```
138 | 
139 | Shut down VM
140 | ```
141 | sudo shutdown now
142 | ```
143 | 
144 | ##### Gotchas
145 | 
146 | The Paperspace VM turns off on its own after 1 hour, so watch out for that.


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/agents/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/llms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/llms/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/sse/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/e2e/sse/__init__.py


--------------------------------------------------------------------------------
/tests/e2e/sse/test_e2e_sse.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | import unittest
  4 | import requests
  5 | import sseclient
  6 | 
  7 | from typing import Generator
  8 | 
  9 | from unittest import TestCase
 10 | 
 11 | from dotenv import load_dotenv
 12 | 
 13 | from flask import Flask, Response
 14 | 
 15 | from multiprocessing import Process
 16 | 
 17 | from phasellm.llms import StreamingOpenAIGPTWrapper, _format_sse
 18 | 
 19 | load_dotenv()
 20 | openai_api_key = os.getenv("OPENAI_API_KEY")
 21 | 
 22 | 
 23 | def mock_generator_failure() -> Generator:
 24 |     """
 25 |     Mock generator for a failure modes of sse streaming.
 26 | 
 27 |     Desired output on client side is:
 28 |         '''
 29 |         123
 30 | 
 31 |         456
 32 |         78
 33 | 
 34 | 
 35 |         9
 36 |         10
 37 |         id: 1
 38 |         id: 2
 39 |         event: test
 40 |         '''
 41 |     Returns:
 42 | 
 43 |     """
 44 |     yield "data: 1\n\n"
 45 |     yield "data: 2\n\n"
 46 |     yield "data: 3\n\n4\n\n"
 47 |     yield "data: 5\n\n"
 48 |     yield "data: 6\n\n\n"
 49 |     yield "data: 7\n\n"
 50 |     yield "data: 8\n\n\n\n\n"
 51 |     yield "data: 9\n\n\n"
 52 |     yield "data: 10\nid: 1\n\n\n"
 53 |     yield "data: id: 2\n\n\n"
 54 |     yield "data: event: test\n\n\n"
 55 |     yield "data: <|END|>\n\n"
 56 | 
 57 | 
 58 | def mock_generator_success() -> Generator:
 59 |     """
 60 |     Mock generator for a success mode of sse streaming.
 61 | 
 62 |     Desired output on client side is:
 63 |         '''
 64 |         123
 65 | 
 66 |         456
 67 |         78
 68 | 
 69 | 
 70 |         9
 71 |         10
 72 |         id: 1
 73 |         id: 2
 74 |         event: test
 75 |         '''
 76 |     Returns:
 77 | 
 78 |     """
 79 |     yield "data: 1\n\n"
 80 |     yield "data: 2\n\n"
 81 |     yield "data: 3\ndata:\ndata:4\n\n"
 82 |     yield "data: 5\n\n"
 83 |     yield "data: 6\ndata:\n\n"
 84 |     yield "data: 7\n\n"
 85 |     yield "data: 8\ndata:\ndata:\ndata:\n\n"
 86 |     yield "data: 9\ndata:\n\n"
 87 |     yield "data: 10\ndata:id: 1\ndata:\n\n"
 88 |     yield "data: id: 2\ndata:\n\n"
 89 |     yield "data: event: test\n\n"
 90 |     yield "data: <|END|>\n\n"
 91 | 
 92 | 
 93 | def mock_generator_success_format_sse() -> Generator:
 94 |     """
 95 |     Mock generator for a success mode of sse streaming.
 96 | 
 97 |     Desired output on client side is:
 98 |         '''
 99 |         123
100 | 
101 |         456
102 |         78
103 | 
104 | 
105 |         9
106 |         10
107 |         id: 1
108 |         id: 2
109 |         event: test
110 |         '''
111 |     Returns:
112 | 
113 |     """
114 |     yield _format_sse("1")
115 |     yield _format_sse("2")
116 |     yield _format_sse("3\n\n4")
117 |     yield _format_sse("5")
118 |     yield _format_sse("6\n")
119 |     yield _format_sse("7")
120 |     yield _format_sse("8\n\n\n")
121 |     yield _format_sse("9\n")
122 |     yield _format_sse("10\nid: 1\n")
123 |     yield _format_sse("id: 2\n")
124 |     yield _format_sse("event: test")
125 |     yield _format_sse("<|END|>")
126 | 
127 | 
128 | def server_mock(generator: Generator):
129 |     """
130 |     SSE test server.
131 |     Returns:
132 | 
133 |     """
134 |     app = Flask(__name__)
135 | 
136 |     @app.route('/stream')
137 |     def stream():
138 |         return Response(generator, mimetype="text/event-stream")
139 | 
140 |     app.run(debug=False, port=5000, host='0.0.0.0')
141 | 
142 | 
143 | def process_stream() -> str:
144 |     url = 'http://localhost:5000/stream'
145 |     headers = {'Accept': 'text/event-stream'}
146 | 
147 |     res = requests.get(url, headers=headers, stream=True)
148 |     client = sseclient.SSEClient(res)
149 |     data = []
150 |     for event in client.events():
151 |         if event.data == "<|END|>":
152 |             break
153 |         else:
154 |             data.append(event.data)
155 |     client.close()
156 |     res = ''.join(data)
157 |     return res
158 | 
159 | 
160 | def server_success_mock():
161 |     print(''.join(mock_generator_success()))
162 |     server_mock(mock_generator_success())
163 | 
164 | 
165 | def server_failure_mock():
166 |     print(''.join(mock_generator_failure()))
167 |     server_mock(mock_generator_failure())
168 | 
169 | 
170 | def print_intercept_generator(generator: Generator) -> Generator:
171 |     res = []
172 |     for item in generator:
173 |         res.append(item)
174 |         yield item
175 |     print(''.join(res))
176 | 
177 | 
178 | def server_llm():
179 |     llm = StreamingOpenAIGPTWrapper(
180 |         apikey=openai_api_key, model='text-davinci-003', format_sse=True, append_stop_token=True
181 |     )
182 |     generator: Generator = llm.text_completion(
183 |         "List two countries with two new line characters between them. "
184 |         "Example:\n"
185 |         "USA\n\nCanada\n\n"
186 |     )
187 | 
188 |     # Line below is for debugging purposes.
189 |     # generator: Generator = print_intercept_generator(generator)
190 | 
191 |     server_mock(generator)
192 | 
193 | 
194 | class TestSSE(TestCase):
195 | 
196 |     def test_sse_client_server_mock_success(self):
197 |         """
198 |         Test SSE success mode using a mock generator.
199 |         Returns:
200 | 
201 |         """
202 |         # Start test server
203 |         process = Process(target=server_success_mock)
204 |         process.start()
205 | 
206 |         res = process_stream()
207 | 
208 |         expected = (
209 |             "123\n"
210 |             "\n"
211 |             "456\n"
212 |             "78\n"
213 |             "\n"
214 |             "\n"
215 |             "9\n"
216 |             "10\n"
217 |             "id: 1\n"
218 |             "id: 2\n"
219 |             "event: test"
220 |         )
221 |         self.assertEqual(res, expected)
222 | 
223 |         self.tearDown()
224 | 
225 |         process.terminate()
226 |         process.join()
227 | 
228 |     def test_sse_client_server_mock_failure(self):
229 |         """
230 |         Test SSE failure mode using a mock generator.
231 |         Returns:
232 | 
233 |         """
234 |         # Start test server
235 |         process = Process(target=server_failure_mock)
236 |         process.start()
237 | 
238 |         res = process_stream()
239 | 
240 |         # Notice the missing 4. Notice the lack of '\n' and 'id: 1'
241 |         expected = "1235678910id: 2event: test"
242 |         self.assertEqual(res, expected)
243 | 
244 |         process.terminate()
245 |         process.join()
246 | 
247 |     def test_sse_client_server_llm(self):
248 |         """
249 |         Test SSE success mode using an LLM wrapper.
250 |         Returns:
251 | 
252 |         """
253 |         # Start test server
254 |         process = Process(target=server_llm)
255 |         process.start()
256 | 
257 |         res = process_stream()
258 | 
259 |         print(repr(res))
260 | 
261 |         matches = re.findall(r'\w+\n\n\w+', res)
262 |         self.assertTrue(len(matches) > 0, "Expected a word followed by two newlines, followed by a word.")
263 | 
264 |         process.terminate()
265 |         process.join()
266 | 
267 |     def test_success_generator_equality(self):
268 |         """
269 |         Test equality of success generators.
270 |         Returns:
271 | 
272 |         """
273 |         self.assertEqual(list(mock_generator_success()), list(mock_generator_success_format_sse()))
274 | 
275 | 
276 | if __name__ == '__main__':
277 |     unittest.main()
278 | 


--------------------------------------------------------------------------------
/tests/release_checklist_code.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This code is used to test various aspects of PhaseLLM. We recommend running this on a P3 EC2 instance with Ubuntu 22.04 installed. To get this up and running, run the following code:
  3 | 
  4 | sudo apt-get update
  5 | sudo apt-get upgrade
  6 | sudo apt-get install xorg
  7 | sudo apt-get install nvidia-driver-460
  8 | sudo reboot
  9 | 
 10 | Run `nvidia-smi` to ensure you have GPU devices with CUDA installed.
 11 | 
 12 | """
 13 | 
 14 | ##########################################################################################
 15 | # GPU SETUP
 16 | #
 17 | 
 18 | import torch
 19 | 
 20 | # Confirm GPUs are installed and usable.
 21 | print(torch.cuda.is_available())
 22 | print(torch.cuda.current_device())
 23 | 
 24 | ##########################################################################################
 25 | # ENVIRONMENT VARIABLES
 26 | #
 27 | 
 28 | # Load all environment variables and API keys
 29 | 
 30 | import os
 31 | from dotenv import load_dotenv
 32 | 
 33 | load_dotenv()
 34 | openai_api_key = os.getenv("OPENAI_API_KEY")
 35 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 36 | cohere_api_key = os.getenv("COHERE_API_KEY")
 37 | hugging_face_api_key = os.getenv("HUGGING_FACE_API_KEY")
 38 | 
 39 | ##########################################################################################
 40 | # GPT-3.5 EVALUATOR WITH COHERE AND CLAUDE COMPARISONS
 41 | #
 42 | 
 43 | # Run GPT-3.5 evaluator
 44 | from phasellm.eval import GPTEvaluator
 45 | 
 46 | # We'll use GPT-3.5 as the evaluator; this is the default setting in the class below
 47 | e = GPTEvaluator(openai_api_key)
 48 | 
 49 | # Our objective.
 50 | objective = "We're building a chatbot to discuss a user's travel preferences and provide advice."
 51 | 
 52 | # Chats that have been launched by users.
 53 | travel_chat_starts = [
 54 |     "I'm planning to visit Poland in spring.",
 55 |     "I'm looking for the cheapest flight to Europe next week.",
 56 |     "I am trying to decide between Prague and Paris for a 5-day trip",
 57 |     "I want to visit Europe but can't decide if spring, summer, or fall would be better.",
 58 |     "I'm unsure I should visit Spain by flying via the UK or via France."
 59 | ]
 60 | 
 61 | from phasellm.llms import CohereWrapper, ClaudeWrapper
 62 | cohere_model = CohereWrapper(cohere_api_key)
 63 | claude_model = ClaudeWrapper(anthropic_api_key)
 64 | 
 65 | print("Running test. 1 = Cohere, and 2 = Claude.")
 66 | for tcs in travel_chat_starts:
 67 |     messages = [{"role":"system", "content":objective},
 68 |             {"role":"user", "content":tcs}]
 69 |     response_cohere = cohere_model.complete_chat(messages, "assistant")
 70 |     response_claude = claude_model.complete_chat(messages, "assistant")
 71 |     pref = e.choose(objective, tcs, response_cohere, response_claude)
 72 |     print(f"{pref}")
 73 | 	
 74 | ##########################################################################################
 75 | # DOLLY TESTS
 76 | #
 77 | 	
 78 | from phasellm.llms import DollyWrapper
 79 | dw = DollyWrapper()
 80 | 
 81 | # Testing chat capability.
 82 | messages = [{"role":"user", "content":"What should I eat for lunch today?"}]
 83 | dw.complete_chat(messages, 'assistant')
 84 | 
 85 | # Run a text completion.
 86 | dw.text_completion("The capital of Poland is")
 87 | 	
 88 | ##########################################################################################
 89 | # GPT EVALUATOR WITH COHERE AND DOLLY COMPARISONS
 90 | #
 91 | 
 92 | import os
 93 | from dotenv import load_dotenv
 94 | 
 95 | load_dotenv()
 96 | openai_api_key = os.getenv("OPENAI_API_KEY")
 97 | anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
 98 | cohere_api_key = os.getenv("COHERE_API_KEY")
 99 | 
100 | from phasellm.eval import GPTEvaluator
101 | 
102 | # We'll use GPT-3.5 as the evaluator.
103 | e = GPTEvaluator(openai_api_key)
104 | 
105 | # Our objective.
106 | objective = "We're building a chatbot to discuss a user's travel preferences and provide advice."
107 | 
108 | # Chats that have been launched by users.
109 | travel_chat_starts = [
110 |     "I'm planning to visit Poland in spring.",
111 |     "I'm looking for the cheapest flight to Europe next week.",
112 |     "I am trying to decide between Prague and Paris for a 5-day trip",
113 |     "I want to visit Europe but can't decide if spring, summer, or fall would be better.",
114 |     "I'm unsure I should visit Spain by flying via the UK or via France."
115 | ]
116 | 
117 | from phasellm.llms import CohereWrapper
118 | from phasellm.llms import DollyWrapper # NEW: importing the DollyWrapper...
119 | dw = DollyWrapper() # NEW: ... and instantiating it.
120 | 
121 | cohere_model = CohereWrapper(cohere_api_key)
122 | 
123 | print("Running test. 1 = Cohere, and 2 = Dolly.")
124 | for tcs in travel_chat_starts:
125 |     messages = [{"role":"system", "content":objective},
126 |                 {"role":"user", "content":tcs}]
127 |     response_cohere = cohere_model.complete_chat(messages, "assistant")
128 |     response_dw = dw.complete_chat(messages, "assistant") # NEW: minor change to variable name
129 |     pref = e.choose(objective, tcs, response_cohere, response_dw)
130 |     print(f"{pref}")
131 | 
132 | ##########################################################################################
133 | # HUGGINGFACE INFERENCE API TESTS
134 | #
135 | 
136 | from phasellm.llms import HuggingFaceInferenceWrapper
137 | hf = HuggingFaceInferenceWrapper(hugging_face_api_key, model_url='https://api-inference.huggingface.co/models/google/flan-t5-xxl')
138 | 
139 | # Testing chat capability.
140 | messages = [{"role":"user", "content":"What should I eat for lunch today?"}]
141 | hf.complete_chat(messages, 'assistant')
142 | 
143 | # Run a text completion.
144 | hf.text_completion("The capital of Poland is")
145 | 
146 | ##########################################################################################
147 | # CHATBOT resend() TEST
148 | #
149 | 
150 | from phasellm.llms import OpenAIGPTWrapper, ChatBot
151 | 
152 | oaiw = OpenAIGPTWrapper(openai_api_key, 'gpt-4')
153 | cb = ChatBot(oaiw)
154 | m = [{'role': 'system', 'content': "You are a robot that adds 'YO!' to the end of every sentence."}, {'role': 'user', 'content': 'Tell me about Poland.'}]
155 | cb.messages = m
156 | cb.resend()
157 | 
158 | ##########################################################################################
159 | # EVAL simulations TO EXCEL
160 | #
161 | 
162 | from phasellm.llms import ChatBot, OpenAIGPTWrapper
163 | from phasellm.eval import *
164 | 
165 | o = OpenAIGPTWrapper(openai_api_key)
166 | c = ChatBot(o)
167 | c.messages = [ {"role":"system", "content":"You're a mathbot."}, {"role":"user", "content":"What is 3*4*5*zebra?"} ]
168 | 
169 | x = simulate_n_chat_simulations(c, 4, 'responses.xlsx')


--------------------------------------------------------------------------------
/tests/unit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/unit/__init__.py


--------------------------------------------------------------------------------
/tests/unit/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/unit/agents/__init__.py


--------------------------------------------------------------------------------
/tests/unit/llms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wgryc/phasellm/974d026dc649e4a71da4c25bf8c934622e56cf5d/tests/unit/llms/__init__.py


--------------------------------------------------------------------------------
/tests/unit/llms/test_llms.py:
--------------------------------------------------------------------------------
 1 | from unittest import TestCase
 2 | 
 3 | from phasellm.llms import Prompt, OpenAIGPTWrapper, StreamingOpenAIGPTWrapper
 4 | 
 5 | 
 6 | class TestPrompt(TestCase):
 7 | 
 8 |     def test_prompt_fill(self):
 9 |         p = "1: {fill_1}, 2: {fill_2}, 3: {fill_3}"
10 |         prompt = Prompt(p)
11 | 
12 |         actual = prompt.fill(fill_1="one", fill_2="two", fill_3="three")
13 | 
14 |         expected = "1: one, 2: two, 3: three"
15 | 
16 |         self.assertEqual(actual, expected, f"{actual} != {expected}")
17 | 
18 | 
19 | class TestOpenAIGPTWrapper(TestCase):
20 |     CONFIG_ERROR = 'Must pass apikey or api_config. If using kwargs, check capitalization.'
21 | 
22 |     def test_config_error_incorrect_kwarg(self):
23 |         error = False
24 |         try:
25 |             self.fixture = OpenAIGPTWrapper(apiKey='test')
26 |         except Exception as e:
27 |             self.assertEqual(e.__str__(), self.CONFIG_ERROR)
28 |             error = True
29 |         self.assertTrue(error, 'Expected error to occur.')
30 | 
31 |     def test_config_error_missing_config(self):
32 |         error = False
33 |         try:
34 |             self.fixture = OpenAIGPTWrapper()
35 |         except Exception as e:
36 |             self.assertEqual(e.__str__(), self.CONFIG_ERROR)
37 |             error = True
38 |         self.assertTrue(error, 'Expected error to occur.')
39 | 


--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | from queue import Queue
 4 | 
 5 | from threading import Thread
 6 | 
 7 | 
 8 | class Timeout:
 9 | 
10 |     def __init__(self, seconds=5):
11 |         """
12 |         This class is used to timeout tests.
13 |         Args:
14 |             seconds: The timeout in seconds.
15 |         """
16 | 
17 |         self.seconds = seconds
18 | 
19 |         self._exception_queue = Queue()
20 |         self._finished = False
21 |         self._timeout_thread = None
22 | 
23 |     def _timeout(self):
24 |         time.sleep(self.seconds)
25 |         if not self._finished:
26 |             self._exception_queue.put(True)
27 |         else:
28 |             self._exception_queue.put(False)
29 | 
30 |     def start(self):
31 |         self._timeout_thread = Thread(target=self._timeout)
32 |         self._timeout_thread.start()
33 | 
34 |     def stop(self):
35 |         self._finished = True
36 |         self._exception_queue.put(False)
37 | 
38 |     def check(self):
39 |         if not self._exception_queue.empty():
40 |             exception = self._exception_queue.get(block=False)
41 |             if exception:
42 |                 raise TimeoutError(f"Timeout of {self.seconds} seconds exceeded.")
43 | 


--------------------------------------------------------------------------------