46 | Say Goodbye to Hours of Research
47 |
52 | Say Hello to GPT Researcher, your AI mate for rapid insights and comprehensive research. GPT Researcher
53 | takes care of everything from accurate source gathering to organizing research results - all in one
54 | platform designed to make your research process a breeze.
55 |
--------------------------------------------------------------------------------
/actions/web_scrape.py:
--------------------------------------------------------------------------------
1 | """Selenium web scraping module."""
2 | from __future__ import annotations
3 |
4 | import logging
5 | import asyncio
6 | from pathlib import Path
8 |
9 | from bs4 import BeautifulSoup
12 | from selenium import webdriver
13 | from selenium.webdriver.chrome.service import Service
14 | from selenium.webdriver.chrome.options import Options as ChromeOptions
15 | from selenium.webdriver.common.by import By
17 | from selenium.webdriver.remote.webdriver import WebDriver
19 | from selenium.webdriver.support import expected_conditions as EC
20 | from selenium.webdriver.support.wait import WebDriverWait
21 | from fastapi import WebSocket
22 |
23 | import processing.text as summary
24 |
25 | from config import Config
26 | from processing.html import extract_hyperlinks, format_hyperlinks
27 |
28 | from concurrent.futures import ThreadPoolExecutor
29 |
30 | executor = ThreadPoolExecutor()
31 |
32 | FILE_DIR = Path(__file__).parent.parent
33 | CFG = Config()
34 |
35 |
36 | async def async_browse(url: str, question: str, websocket: WebSocket) -> str:
37 | """Browse a website and return the answer and links to the user
38 |
39 | Args:
40 | url (str): The url of the website to browse
41 | question (str): The question asked by the user
42 |         websocket (WebSocket): The websocket used to stream progress logs to the client
43 |
44 | Returns:
45 | str: The answer and links to the user
46 | """
47 |     loop = asyncio.get_event_loop()
48 |     # Reuse the module-level executor instead of creating a new thread pool on every call
49 |
50 | print(f"Scraping url {url} with question {question}")
51 |     await websocket.send_json(
52 |         {"type": "logs", "output": f"🔎 Browsing the {url} for relevant information about: {question}..."})
53 |
54 | try:
55 | driver, text = await loop.run_in_executor(executor, scrape_text_with_selenium, url)
56 | await loop.run_in_executor(executor, add_header, driver)
57 |         summary_text = await loop.run_in_executor(executor, summary.summarize_text, url, text, question, driver)
58 |         await loop.run_in_executor(executor, close_browser, driver)  # quit the browser so each scraped url doesn't leak a Chrome process
59 | await websocket.send_json(
60 | {"type": "logs", "output": f"📝 Information gathered from url {url}: {summary_text}"})
61 |
62 | return f"Information gathered from url {url}: {summary_text}"
63 | except Exception as e:
64 | print(f"An error occurred while processing the url {url}: {e}")
65 | return f"Error processing the url {url}: {e}"
66 |
67 |
68 |
69 | def browse_website(url: str, question: str) -> tuple[str, WebDriver]:
70 | """Browse a website and return the answer and links to the user
71 |
72 | Args:
73 | url (str): The url of the website to browse
74 | question (str): The question asked by the user
75 |
76 | Returns:
77 | Tuple[str, WebDriver]: The answer and links to the user and the webdriver
78 | """
79 |
80 | if not url:
81 | return "A URL was not specified, cancelling request to browse website.", None
82 |
83 | driver, text = scrape_text_with_selenium(url)
84 | add_header(driver)
85 | summary_text = summary.summarize_text(url, text, question, driver)
86 |
87 | links = scrape_links_with_selenium(driver, url)
88 |
89 | # Limit links to 5
90 | if len(links) > 5:
91 | links = links[:5]
92 |
93 | # write_to_file('research-{0}.txt'.format(url), summary_text + "\nSource Links: {0}\n\n".format(links))
94 |
95 | close_browser(driver)
96 |     return f"Answer gathered from website: {summary_text} \n \n Links: {links}", driver  # note: the driver has already been quit at this point
97 |
98 |
99 | def scrape_text_with_selenium(url: str) -> tuple[WebDriver, str]:
100 | """Scrape text from a website using selenium
101 |
102 | Args:
103 | url (str): The url of the website to scrape
104 |
105 | Returns:
106 | Tuple[WebDriver, str]: The webdriver and the text scraped from the website
107 | """
108 | logging.getLogger("selenium").setLevel(logging.CRITICAL)
109 |
110 | options = ChromeOptions()
111 | options.add_argument("--headless")
112 | options.add_argument("--no-sandbox")
113 | options.add_argument("--disable-dev-shm-usage") # Overcomes limited resource problems
114 | options.add_argument(f'user-agent={CFG.user_agent}')
115 |     options.add_experimental_option("prefs", {"download_restrictions": 3})  # 3 = block all downloads
116 |
117 | driver = webdriver.Chrome(options=options)
118 | driver.get(url)
119 |
120 | WebDriverWait(driver, 10).until(
121 | EC.presence_of_element_located((By.TAG_NAME, "body"))
122 | )
123 |
124 | # Get the HTML content directly from the browser's DOM
125 | page_source = driver.execute_script("return document.body.outerHTML;")
126 | soup = BeautifulSoup(page_source, "html.parser")
127 |
128 | for script in soup(["script", "style"]):
129 | script.extract()
130 |
131 | # text = soup.get_text()
132 | text = get_text(soup)
133 |
134 | lines = (line.strip() for line in text.splitlines())
135 |     chunks = (phrase.strip() for line in lines for phrase in line.split("  "))  # split on runs of double spaces
136 | text = "\n".join(chunk for chunk in chunks if chunk)
137 | return driver, text
138 |
139 |
140 | def get_text(soup):
141 | """Get the text from the soup
142 |
143 | Args:
144 | soup (BeautifulSoup): The soup to get the text from
145 |
146 | Returns:
147 | str: The text from the soup
148 | """
149 | text = ""
150 | tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'p']
151 |     for element in soup.find_all(tags):  # find all heading and paragraph elements
152 |         text += element.text + "\n\n"
153 | return text
154 |
155 |
156 | def scrape_links_with_selenium(driver: WebDriver, url: str) -> list[str]:
157 | """Scrape links from a website using selenium
158 |
159 | Args:
160 |         driver (WebDriver): The webdriver to use to scrape the links
161 |         url (str): The base url used to resolve relative hyperlinks
162 | Returns:
163 | List[str]: The links scraped from the website
164 | """
165 | page_source = driver.page_source
166 | soup = BeautifulSoup(page_source, "html.parser")
167 |
168 | for script in soup(["script", "style"]):
169 | script.extract()
170 |
171 | hyperlinks = extract_hyperlinks(soup, url)
172 |
173 | return format_hyperlinks(hyperlinks)
174 |
175 |
176 | def close_browser(driver: WebDriver) -> None:
177 | """Close the browser
178 |
179 | Args:
180 | driver (WebDriver): The webdriver to close
181 |
182 | Returns:
183 | None
184 | """
185 | driver.quit()
186 |
187 |
188 | def add_header(driver: WebDriver) -> None:
189 | """Add a header to the website
190 |
191 | Args:
192 | driver (WebDriver): The webdriver to use to add the header
193 |
194 | Returns:
195 | None
196 | """
197 |     driver.execute_script((FILE_DIR / "js/overlay.js").read_text())
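

if __name__ == "__main__":
    # Editor-added usage sketch: scrape a single page and print a text preview
    # plus up to five links. Assumes a local Chrome install; no API keys needed.
    demo_url = "https://example.com"
    demo_driver, demo_text = scrape_text_with_selenium(demo_url)
    try:
        print(demo_text[:500])
        print(scrape_links_with_selenium(demo_driver, demo_url)[:5])
    finally:
        close_browser(demo_driver)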
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🔎 GPT Researcher
2 | [Website](https://tavily.com)
3 | [Discord](https://discord.com/invite/rqw8dnM8)
4 | [GitHub](https://github.com/assafelovic/gpt-researcher)
5 | [Twitter](https://twitter.com/assaf_elovic)
6 |
7 | **GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks.**
8 |
9 | The agent can produce detailed, factual and unbiased research reports, with customization options for focusing on relevant resources, outlines, and lessons. Inspired by [AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT) and the recent [Plan-and-Solve](https://arxiv.org/abs/2305.04091) paper, GPT Researcher addresses issues of speed and determinism, offering more stable performance and increased speed through parallelized agent work, as opposed to synchronous operations.
10 |
11 | **Our mission is to empower individuals and organizations with accurate, unbiased, and factual information by leveraging the power of AI.**
12 |
13 | ## Why GPT Researcher?
14 |
15 | - Forming objective conclusions through manual research takes time, sometimes weeks, to find the right resources and information.
16 | - Current LLMs are trained on past and outdated information, with heavy risks of hallucination, making them almost irrelevant for research tasks.
17 | - Solutions that enable web search (such as ChatGPT + Web Plugin) only consider limited resources, which in some cases results in superficial conclusions or biased answers.
18 | - Using only a selection of resources can create bias in determining the right conclusions for research questions or tasks.
19 |
20 | ## Architecture
21 | The main idea is to run "planner" and "execution" agents, where the planner generates questions to research, and the execution agents seek the most relevant information for each generated research question. Finally, the planner filters and aggregates all related information and creates a research report. The agents leverage both gpt-3.5-turbo-16k and gpt-4 to complete a research task.
22 |
23 |
24 |
25 |
26 |
27 |
28 | More specifically (a simplified sketch follows this list):
29 | * Generate a set of research questions that together form an objective opinion on any given task.
30 | * For each research question, trigger a crawler agent that scrapes online resources for information relevant to the given task.
31 | * For each scraped resource, summarize it based on the relevant information and keep track of its source.
32 | * Finally, filter and aggregate all summarized sources and generate a final research report.
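
For illustration, the control flow roughly reduces to the following (a simplified, editor-added sketch; `create_search_queries`, `run_search_summary`, and `write_report` are methods of `ResearchAgent` in `agent/research_agent.py`):

```python
import asyncio

async def research(agent) -> str:
    # Planner: derive research questions from the user's task
    queries = await agent.create_search_queries()
    # Execution agents: scrape and summarize each query concurrently
    summaries = await asyncio.gather(
        *(agent.run_search_summary(query) for query in queries)
    )
    # Planner: aggregate the summaries and write the final report
    agent.research_summary = "\n\n".join(summaries)
    report, _, _ = await agent.write_report("research_report", agent.websocket)
    return report
```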
33 |
34 | ## Demo
35 | https://github.com/assafelovic/gpt-researcher/assets/13554167/a00c89a6-a295-4dd0-b58d-098a31c40fda
36 |
37 | ## Features
38 | - 📝 Generate research, outline, resource, and lesson reports
39 | - 🌐 Aggregates over 20 web sources per research task to form objective and factual conclusions
40 | - 🖥️ Includes an easy-to-use web interface (HTML/CSS/JS)
41 | - 🔍 Scrapes web sources with JavaScript support
42 | - 📂 Keeps track of and maintains context for visited and used web sources
43 | - 📄 Export research reports to PDF and more...
44 |
45 | ## Quickstart
46 | > **Step 0** - Install Python 3.11 or later. [See here](https://www.tutorialsteacher.com/python/install-python) for a step-by-step guide.
47 |
48 |
49 |
50 | > **Step 1** - Download the project
51 |
52 | ```bash
53 | $ git clone https://github.com/assafelovic/gpt-researcher.git
54 | $ cd gpt-researcher
55 | ```
56 |
57 |
58 |
59 | > **Step 2** - Install dependencies
60 | ```bash
61 | $ pip install -r requirements.txt
62 | ```
63 |
64 |
65 | > **Step 3** - Create .env file with your OpenAI Key or simply export it
66 |
67 | ```bash
68 | $ export OPENAI_API_KEY={Your API Key here}
69 | ```
70 |
71 |
72 | > **Step 4** - Run the agent with FastAPI
73 |
74 | ```bash
75 | $ uvicorn main:app --reload
76 | ```
77 |
78 |
79 | > **Step 5** - Go to http://localhost:8000 on any browser and enjoy researching!
80 |
81 | - **Update:** If you are having issues with WeasyPrint, please visit their website and follow the installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html
82 |
83 | ## Try it with Docker
84 |
85 | > **Step 1** - Install Docker
86 |
87 | Follow instructions at https://docs.docker.com/engine/install/
88 |
89 | > **Step 2** - Create .env file with your OpenAI Key or simply export it
90 |
91 | ```bash
92 | $ export OPENAI_API_KEY={Your API Key here}
93 | ```
94 |
95 | > **Step 3** - Run the application
96 |
97 | ```bash
98 | $ docker-compose up
99 | ```
100 |
101 | > **Step 4** - Go to http://localhost:8000 on any browser and enjoy researching!
102 |
103 | - **Update:** If you are having issues with WeasyPrint, please visit their website and follow the installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html
104 |
105 | ## 🛡 Disclaimer
106 |
107 | This project, GPT Researcher, is an experimental application and is provided "as-is" without any warranty, express or implied. We are sharing code for academic purposes under the MIT license. Nothing herein is academic advice, nor a recommendation for use in academic or research papers.
108 |
109 | Our view on unbiased research claims:
110 | 1. The whole point of our scraping system is to reduce incorrect facts: the more sites we scrape, the lower the chance of incorrect data. Scraping around 20 sites per research task makes the chance that all of them are wrong extremely low (for example, if each source were independently wrong with probability 0.3, the chance that all 20 are wrong would be 0.3^20, which is vanishingly small).
111 | 2. We do not aim to eliminate biases; we aim to reduce them as much as possible. **We are here as a community to figure out the most effective human/llm interactions.**
112 | 3. In research, people also tend towards biases, as most already have opinions on the topics they research. This tool scrapes many opinions and will evenly present diverse views that a biased person would never have read.
113 |
114 | **Please note that the use of the GPT-4 language model can be expensive due to its token usage.** By utilizing this project, you acknowledge that you are responsible for monitoring and managing your own token usage and the associated costs. It is highly recommended to check your OpenAI API usage regularly and set up any necessary limits or alerts to prevent unexpected charges.
115 |
116 | ## 🔧 Troubleshooting
117 | We're constantly working to provide a more stable version. In the meantime, see here for known issues:
118 |
119 | **cannot load library 'gobject-2.0-0'**
120 |
121 | The issue relates to the library WeasyPrint (which is used to generate PDFs from the research report). Please follow this guide to resolve it: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html
122 |
123 | **Error processing the url**
124 |
125 | We're using [Selenium](https://www.selenium.dev) for site scraping. Some sites fail to be scraped. In these cases, restart and try running again.
126 |
127 |
128 |
129 |
--------------------------------------------------------------------------------
/agent/prompts.py:
--------------------------------------------------------------------------------
1 | def generate_agent_role_prompt(agent):
2 | """ Generates the agent role prompt.
3 | Args: agent (str): The type of the agent.
4 | Returns: str: The agent role prompt.
5 | """
6 | prompts = {
7 | "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.",
8 | "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.",
9 | "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.",
10 | "Business Analyst": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.",
11 | "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.",
12 | "Default Agent": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."
13 | }
14 |
15 | return prompts.get(agent, "No such agent")
16 |
17 |
18 | def generate_report_prompt(question, research_summary):
19 | """ Generates the report prompt for the given question and research summary.
20 | Args: question (str): The question to generate the report prompt for
21 | research_summary (str): The research summary to generate the report prompt for
22 | Returns: str: The report prompt for the given question and research summary
23 | """
24 |
25 | return f'"""{research_summary}""" Using the above information, answer the following'\
26 | f' question or topic: "{question}" in a detailed report --'\
27 | " The report should focus on the answer to the question, should be well structured, informative," \
28 |            " in depth, with facts and numbers if available, a minimum of 1,200 words, and with Markdown syntax and APA format. "\
29 |            "Write all source urls at the end of the report in APA format."
30 |
31 | def generate_search_queries_prompt(question):
32 | """ Generates the search queries prompt for the given question.
33 | Args: question (str): The question to generate the search queries prompt for
34 | Returns: str: The search queries prompt for the given question
35 | """
36 |
37 |     return f'Write 4 google search queries to search online that form an objective opinion from the following: "{question}" '\
38 | f'You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3", "query 4"]'
39 |
40 |
41 | def generate_resource_report_prompt(question, research_summary):
42 | """Generates the resource report prompt for the given question and research summary.
43 |
44 | Args:
45 | question (str): The question to generate the resource report prompt for.
46 | research_summary (str): The research summary to generate the resource report prompt for.
47 |
48 | Returns:
49 | str: The resource report prompt for the given question and research summary.
50 | """
51 | return f'"""{research_summary}""" Based on the above information, generate a bibliography recommendation report for the following' \
52 | f' question or topic: "{question}". The report should provide a detailed analysis of each recommended resource,' \
53 | ' explaining how each source can contribute to finding answers to the research question.' \
54 | ' Focus on the relevance, reliability, and significance of each source.' \
55 | ' Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax.' \
56 | ' Include relevant facts, figures, and numbers whenever available.' \
57 | ' The report should have a minimum length of 1,200 words.'
58 |
59 |
60 | def generate_outline_report_prompt(question, research_summary):
61 | """ Generates the outline report prompt for the given question and research summary.
62 | Args: question (str): The question to generate the outline report prompt for
63 | research_summary (str): The research summary to generate the outline report prompt for
64 | Returns: str: The outline report prompt for the given question and research summary
65 | """
66 |
67 | return f'"""{research_summary}""" Using the above information, generate an outline for a research report in Markdown syntax'\
68 | f' for the following question or topic: "{question}". The outline should provide a well-structured framework'\
69 | ' for the research report, including the main sections, subsections, and key points to be covered.' \
70 | ' The research report should be detailed, informative, in-depth, and a minimum of 1,200 words.' \
71 | ' Use appropriate Markdown syntax to format the outline and ensure readability.'
72 |
73 | def generate_concepts_prompt(question, research_summary):
74 | """ Generates the concepts prompt for the given question.
75 | Args: question (str): The question to generate the concepts prompt for
76 | research_summary (str): The research summary to generate the concepts prompt for
77 | Returns: str: The concepts prompt for the given question
78 | """
79 |
80 |     return f'"""{research_summary}""" Using the above information, generate a list of 5 main concepts to learn for a research report'\
81 |            f' on the following question or topic: "{question}". '\
82 |            'You must respond with a list of strings in the following format: ["concepts 1", "concepts 2", "concepts 3", "concepts 4", "concepts 5"]'
83 |
84 |
85 | def generate_lesson_prompt(concept):
86 | """
87 | Generates the lesson prompt for the given question.
88 | Args:
89 | concept (str): The concept to generate the lesson prompt for.
90 | Returns:
91 | str: The lesson prompt for the given concept.
92 | """
93 |
94 |     prompt = f'generate a comprehensive lesson about {concept} in Markdown syntax. This should include the definition '\
95 |              f'of {concept}, its historical background and development, its applications or uses in different '\
96 |              f'fields, and notable events or facts related to {concept}.'
97 |
98 | return prompt
99 |
100 | def get_report_by_type(report_type):
101 | report_type_mapping = {
102 | 'research_report': generate_report_prompt,
103 | 'resource_report': generate_resource_report_prompt,
104 | 'outline_report': generate_outline_report_prompt
105 | }
106 |     return report_type_mapping.get(report_type, generate_report_prompt)  # fall back to a standard research report
107 |
108 |
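if __name__ == "__main__":
    # Editor-added sanity check: render a sample role prompt and report prompt
    # so the templates can be inspected without running the full agent.
    print(generate_agent_role_prompt("Finance Agent"))
    print(get_report_by_type("research_report")("What is quantum computing?", "(research summary goes here)"))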
--------------------------------------------------------------------------------
/agent/research_agent.py:
--------------------------------------------------------------------------------
1 | # Description: Research assistant class that handles the research process for a given question.
2 |
3 | # libraries
4 | import asyncio
5 | import json
6 | from actions.web_search import web_search
7 | from actions.web_scrape import async_browse
8 | from processing.text import \
9 | write_to_file, \
10 | create_message, \
11 | create_chat_completion, \
12 | read_txt_files, \
13 | write_md_to_pdf
14 | from config import Config
15 | from agent import prompts
16 | import os
17 | import string
18 |
19 |
20 | CFG = Config()
21 |
22 |
23 | class ResearchAgent:
24 | def __init__(self, question, agent, agent_role_prompt, websocket):
25 | """ Initializes the research assistant with the given question.
26 | Args: question (str): The question to research
27 | Returns: None
28 | """
29 |
30 | self.question = question
31 | self.agent = agent
32 | self.agent_role_prompt = agent_role_prompt
33 | self.visited_urls = set()
34 | self.research_summary = ""
35 | self.directory_name = ''.join(c for c in question if c.isascii() and c not in string.punctuation)[:100]
36 | self.dir_path = os.path.dirname(f"./outputs/{self.directory_name}/")
37 | self.websocket = websocket
38 |
39 | async def summarize(self, text, topic):
40 | """ Summarizes the given text for the given topic.
41 | Args: text (str): The text to summarize
42 | topic (str): The topic to summarize the text for
43 | Returns: str: The summarized text
44 | """
45 |
46 | messages = [create_message(text, topic)]
47 |         await self.websocket.send_json({"type": "logs", "output": f"📝 Summarizing text for query: {topic}"})
48 |
49 | return create_chat_completion(
50 | model=CFG.fast_llm_model,
51 | messages=messages,
52 | )
53 |
54 | async def get_new_urls(self, url_set_input):
55 | """ Gets the new urls from the given url set.
56 | Args: url_set_input (set[str]): The url set to get the new urls from
57 | Returns: list[str]: The new urls from the given url set
58 | """
59 |
60 | new_urls = []
61 | for url in url_set_input:
62 | if url not in self.visited_urls:
63 | await self.websocket.send_json({"type": "logs", "output": f"✅ Adding source url to research: {url}\n"})
64 | self.visited_urls.add(url)
65 | new_urls.append(url)
66 |
67 | return new_urls
68 |
69 | async def call_agent(self, action, stream=False, websocket=None):
70 | messages = [{
71 | "role": "system",
72 | "content": self.agent_role_prompt if self.agent_role_prompt else prompts.generate_agent_role_prompt(self.agent)
73 | }, {
74 | "role": "user",
75 | "content": action,
76 | }]
77 |         answer = create_chat_completion(
78 |             model=CFG.smart_llm_model,
79 |             messages=messages,
80 |             stream=stream,
81 |             websocket=websocket,
82 |         )
83 |         return await answer if asyncio.iscoroutine(answer) else answer  # the streaming path returns a coroutine
84 |
85 | async def create_search_queries(self):
86 | """ Creates the search queries for the given question.
87 | Args: None
88 | Returns: list[str]: The search queries for the given question
89 | """
90 | result = await self.call_agent(prompts.generate_search_queries_prompt(self.question))
91 | print(result)
92 | await self.websocket.send_json({"type": "logs", "output": f"🧠 I will conduct my research based on the following queries: {result}..."})
93 | return json.loads(result)
94 |
95 | async def async_search(self, query):
96 | """ Runs the async search for the given query.
97 | Args: query (str): The query to run the async search for
98 | Returns: list[str]: The async search for the given query
99 | """
100 |         search_results = json.loads(web_search(query))
101 |         new_search_urls = await self.get_new_urls([url.get("href") for url in search_results])
102 |
103 |         await self.websocket.send_json(
104 |             {"type": "logs", "output": f"🌐 Browsing the following sites for relevant information: {new_search_urls}..."})
105 |
106 |         # Create a list to hold the coroutine objects
107 |         tasks = [async_browse(url, query, self.websocket) for url in new_search_urls]
108 |
109 | # Gather the results as they become available
110 | responses = await asyncio.gather(*tasks, return_exceptions=True)
111 |
112 | return responses
113 |
114 | async def run_search_summary(self, query):
115 | """ Runs the search summary for the given query.
116 | Args: query (str): The query to run the search summary for
117 | Returns: str: The search summary for the given query
118 | """
119 |
120 | await self.websocket.send_json({"type": "logs", "output": f"🔎 Running research for '{query}'..."})
121 |
122 | responses = await self.async_search(query)
123 |
124 |         result = "\n".join(str(response) for response in responses)  # tolerate exceptions surfaced by gather
125 | os.makedirs(os.path.dirname(f"./outputs/{self.directory_name}/research-{query}.txt"), exist_ok=True)
126 | write_to_file(f"./outputs/{self.directory_name}/research-{query}.txt", result)
127 | return result
128 |
129 | async def conduct_research(self):
130 | """ Conducts the research for the given question.
131 | Args: None
132 | Returns: str: The research for the given question
133 | """
134 | try:
135 | #self.research_summary = read_txt_files(self.dir_path) if os.path.isdir(self.dir_path) else ""
136 |
137 | #if not self.research_summary:
138 | search_queries = await self.create_search_queries()
139 | for query in search_queries:
140 | research_result = await self.run_search_summary(query)
141 | self.research_summary += f"{research_result}\n\n"
142 |
143 | await self.websocket.send_json(
144 |             {"type": "logs", "output": f"Total research words: {len(self.research_summary.split())}"})
145 |
146 | return self.research_summary, None
147 | except Exception as e:
148 | return None, e
149 |
150 |
151 | async def create_concepts(self):
152 | """ Creates the concepts for the given question.
153 | Args: None
154 | Returns: list[str]: The concepts for the given question
155 | """
156 |         result = await self.call_agent(prompts.generate_concepts_prompt(self.question, self.research_summary))
157 |
158 | await self.websocket.send_json({"type": "logs", "output": f"I will research based on the following concepts: {result}\n"})
159 | return json.loads(result)
160 |
161 | async def write_report(self, report_type, websocket):
162 | """ Writes the report for the given question.
163 | Args: None
164 | Returns: str: The report for the given question
165 | """
166 | report_type_func = prompts.get_report_by_type(report_type)
167 | await websocket.send_json(
168 | {"type": "logs", "output": f"✍️ Writing {report_type} for research task: {self.question}..."})
169 | answer = await self.call_agent(report_type_func(self.question, self.research_summary), stream=True,
170 | websocket=websocket)
171 |
172 |         encoded_path, path = await write_md_to_pdf(report_type, self.directory_name, answer)
173 |
174 | return answer, encoded_path, path
175 |
176 | async def write_lessons(self):
177 | """ Writes lessons on essential concepts of the research.
178 | Args: None
179 | Returns: None
180 | """
181 | concepts = await self.create_concepts()
182 | for concept in concepts:
183 |             answer = await self.call_agent(prompts.generate_lesson_prompt(concept), stream=True, websocket=self.websocket)
184 |             await write_md_to_pdf("Lesson", self.directory_name, answer)
185 |
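
if __name__ == "__main__":
    # Editor-added smoke-test sketch: run the agent end to end with a stub
    # websocket that just prints the log messages. Assumes OPENAI_API_KEY is set.
    class _PrintSocket:
        async def send_json(self, data):
            print(data.get("output", data))

    async def _demo():
        agent = ResearchAgent("What is quantum computing?", "Default Agent", None, _PrintSocket())
        research_summary, error = await agent.conduct_research()
        print(error if error else research_summary[:500])

    asyncio.run(_demo())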
--------------------------------------------------------------------------------
/agent/llm_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import json
4 |
5 | from fastapi import WebSocket
6 | import time
7 |
8 | import openai
9 | from colorama import Fore, Style
10 | from openai.error import APIError, RateLimitError
11 |
12 | from config import Config
13 |
14 | CFG = Config()
15 |
16 | #openai.api_key = CFG.openai_api_key
17 |
18 | from typing import Optional
19 | import logging
20 |
21 |
22 | def create_chat_completion(
23 | messages: list, # type: ignore
24 | model: Optional[str] = None,
25 | temperature: float = CFG.temperature,
26 | max_tokens: Optional[int] = None,
27 | stream: Optional[bool] = False,
28 | websocket: WebSocket | None = None,
29 | ) -> str:
30 | """Create a chat completion using the OpenAI API
31 | Args:
32 | messages (list[dict[str, str]]): The messages to send to the chat completion
33 | model (str, optional): The model to use. Defaults to None.
34 | temperature (float, optional): The temperature to use. Defaults to 0.9.
35 | max_tokens (int, optional): The max tokens to use. Defaults to None.
36 | stream (bool, optional): Whether to stream the response. Defaults to False.
37 | Returns:
38 | str: The response from the chat completion
39 | """
40 |
41 | # validate input
42 | if model is None:
43 | raise ValueError("Model cannot be None")
44 | if max_tokens is not None and max_tokens > 8001:
45 | raise ValueError(f"Max tokens cannot be more than 8001, but got {max_tokens}")
46 | if stream and websocket is None:
47 | raise ValueError("Websocket cannot be None when stream is True")
48 |
49 | # create response
50 | for attempt in range(10): # maximum of 10 attempts
51 | try:
52 | response = send_chat_completion_request(
53 | messages, model, temperature, max_tokens, stream, websocket
54 | )
55 | return response
56 | except RateLimitError:
57 | logging.warning("Rate limit reached, backing off...")
58 | time.sleep(2 ** (attempt + 2)) # exponential backoff
59 | except APIError as e:
60 | if e.http_status != 502 or attempt == 9: # if not Bad Gateway error or final attempt
61 | raise
62 | logging.error("API Error: Bad gateway, backing off...")
63 | time.sleep(2 ** (attempt + 2)) # exponential backoff
64 |
65 | logging.error("Failed to get response after 10 attempts")
66 | raise RuntimeError("Failed to get response from OpenAI API")
67 |
68 |
69 | def send_chat_completion_request(
70 | messages, model, temperature, max_tokens, stream, websocket
71 | ):
72 | if not stream:
73 | result = openai.ChatCompletion.create(
74 | model=model,
75 | messages=messages,
76 | temperature=temperature,
77 | max_tokens=max_tokens,
78 | )
79 | return result.choices[0].message["content"]
80 | else:
81 |         return stream_response(model, messages, temperature, max_tokens, websocket)  # async: returns a coroutine the caller must await
82 |
83 |
84 | async def stream_response(model, messages, temperature, max_tokens, websocket):
85 | paragraph = ""
86 | response = ""
87 |     print("streaming response...")
88 |
89 | for chunk in openai.ChatCompletion.create(
90 | model=model,
91 | messages=messages,
92 | temperature=temperature,
93 | max_tokens=max_tokens,
94 | stream=True,
95 | ):
96 | content = chunk["choices"][0].get("delta", {}).get("content")
97 | if content is not None:
98 | response += content
99 | paragraph += content
100 | if "\n" in paragraph:
101 | await websocket.send_json({"type": "report", "output": paragraph})
102 | paragraph = ""
103 |     print("streaming response complete")
104 | return response
105 |
106 |
107 | def choose_agent(task: str) -> dict:
108 | """Determines what agent should be used
109 | Args:
110 | task (str): The research question the user asked
111 | Returns:
112 |         dict: The chosen agent ("agent") and its role prompt ("agent_role_prompt")
114 | """
115 | try:
116 | configuration = choose_agent_configuration()
117 |
118 | response = openai.ChatCompletion.create(
119 | model=CFG.smart_llm_model,
120 | messages=[
121 | {"role": "user", "content": f"{task}"}],
122 | functions=configuration,
123 | temperature=0,
124 | )
125 | message = response["choices"][0]["message"]
126 |
127 | if message.get("function_call"):
128 |             arguments = json.loads(message["function_call"]["arguments"])
129 |             return {"agent": arguments.get("agent"),
130 |                     "agent_role_prompt": arguments.get("instructions")}
131 | else:
132 | return {"agent": "Default Agent",
133 | "agent_role_prompt": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."}
134 | except Exception as e:
135 | print(f"{Fore.RED}Error in choose_agent: {e}{Style.RESET_ALL}")
136 | return {"agent": "Default Agent",
137 | "agent_role_prompt": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."}
138 |
139 |
140 | def choose_agent_configuration():
141 | configuration = [
142 | {
143 | "name": "research",
144 | "description": "Researches the given topic even if it can't be answered",
145 | "parameters": {
146 | "type": "object",
147 | "properties": {
148 | "agent": {
149 | "type": "string",
150 | "description":
151 | """
152 | Determines the field of the topic and the name of the agent we could use in order to research
153 | about the topic provided.
154 |
155 | Example of agents:
156 | "Business Analyst Agent", "Finance Agent", "Travel Agent",
157 | "Academic Research Agent", "Computer Security Analyst Agent"
158 |
159 | if an agent for the field required doesn't exist make one up
160 | fit an emoji to every agent before the agent name
161 | """,
162 | },
163 | "instructions": {
164 | "type": "string",
165 | "description":
166 | """
167 | each provided agent needs instructions in order to start working,
168 | examples for agents and their instructions:
169 | "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.",
170 | "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.",
171 | "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.",
172 | "Business Analyst": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.",
173 | "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.",
174 |
175 | """,
176 | },
177 | },
178 | "required": ["agent", "instructions"],
179 | },
180 | }
181 | ]
182 | return configuration
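

if __name__ == "__main__":
    # Editor-added usage sketch: pick an agent configuration for a sample task.
    # Assumes OPENAI_API_KEY is set in the environment (see the README Quickstart).
    print(choose_agent("Should I invest in the S&P 500?"))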
183 |
184 |
185 |
--------------------------------------------------------------------------------