├── processing ├── __init__.py ├── html.py ├── style.css └── text.py ├── Procfile ├── client ├── static │ ├── favicon.ico │ ├── mathAgentAvatar.png │ ├── travelAgentAvatar.png │ ├── defaultAgentAvatar.JPG │ ├── financeAgentAvatar.png │ ├── academicResearchAgentAvatar.png │ ├── businessAnalystAgentAvatar.png │ └── computerSecurityanalystAvatar.png ├── scripts.js ├── styles.css └── index.html ├── .platform └── hooks │ ├── nginx │ └── conf.d │ │ └── timeout.conf │ └── predeploy │ ├── 01_chrome.sh │ └── 01_weasyprint.sh ├── .ebextensions ├── 01_fastapi.config └── 02_install_fonts.config ├── config ├── __init__.py ├── singleton.py └── config.py ├── requirements.txt ├── .github └── dependabot.yml ├── actions ├── web_search.py └── web_scrape.py ├── js └── overlay.js ├── LICENSE ├── main.py ├── agent ├── run.py ├── prompts.py ├── research_agent.py └── llm_utils.py ├── utils └── utils.py └── README.md /processing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn main:app --workers=4 --worker-class=uvicorn.workers.UvicornWorker --timeout 600 2 | -------------------------------------------------------------------------------- /client/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/favicon.ico -------------------------------------------------------------------------------- /client/static/mathAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/mathAgentAvatar.png -------------------------------------------------------------------------------- /client/static/travelAgentAvatar.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/travelAgentAvatar.png -------------------------------------------------------------------------------- /client/static/defaultAgentAvatar.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/defaultAgentAvatar.JPG -------------------------------------------------------------------------------- /client/static/financeAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/financeAgentAvatar.png -------------------------------------------------------------------------------- /client/static/academicResearchAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/academicResearchAgentAvatar.png -------------------------------------------------------------------------------- /client/static/businessAnalystAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/businessAnalystAgentAvatar.png -------------------------------------------------------------------------------- /client/static/computerSecurityanalystAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rotemweiss57/gpt-researcher/HEAD/client/static/computerSecurityanalystAvatar.png -------------------------------------------------------------------------------- /.platform/hooks/nginx/conf.d/timeout.conf: -------------------------------------------------------------------------------- 1 | proxy_connect_timeout 
600s; 2 | proxy_send_timeout 600s; 3 | proxy_read_timeout 600s; 4 | fastcgi_send_timeout 600s; 5 | fastcgi_read_timeout 600s; 6 | -------------------------------------------------------------------------------- /.ebextensions/01_fastapi.config: -------------------------------------------------------------------------------- 1 | option_settings: 2 | aws:elasticbeanstalk:application:environment: 3 | PYTHONPATH: "/var/app/current:$PYTHONPATH" 4 | aws:elasticbeanstalk:container:python: 5 | WSGIPath: "main:app" 6 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | from config.config import Config, check_openai_api_key 2 | from config.singleton import AbstractSingleton, Singleton 3 | 4 | __all__ = [ 5 | "check_openai_api_key", 6 | "AbstractSingleton", 7 | "Config", 8 | "Singleton", 9 | ] 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # dependencies 2 | # NOTE: do not install the PyPI "asyncio" package (was pinned here as asyncio==3.4.3) — asyncio is part of the Python standard library, and the obsolete 3.4.3 backport shadows it and breaks on Python 3 3 | beautifulsoup4==4.12.2 4 | colorama==0.4.6 5 | duckduckgo_search==3.8.3 6 | md2pdf==1.0.1 7 | openai~=0.27.8 8 | playwright==1.35.0 9 | python-dotenv~=1.0.0 10 | pyyaml==6.0 11 | selenium 12 | webdriver-manager==3.9.1 13 | boto3 14 | flask 15 | uvicorn 16 | pydantic 17 | fastapi 18 | python-multipart 19 | markdown 20 | pymongo 21 | seleniumbase 22 | -------------------------------------------------------------------------------- /.ebextensions/02_install_fonts.config: -------------------------------------------------------------------------------- 1 | container_commands: 2 | 01_download_librebaskerville_font: 3 | command: wget -P /tmp/ https://github.com/google/fonts/raw/main/ofl/librebaskerville/LibreBaskerville-Regular.ttf 4 | 02_create_fontdir: 5 | command: sudo mkdir -p /usr/share/fonts/librebaskerville 6 | 03_mv_font: 7 | 
command: sudo mv /tmp/LibreBaskerville-Regular.ttf /usr/share/fonts/librebaskerville 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | -------------------------------------------------------------------------------- /config/singleton.py: -------------------------------------------------------------------------------- 1 | """The singleton metaclass for ensuring only one instance of a class.""" 2 | import abc 3 | 4 | 5 | class Singleton(abc.ABCMeta, type): 6 | """ 7 | Singleton metaclass for ensuring only one instance of a class. 8 | """ 9 | 10 | _instances = {} 11 | 12 | def __call__(cls, *args, **kwargs): 13 | """Call method for the singleton metaclass.""" 14 | if cls not in cls._instances: 15 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 16 | return cls._instances[cls] 17 | 18 | 19 | class AbstractSingleton(abc.ABC, metaclass=Singleton): 20 | """ 21 | Abstract singleton class for ensuring only one instance of a class. 
22 | """ 23 | 24 | pass 25 | -------------------------------------------------------------------------------- /actions/web_search.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import json 3 | from duckduckgo_search import DDGS 4 | 5 | ddgs = DDGS() 6 | 7 | def web_search(query: str, num_results: int = 5) -> str: 8 | """Useful for general internet search queries.""" 9 | print("Searching with query {0}...".format(query)) 10 | search_results = [] 11 | if not query: 12 | return json.dumps(search_results) 13 | 14 | results = ddgs.text(query) 15 | if not results: 16 | return json.dumps(search_results) 17 | 18 | total_added = 0 19 | for j in results: 20 | search_results.append(j) 21 | total_added += 1 22 | if total_added >= num_results: 23 | break 24 | 25 | return json.dumps(search_results, ensure_ascii=False, indent=4) 26 | -------------------------------------------------------------------------------- /js/overlay.js: -------------------------------------------------------------------------------- 1 | const overlay = document.createElement('div'); 2 | Object.assign(overlay.style, { 3 | position: 'fixed', 4 | zIndex: 999999, 5 | top: 0, 6 | left: 0, 7 | width: '100%', 8 | height: '100%', 9 | background: 'rgba(0, 0, 0, 0.7)', 10 | color: '#fff', 11 | fontSize: '24px', 12 | fontWeight: 'bold', 13 | display: 'flex', 14 | justifyContent: 'center', 15 | alignItems: 'center', 16 | }); 17 | const textContent = document.createElement('div'); 18 | Object.assign(textContent.style, { 19 | textAlign: 'center', 20 | }); 21 | textContent.textContent = 'Tavily AI: Analyzing Page'; 22 | overlay.appendChild(textContent); 23 | document.body.append(overlay); 24 | document.body.style.overflow = 'hidden'; 25 | let dotCount = 0; 26 | setInterval(() => { 27 | textContent.textContent = 'Tavily AI: Analyzing Page' + '.'.repeat(dotCount); 28 | dotCount = (dotCount + 1) % 4; 29 | }, 1000); 30 | 
-------------------------------------------------------------------------------- /.platform/hooks/predeploy/01_chrome.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if Google Chrome is already installed 4 | if ! command -v google-chrome &> /dev/null 5 | then 6 | # Install Google Chrome 7 | curl -sS https://intoli.com/install-google-chrome.sh | bash 8 | if [ $? -ne 0 ]; then 9 | echo "Failed to install Google Chrome" 10 | exit 1 11 | fi 12 | mv -f /usr/bin/google-chrome-stable /usr/bin/google-chrome 13 | if [ $? -ne 0 ]; then 14 | echo "Failed to move Google Chrome executable" 15 | exit 1 16 | fi 17 | else 18 | echo "Google Chrome is already installed" 19 | fi 20 | 21 | # Print the version and location 22 | google-chrome --version && which google-chrome 23 | 24 | # Check if the temporary file exists before trying to remove it 25 | if [ -f "chromedriver_linux64.zip" ]; then 26 | rm chromedriver_linux64.zip 27 | fi 28 | 29 | echo "Google Chrome installation script completed successfully." 
30 | -------------------------------------------------------------------------------- /processing/html.py: -------------------------------------------------------------------------------- 1 | """HTML processing functions""" 2 | from __future__ import annotations 3 | 4 | from bs4 import BeautifulSoup 5 | from requests.compat import urljoin 6 | 7 | 8 | def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> list[tuple[str, str]]: 9 | """Extract hyperlinks from a BeautifulSoup object 10 | 11 | Args: 12 | soup (BeautifulSoup): The BeautifulSoup object 13 | base_url (str): The base URL 14 | 15 | Returns: 16 | List[Tuple[str, str]]: The extracted hyperlinks 17 | """ 18 | return [ 19 | (link.text, urljoin(base_url, link["href"])) 20 | for link in soup.find_all("a", href=True) 21 | ] 22 | 23 | 24 | def format_hyperlinks(hyperlinks: list[tuple[str, str]]) -> list[str]: 25 | """Format hyperlinks to be displayed to the user 26 | 27 | Args: 28 | hyperlinks (List[Tuple[str, str]]): The hyperlinks to format 29 | 30 | Returns: 31 | List[str]: The formatted hyperlinks 32 | """ 33 | return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Assaf Elovic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /processing/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Libre Baskerville', serif; 3 | font-size: 12pt; /* standard size for academic papers */ 4 | line-height: 1.6; /* for readability */ 5 | color: #333; /* softer on the eyes than black */ 6 | background-color: #fff; /* white background */ 7 | margin: 0; 8 | padding: 0; 9 | } 10 | 11 | h1, h2, h3, h4, h5, h6 { 12 | font-family: 'Libre Baskerville', serif; 13 | color: #000; /* darker than the body text */ 14 | margin-top: 1em; /* space above headers */ 15 | } 16 | 17 | h1 { 18 | font-size: 1.8em; 19 | } 20 | 21 | h2 { 22 | font-size: 1.4em; 23 | } 24 | 25 | /* Add some space between paragraphs */ 26 | p { 27 | margin-bottom: 1em; 28 | } 29 | 30 | /* Style for blockquotes, often used in academic papers */ 31 | blockquote { 32 | font-style: italic; 33 | margin: 1em 0; 34 | padding: 1em; 35 | background-color: #f9f9f9; /* a light grey background */ 36 | } 37 | 38 | /* You might want to style tables, figures, etc. 
too */ 39 | table { 40 | border-collapse: collapse; 41 | width: 100%; 42 | } 43 | 44 | table, th, td { 45 | border: 1px solid #ddd; 46 | text-align: left; 47 | padding: 8px; 48 | } 49 | 50 | th { 51 | background-color: #f2f2f2; 52 | color: black; 53 | } 54 | -------------------------------------------------------------------------------- /.platform/hooks/predeploy/01_weasyprint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | declare -a packages=("libxml2-devel" "libxslt-devel" "python-devel" "redhat-rpm-config" "libffi-devel" "cairo" "pango") 4 | 5 | for package in "${packages[@]}"; do 6 | if ! rpm -q $package; then 7 | yum install -y $package 8 | fi 9 | done 10 | 11 | export PKG_CONFIG_PATH=/usr/lib64/pkgconfig:/usr/lib/pkgconfig 12 | export PATH=/usr/bin:$PATH 13 | export LDFLAGS=-L/usr/lib64:/usr/lib 14 | export LD_LIBRARY_PATH=/usr/lib64:/usr/lib 15 | export CPPFLAGS=-I/usr/include 16 | 17 | sudo yum-config-manager --enable epel 18 | 19 | sudo yum update -y 20 | 21 | declare -a packages2=("gcc" "gcc-c++" "glib2-devel" "libxml2-devel" "libpng-devel" "libjpeg-turbo-devel" "gobject-introspection" "gobject-introspection-devel") 22 | 23 | for package in "${packages2[@]}"; do 24 | if ! rpm -q $package; then 25 | yum install -y $package 26 | fi 27 | done 28 | 29 | if [ ! -f /usr/lib/libcroco-0.6.8/libcroco.la ]; then 30 | wget http://ftp.gnome.org/pub/GNOME/sources/libcroco/0.6/libcroco-0.6.8.tar.xz 31 | tar xvfJ libcroco-0.6.8.tar.xz 32 | cd libcroco-0.6.8 33 | ./configure --prefix=/usr 34 | make 35 | sudo make install 36 | cd .. 37 | fi 38 | 39 | if [ ! -f /usr/lib/gdk-pixbuf-2.0/2.10.0/loaders/libpixbufloader-svg.so ]; then 40 | wget http://ftp.gnome.org/pub/GNOME/sources/gdk-pixbuf/2.28/gdk-pixbuf-2.28.2.tar.xz 41 | tar xvfJ gdk-pixbuf-2.28.2.tar.xz 42 | cd gdk-pixbuf-2.28.2 43 | ./configure --prefix=/usr --without-libtiff 44 | make 45 | sudo make install 46 | cd .. 47 | fi 48 | 49 | if [ ! 
-f /usr/lib/pkgconfig/fontconfig.pc ]; then 50 | wget http://www.freedesktop.org/software/fontconfig/release/fontconfig-2.13.93.tar.gz 51 | tar xvf fontconfig-2.13.93.tar.gz 52 | cd fontconfig-2.13.93 53 | ./configure --prefix=/usr --enable-libxml2 54 | make 55 | sudo make install 56 | cd .. 57 | fi 58 | 59 | if [ ! -f /usr/lib/libcairo.so ]; then 60 | wget http://cairographics.org/releases/cairo-1.16.0.tar.xz 61 | tar xvfJ cairo-1.16.0.tar.xz 62 | cd cairo-1.16.0 63 | ./configure --prefix=/usr 64 | make 65 | sudo make install 66 | cd .. 67 | fi 68 | 69 | if [ ! -f /usr/lib/libpango-1.0.so ]; then 70 | wget http://ftp.gnome.org/pub/GNOME/sources/pango/1.48/pango-1.48.4.tar.xz 71 | tar xvfJ pango-1.48.4.tar.xz 72 | cd pango-1.48.4 73 | ./configure --prefix=/usr 74 | make 75 | sudo make install 76 | cd .. 77 | fi 78 | 79 | if [ ! -f /usr/lib/librsvg-2.so ]; then 80 | wget http://ftp.gnome.org/pub/GNOME/sources/librsvg/2.40/librsvg-2.40.6.tar.xz 81 | tar xvfJ librsvg-2.40.6.tar.xz 82 | cd librsvg-2.40.6 83 | ./configure --prefix=/usr 84 | make 85 | sudo make install 86 | cd .. 
87 | fi 88 | 89 | sudo ldconfig /usr/lib 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import openai 4 | from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect 5 | from fastapi.staticfiles import StaticFiles 6 | from fastapi.templating import Jinja2Templates 7 | from pydantic import BaseModel 8 | import json 9 | import os 10 | 11 | from agent.llm_utils import choose_agent 12 | from agent.run import WebSocketManager 13 | 14 | 15 | class ResearchRequest(BaseModel): 16 | task: str 17 | report_type: str 18 | agent: str 19 | 20 | 21 | 22 | app = FastAPI() 23 | print("Starting server...") 24 | app.mount("/site", StaticFiles(directory="client"), name="site") 25 | app.mount("/static", StaticFiles(directory="client/static"), name="static") 26 | # Dynamic directory for outputs once first research is run 27 | @app.on_event("startup") 28 | def startup_event(): 29 | if not os.path.isdir("outputs"): 30 | os.makedirs("outputs") 31 | app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs") 32 | 33 | templates = Jinja2Templates(directory="client") 34 | 35 | manager = WebSocketManager() 36 | 37 | 38 | @app.get("/") 39 | async def read_root(request: Request): 40 | return templates.TemplateResponse('index.html', {"request": request, "report": None}) 41 | 42 | 43 | @app.websocket("/ws") 44 | async def websocket_endpoint(websocket: WebSocket): 45 | await manager.connect(websocket) 46 | print("Client connected") # New log 47 | try: 48 | while True: 49 | data = await websocket.receive_text() 50 | if data.startswith("start"): 51 | json_data = json.loads(data[6:]) 52 | task = json_data.get("task") 53 | report_type = json_data.get("report_type") 54 | agent = json_data.get("agent") 55 | api_key = json_data.get("api_key") 56 | openai.api_key = api_key 57 | # temporary so "normal agents" can still 
be used and not just auto generated, will be removed when we move to auto generated 58 | if agent == "Auto Agent": 59 | agent_dict = choose_agent(task) 60 | agent = agent_dict.get("agent") 61 | agent_role_prompt = agent_dict.get("agent_role_prompt") 62 | else: 63 | agent_role_prompt = None 64 | 65 | await websocket.send_json({"type": "logs", "output": f"Initiated an Agent: {agent}"}) 66 | if task and report_type and agent: 67 | print("check") 68 | await manager.start_streaming(task, report_type, agent, websocket, agent_role_prompt, api_key) 69 | else: 70 | print("Error: not enough parameters provided.") 71 | 72 | except WebSocketDisconnect: 73 | await manager.disconnect(websocket) 74 | 75 | if __name__ == "__main__": 76 | import uvicorn 77 | 78 | uvicorn.run(app, host="0.0.0.0", port=8000) -------------------------------------------------------------------------------- /config/config.py: -------------------------------------------------------------------------------- 1 | """Configuration class to store the state of bools for different scripts access.""" 2 | import os 3 | 4 | import openai 5 | from colorama import Fore 6 | from dotenv import load_dotenv 7 | 8 | from config.singleton import Singleton 9 | 10 | load_dotenv(verbose=True) 11 | 12 | 13 | class Config(metaclass=Singleton): 14 | """ 15 | Configuration class to store the state of bools for different scripts access. 
16 | """ 17 | 18 | def __init__(self) -> None: 19 | """Initialize the Config class""" 20 | self.debug_mode = False 21 | self.allow_downloads = False 22 | 23 | self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome") 24 | self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo-16k") 25 | self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4") 26 | self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000)) 27 | self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000)) 28 | self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8192)) 29 | 30 | #self.openai_api_key = os.getenv("OPENAI_API_KEY") 31 | self.temperature = float(os.getenv("TEMPERATURE", "1")) 32 | 33 | self.user_agent = os.getenv( 34 | "USER_AGENT", 35 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36" 36 | " (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", 37 | ) 38 | 39 | self.memory_backend = os.getenv("MEMORY_BACKEND", "local") 40 | # Initialize the OpenAI API client 41 | #openai.api_key = self.openai_api_key 42 | 43 | def set_fast_llm_model(self, value: str) -> None: 44 | """Set the fast LLM model value.""" 45 | self.fast_llm_model = value 46 | 47 | def set_smart_llm_model(self, value: str) -> None: 48 | """Set the smart LLM model value.""" 49 | self.smart_llm_model = value 50 | 51 | def set_fast_token_limit(self, value: int) -> None: 52 | """Set the fast token limit value.""" 53 | self.fast_token_limit = value 54 | 55 | def set_smart_token_limit(self, value: int) -> None: 56 | """Set the smart token limit value.""" 57 | self.smart_token_limit = value 58 | 59 | def set_browse_chunk_max_length(self, value: int) -> None: 60 | """Set the browse_website command chunk max length value.""" 61 | self.browse_chunk_max_length = value 62 | 63 | def set_openai_api_key(self, value: str) -> None: 64 | """Set the OpenAI API key value.""" 65 | self.openai_api_key = value 66 | 67 | def set_debug_mode(self, value: bool) -> None: 68 | 
"""Set the debug mode value.""" 69 | self.debug_mode = value 70 | 71 | 72 | def check_openai_api_key() -> None: 73 | """Check if the OpenAI API key is set in config.py or as an environment variable.""" 74 | cfg = Config() 75 | if not cfg.openai_api_key: 76 | return False 77 | else: 78 | return True 79 | -------------------------------------------------------------------------------- /agent/run.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import datetime 3 | import uuid 4 | 5 | import openai 6 | from typing import List, Dict 7 | from fastapi import WebSocket 8 | from utils.utils import * 9 | # from config import check_openai_api_key 10 | from agent.research_agent import ResearchAgent 11 | 12 | 13 | class WebSocketManager: 14 | def __init__(self): 15 | self.active_connections: List[WebSocket] = [] 16 | self.sender_tasks: Dict[WebSocket, asyncio.Task] = {} 17 | self.message_queues: Dict[WebSocket, asyncio.Queue] = {} 18 | 19 | async def start_sender(self, websocket: WebSocket): 20 | queue = self.message_queues[websocket] 21 | while True: 22 | message = await queue.get() 23 | if websocket in self.active_connections: 24 | await websocket.send_text(message) 25 | else: 26 | break 27 | 28 | async def connect(self, websocket: WebSocket): 29 | await websocket.accept() 30 | self.active_connections.append(websocket) 31 | self.message_queues[websocket] = asyncio.Queue() 32 | self.sender_tasks[websocket] = asyncio.create_task(self.start_sender(websocket)) 33 | 34 | async def disconnect(self, websocket: WebSocket): 35 | self.active_connections.remove(websocket) 36 | self.sender_tasks[websocket].cancel() 37 | del self.sender_tasks[websocket] 38 | del self.message_queues[websocket] 39 | 40 | async def start_streaming(self, task, report_type, agent, websocket,agent_role_prompt, api_key): 41 | report, path = await run_agent(task, report_type, agent, websocket,agent_role_prompt, api_key) 42 | return report, path 43 | 44 | 
45 | async def run_agent(task, report_type, agent, websocket,agent_role_prompt, api_key): 46 | openai.api_key = api_key 47 | start_time = datetime.now() 48 | print(f"Start time: {start_time}") 49 | document_id = query2db(task, agent, report_type, start_time) 50 | assistant = ResearchAgent(task, agent, agent_role_prompt, websocket) 51 | result, error = await assistant.conduct_research() 52 | if result == "Error": 53 | await websocket.send_json({"type": "logs", "output": error}) 54 | end_time = datetime.now() 55 | total_time = end_time - start_time 56 | update_query(document_id=document_id, status="failed", end_time=end_time, total_time=total_time,error=error) 57 | 58 | return None, None 59 | report, encoded_path, path = await assistant.write_report(report_type, websocket) 60 | await websocket.send_json({"type": "path", "output": encoded_path}) 61 | 62 | end_time = datetime.now() 63 | total_time = end_time - start_time 64 | await websocket.send_json({"type": "logs", "output": f"\nEnd time: {end_time}\n"}) 65 | await websocket.send_json({"type": "logs", "output": f"\nTotal run time: {total_time}\n"}) 66 | 67 | file_name = str(uuid.uuid4()) + '.pdf' 68 | 69 | url = upload_to_s3(path, "tavily-reports", file_name) 70 | update_query(document_id=document_id, path=url, status="finished", end_time=end_time, total_time=total_time,) 71 | 72 | return report, path 73 | 74 | -------------------------------------------------------------------------------- /client/scripts.js: -------------------------------------------------------------------------------- 1 | const GPTResearcher = (() => { 2 | const startResearch = () => { 3 | document.getElementById("output").innerHTML = ""; 4 | document.getElementById("reportContainer").innerHTML = ""; 5 | 6 | addAgentResponse({ output: "🤔 Thinking about research questions for the task..." 
}); 7 | 8 | listenToSockEvents(); 9 | }; 10 | 11 | const listenToSockEvents = () => { 12 | const { protocol, host, pathname } = window.location; 13 | const ws_uri = 'wss://app.tavily.com/ws' 14 | //const ws_uri = `${protocol === 'https:' ? 'wss:' : 'ws:'}//${host}${pathname}ws`; 15 | const converter = new showdown.Converter(); 16 | const socket = new WebSocket(ws_uri); 17 | 18 | socket.onmessage = (event) => { 19 | const data = JSON.parse(event.data); 20 | if (data.type === 'logs') { 21 | addAgentResponse(data); 22 | } else if (data.type === 'report') { 23 | writeReport(data, converter); 24 | } else if (data.type === 'path') { 25 | updateDownloadLink(data); 26 | } 27 | }; 28 | 29 | socket.onopen = (event) => { 30 | const task = document.querySelector('input[name="task"]').value; 31 | const report_type = document.querySelector('select[name="report_type"]').value; 32 | const api_key = document.querySelector('input[name="api_key"]').value; // Get the API key from the input field 33 | const agent = document.querySelector('input[name="agent"]:checked').value; 34 | 35 | const requestData = { 36 | task: task, 37 | report_type: report_type, 38 | api_key: api_key, 39 | agent: agent, 40 | }; 41 | 42 | socket.send(`start ${JSON.stringify(requestData)}`); 43 | }; 44 | }; 45 | 46 | const addAgentResponse = (data) => { 47 | const output = document.getElementById("output"); 48 | output.innerHTML += '
' + data.output + '
'; 49 | output.scrollTop = output.scrollHeight; 50 | output.style.display = "block"; 51 | updateScroll(); 52 | }; 53 | 54 | const writeReport = (data, converter) => { 55 | const reportContainer = document.getElementById("reportContainer"); 56 | const markdownOutput = converter.makeHtml(data.output); 57 | reportContainer.innerHTML += markdownOutput; 58 | updateScroll(); 59 | }; 60 | 61 | const updateDownloadLink = (data) => { 62 | const path = data.output; 63 | const downloadLink = document.getElementById("downloadLink"); 64 | downloadLink.href = path; 65 | }; 66 | 67 | const updateScroll = () => { 68 | window.scrollTo(0, document.body.scrollHeight); 69 | }; 70 | 71 | const copyToClipboard = () => { 72 | const textarea = document.createElement('textarea'); 73 | textarea.id = 'temp_element'; 74 | textarea.style.height = 0; 75 | document.body.appendChild(textarea); 76 | textarea.value = document.getElementById('reportContainer').innerText; 77 | const selector = document.querySelector('#temp_element'); 78 | selector.select(); 79 | document.execCommand('copy'); 80 | document.body.removeChild(textarea); 81 | }; 82 | 83 | return { 84 | startResearch, 85 | copyToClipboard, 86 | }; 87 | })(); 88 | -------------------------------------------------------------------------------- /client/styles.css: -------------------------------------------------------------------------------- 1 | @keyframes gradientBG { 2 | 0% {background-position: 0% 50%;} 3 | 50% {background-position: 100% 50%;} 4 | 100% {background-position: 0% 50%;} 5 | } 6 | 7 | body { 8 | font-family: 'Montserrat', sans-serif; 9 | color: #fff; 10 | line-height: 1.6; 11 | background-size: 200% 200%; 12 | background-image: linear-gradient(45deg, #151A2D, #2D284D, #151A2D); 13 | animation: gradientBG 10s ease infinite; 14 | } 15 | 16 | .landing { 17 | display: flex; 18 | justify-content: center; 19 | align-items: center; 20 | height: 100vh; 21 | text-align: center; 22 | } 23 | 24 | .landing h1 { 25 | font-size: 3.5rem; 
26 | font-weight: 700; 27 | margin-bottom: 2rem; 28 | } 29 | 30 | .landing p { 31 | font-size: 1.5rem; 32 | font-weight: 400; 33 | max-width: 500px; 34 | margin: auto; 35 | margin-bottom: 2rem; 36 | } 37 | 38 | .container { 39 | max-width: 900px; 40 | margin: auto; 41 | padding: 20px; 42 | background-color: rgba(255, 255, 255, 0.1); 43 | border-radius: 12px; 44 | box-shadow: 0px 10px 25px rgba(0, 0, 0, 0.1); 45 | transition: all .3s ease-in-out; 46 | margin-bottom: 180px; 47 | } 48 | 49 | .container:hover { 50 | transform: scale(1.01); 51 | box-shadow: 0px 15px 30px rgba(0, 0, 0, 0.2); 52 | } 53 | 54 | input, select, #output, #reportContainer { 55 | background-color: rgba(255,255,255,0.1); 56 | border: none; 57 | color: #fff; 58 | transition: all .3s ease-in-out; 59 | } 60 | 61 | input:hover, input:focus, select:hover, select:focus { 62 | background-color: #dfe4ea; 63 | border: 1px solid rgba(255, 255, 255, 0.5); 64 | box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1); 65 | transition: all 0.3s ease-in-out; 66 | } 67 | 68 | .btn-primary { 69 | background: linear-gradient(to right, #0062cc, #007bff); 70 | border: none; 71 | transition: all .3s ease-in-out; 72 | } 73 | 74 | .btn-secondary { 75 | background: linear-gradient(to right, #6c757d, #6c757d); 76 | border: none; 77 | transition: all .3s ease-in-out; 78 | } 79 | 80 | .btn:hover { 81 | opacity: 0.8; 82 | transform: scale(1.1); 83 | box-shadow: 0px 10px 20px rgba(0, 0, 0, 0.3); 84 | } 85 | 86 | .agent_question { 87 | font-size: 1.2rem; 88 | font-weight: 500; 89 | margin-bottom: 0.5rem; 90 | } 91 | 92 | footer { 93 | position: fixed; 94 | left: 0; 95 | bottom: 0; 96 | width: 100%; 97 | background: linear-gradient(to right, #151A2D, #111827); 98 | color: white; 99 | text-align: center; 100 | padding: 10px 0; 101 | } 102 | .margin-div { 103 | margin-top: 20px; 104 | margin-bottom: 20px; 105 | padding: 10px; 106 | } 107 | 108 | .agent_response { 109 | background-color: #747d8c; 110 | margin: 10px; 111 | padding: 10px; 112 | 
import os
from pymongo import MongoClient
from datetime import datetime, timedelta
import boto3
from botocore.exceptions import NoCredentialsError

# Cluster host is hard-coded; only the credentials come from the environment.
_MONGO_URI_TEMPLATE = "mongodb+srv://{usr}:{pwd}@cluster0.47o7jxs.mongodb.net/?retryWrites=true&w=majority"


def _get_mongo_client():
    """Return a MongoClient for the Tavily cluster.

    Credentials are read from the MONGO_USER / MONGO_PWD environment
    variables. The caller is responsible for closing the client.
    """
    usr = os.getenv('MONGO_USER')
    pwd = os.getenv('MONGO_PWD')
    return MongoClient(_MONGO_URI_TEMPLATE.format(usr=usr, pwd=pwd))


def query2db(query, agent, report_type, start_time):
    """Insert a 'started' research-query document into Tavily.ResearchQueries.

    Args:
        query (str): The research question.
        agent (str): The agent type handling the query.
        report_type (str): The requested report type.
        start_time (datetime): When the research run began.

    Returns:
        ObjectId: The _id of the inserted document.
    """
    client = _get_mongo_client()
    try:
        collection = client['Tavily']['ResearchQueries']
        document = {
            'query': query,
            'agent': agent,
            'report_type': report_type,
            'start_time': start_time,
            'end_time': None,
            'total_time': None,
            'report_path': None,
            'status': 'started',
            'created': datetime.now()
        }
        return collection.insert_one(document).inserted_id
    finally:
        # Close even if the insert raises, so connections are not leaked.
        client.close()


def update_query(document_id, path=None, status=None, end_time=None, total_time=None, error=None):
    """Update a research-query document with completion (or failure) details.

    Args:
        document_id (ObjectId): _id returned by query2db.
        path (str, optional): Path of the generated report, if any.
        status (str, optional): Final status of the run.
        end_time (datetime, optional): When the run finished.
        total_time (timedelta, optional): Total run duration.
        error (str, optional): Error description for failed runs.
    """
    # Bug fix: total_time defaults to None (e.g. when reporting a failure);
    # calling .total_seconds() unconditionally raised AttributeError.
    total_time_seconds = total_time.total_seconds() if total_time is not None else None

    client = _get_mongo_client()
    try:
        collection = client['Tavily']['ResearchQueries']
        collection.update_one({'_id': document_id}, {
            '$set': {
                'end_time': end_time,
                'total_time': total_time_seconds,
                'report_path': path,
                'status': status,
                'error': error
            }
        })
    finally:
        client.close()


# Initialize the S3 client once at import time (uses the default AWS credential chain).
s3 = boto3.client('s3')


def upload_to_s3(file_path, bucket, file_name):
    """Upload a local PDF to S3 with a public-read ACL.

    Args:
        file_path (str): Local path of the file to upload.
        bucket (str): Target S3 bucket name.
        file_name (str): Key to store the object under.

    Returns:
        str | None: Public URL of the uploaded object, or None on failure.
    """
    try:
        s3.upload_file(
            Filename=file_path,
            Bucket=bucket,
            Key=file_name,
            ExtraArgs={
                'ACL': 'public-read',  # make the report publicly downloadable
                'ContentType': 'application/pdf'
            }
        )
        print("Upload Successful")
        return f"https://{bucket}.s3.amazonaws.com/{file_name}"
    except FileNotFoundError:
        print("The file was not found")
        return None
    except NoCredentialsError:
        print("Credentials not available")
        return None
"""Text processing functions"""
import urllib
from typing import Dict, Generator, Optional
import string

from selenium.webdriver.remote.webdriver import WebDriver

from config import Config
from agent.llm_utils import create_chat_completion
import os
from md2pdf.core import md2pdf

CFG = Config()


def split_text(text: str, max_length: int = 8192) -> Generator[str, None, None]:
    """Split text into chunks of at most max_length characters.

    Chunks are assembled from whole newline-separated paragraphs; a single
    paragraph longer than max_length is emitted as its own oversized chunk.

    Args:
        text (str): The text to split
        max_length (int, optional): The maximum length of each chunk. Defaults to 8192.

    Yields:
        str: The next chunk of text
    """
    paragraphs = text.split("\n")
    current_length = 0
    current_chunk = []

    for paragraph in paragraphs:
        if current_length + len(paragraph) + 1 <= max_length:
            current_chunk.append(paragraph)
            current_length += len(paragraph) + 1
        else:
            # Bug fix: guard against yielding an empty chunk when the very
            # first paragraph already exceeds max_length.
            if current_chunk:
                yield "\n".join(current_chunk)
            current_chunk = [paragraph]
            current_length = len(paragraph) + 1

    if current_chunk:
        yield "\n".join(current_chunk)


def summarize_text(
    url: str, text: str, question: str, driver: Optional[WebDriver] = None
) -> str:
    """Summarize text with the configured fast LLM.

    Each chunk is summarized independently, then the concatenated chunk
    summaries are summarized once more to produce the final answer.

    Args:
        url (str): The url of the text
        text (str): The text to summarize
        question (str): The question to ask the model
        driver (WebDriver): The webdriver to use to scroll the page

    Returns:
        str: The summary of the text
    """
    if not text:
        return "Error: No text to summarize"

    summaries = []
    chunks = list(split_text(text))
    scroll_ratio = 1 / len(chunks)

    for i, chunk in enumerate(chunks):
        if driver:
            # Scroll proportionally so the user can follow progress in the browser.
            scroll_to_percentage(driver, scroll_ratio * i)

        messages = [create_message(chunk, question)]
        summaries.append(create_chat_completion(
            model=CFG.fast_llm_model,
            messages=messages,
        ))

    # Second pass: condense the per-chunk summaries into one answer.
    combined_summary = "\n".join(summaries)
    messages = [create_message(combined_summary, question)]

    return create_chat_completion(
        model=CFG.fast_llm_model,
        messages=messages,
    )


def scroll_to_percentage(driver: WebDriver, ratio: float) -> None:
    """Scroll to a percentage of the page

    Args:
        driver (WebDriver): The webdriver to use
        ratio (float): The percentage to scroll to

    Raises:
        ValueError: If the ratio is not between 0 and 1
    """
    if ratio < 0 or ratio > 1:
        raise ValueError("Percentage should be between 0 and 1")
    driver.execute_script(f"window.scrollTo(0, document.body.scrollHeight * {ratio});")


def create_message(chunk: str, question: str) -> Dict[str, str]:
    """Create a message for the chat completion

    Args:
        chunk (str): The chunk of text to summarize
        question (str): The question to answer

    Returns:
        Dict[str, str]: The message to send to the chat completion
    """
    return {
        "role": "user",
        "content": f'"""{chunk}""" Using the above text, answer the following'
        f' question: "{question}" -- if the question cannot be answered using the text,'
        " simply summarize the text in depth. "
        "Include all factual information, numbers, stats etc if available.",
    }


def write_to_file(filename: str, text: str) -> None:
    """Write text to a file

    Args:
        filename (str): The filename to write to
        text (str): The text to write
    """
    # utf-8 explicitly: reports contain non-ASCII (emoji, citations) and the
    # platform default encoding is not guaranteed to handle them.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(text)


async def write_md_to_pdf(task: str, directory_name: str, text: str) -> tuple:
    """Persist a markdown report and render it to PDF.

    Args:
        task (str): Base filename (without extension).
        directory_name (str): Output subdirectory under ./outputs.
        text (str): Markdown content to write.

    Returns:
        tuple: (url-encoded pdf path, raw pdf path).
        Note: the original annotation said None, but callers use the tuple.
    """
    file_path = f"./outputs/{directory_name}/{task}"
    write_to_file(f"{file_path}.md", text)
    md_to_pdf(f"{file_path}.md", f"{file_path}.pdf")
    print(f"{task} written to {file_path}.pdf")

    encoded_file_path = urllib.parse.quote(f"{file_path}.pdf")

    return encoded_file_path, f"{file_path}.pdf"


def read_txt_files(directory):
    """Concatenate the contents of all .txt files in a directory.

    Args:
        directory (str): Directory to scan (non-recursive).

    Returns:
        str: All file contents joined with newlines.
    """
    all_text = ''

    for filename in os.listdir(directory):
        if filename.endswith('.txt'):
            with open(os.path.join(directory, filename), 'r', encoding="utf-8") as file:
                all_text += file.read() + '\n'

    return all_text


def md_to_pdf(input_file, output_file):
    """Render a markdown file to PDF using the project stylesheet.

    Args:
        input_file (str): Path of the markdown source.
        output_file (str): Path of the PDF to produce.
    """
    md2pdf(output_file,
           md_content=None,
           md_file_path=input_file,
           css_file_path="./processing/style.css",
           base_url=None)
44 |
45 |

46 | Say Goodbye to
47 | Hours 49 | of Research 50 |

51 |

52 | Say Hello to GPT Researcher, your AI mate for rapid insights and comprehensive research. GPT Researcher 53 | takes care of everything from accurate source gathering to organization of research results - all in one 54 | platform designed to make your research process a breeze. 55 |

56 | Get Started 57 |
58 |
59 | 60 |
61 |
Auto Agent
63 |
64 |
65 | 66 | 67 |
68 |
69 | 70 | 71 | 72 |
73 |
74 |
75 | 76 | 77 |
78 | 79 |
80 |
81 | 82 | 87 |
88 | 89 |
90 | 91 |
92 |

Agent Output

93 |

An agent tailored specifically to your task 94 | will be generated to provide the most precise and relevant research results.

95 |
96 |
97 |
98 |

Research Report

99 |
100 | 101 | Download as PDF 102 |
103 |
104 | 105 | 109 | 110 | 111 | 112 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /actions/web_scrape.py: -------------------------------------------------------------------------------- 1 | """Selenium web scraping module.""" 2 | from __future__ import annotations 3 | 4 | import logging 5 | import asyncio 6 | from pathlib import Path 7 | from sys import platform 8 | 9 | from bs4 import BeautifulSoup 10 | from webdriver_manager.chrome import ChromeDriverManager 11 | from webdriver_manager.firefox import GeckoDriverManager 12 | from selenium import webdriver 13 | from selenium.webdriver.chrome.service import Service 14 | from selenium.webdriver.chrome.options import Options as ChromeOptions 15 | from selenium.webdriver.common.by import By 16 | from selenium.webdriver.firefox.options import Options as FirefoxOptions 17 | from selenium.webdriver.remote.webdriver import WebDriver 18 | from selenium.webdriver.safari.options import Options as SafariOptions 19 | from selenium.webdriver.support import expected_conditions as EC 20 | from selenium.webdriver.support.wait import WebDriverWait 21 | from fastapi import WebSocket 22 | 23 | import processing.text as summary 24 | 25 | from config import Config 26 | from processing.html import extract_hyperlinks, format_hyperlinks 27 | 28 | from concurrent.futures import ThreadPoolExecutor 29 | 30 | executor = ThreadPoolExecutor() 31 | 32 | FILE_DIR = Path(__file__).parent.parent 33 | CFG = Config() 34 | 35 | 36 | async def async_browse(url: str, question: str, websocket: WebSocket) -> str: 37 | """Browse a website and return the answer and links to the user 38 | 39 | Args: 40 | url (str): The url of the website to browse 41 | question (str): The question asked by the user 42 | websocket (WebSocketManager): The websocket manager 43 | 44 | Returns: 45 | str: The answer and links to the user 46 | """ 47 | loop = asyncio.get_event_loop() 48 | executor = 
"""Selenium web scraping module."""
from __future__ import annotations

import logging
import asyncio
from pathlib import Path
from sys import platform

from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.webdriver.safari.options import Options as SafariOptions
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from fastapi import WebSocket

import processing.text as summary

from config import Config
from processing.html import extract_hyperlinks, format_hyperlinks

from concurrent.futures import ThreadPoolExecutor

# Shared pool for all blocking selenium work. Reused across requests so we do
# not create (and never shut down) one pool per scraped url.
executor = ThreadPoolExecutor()

FILE_DIR = Path(__file__).parent.parent
CFG = Config()


async def async_browse(url: str, question: str, websocket: WebSocket) -> str:
    """Browse a website and stream a summary of its content to the client.

    Args:
        url (str): The url of the website to browse
        question (str): The question asked by the user
        websocket (WebSocket): The websocket used for progress messages

    Returns:
        str: The summarized information gathered from the url, or an error string
    """
    loop = asyncio.get_event_loop()
    # Bug fix: a fresh ThreadPoolExecutor(max_workers=8) was created here on
    # every call, shadowing the module-level pool and never being shut down.

    print(f"Scraping url {url} with question {question}")
    await websocket.send_json(
        {"type": "logs", "output": f"🔎 Browsing the {url} for relevant information about: {question}..."})

    try:
        driver, text = await loop.run_in_executor(executor, scrape_text_with_selenium, url)
        try:
            await loop.run_in_executor(executor, add_header, driver)
            summary_text = await loop.run_in_executor(executor, summary.summarize_text, url, text, question, driver)
        finally:
            # Bug fix: the async path never quit the driver, leaking one
            # headless Chrome process per scraped url.
            await loop.run_in_executor(executor, close_browser, driver)

        await websocket.send_json(
            {"type": "logs", "output": f"📝 Information gathered from url {url}: {summary_text}"})

        return f"Information gathered from url {url}: {summary_text}"
    except Exception as e:
        print(f"An error occurred while processing the url {url}: {e}")
        return f"Error processing the url {url}: {e}"


def browse_website(url: str, question: str) -> tuple[str, WebDriver]:
    """Browse a website and return the answer and links to the user

    Args:
        url (str): The url of the website to browse
        question (str): The question asked by the user

    Returns:
        Tuple[str, WebDriver]: The answer and links to the user and the webdriver
        NOTE(review): the returned driver has already been quit by close_browser
        below — confirm no caller uses it afterwards.
    """

    if not url:
        return "A URL was not specified, cancelling request to browse website.", None

    driver, text = scrape_text_with_selenium(url)
    add_header(driver)
    summary_text = summary.summarize_text(url, text, question, driver)

    links = scrape_links_with_selenium(driver, url)

    # Limit links to 5
    if len(links) > 5:
        links = links[:5]

    close_browser(driver)
    return f"Answer gathered from website: {summary_text} \n \n Links: {links}", driver


def scrape_text_with_selenium(url: str) -> tuple[WebDriver, str]:
    """Scrape text from a website using selenium

    Args:
        url (str): The url of the website to scrape

    Returns:
        Tuple[WebDriver, str]: The webdriver and the text scraped from the website
    """
    logging.getLogger("selenium").setLevel(logging.CRITICAL)

    options = ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")  # Overcomes limited resource problems
    options.add_argument(f'user-agent={CFG.user_agent}')
    options.add_experimental_option("prefs", {"download_restrictions": 3})

    driver = webdriver.Chrome(options=options)
    driver.get(url)

    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "body"))
    )

    # Get the HTML content directly from the browser's DOM
    page_source = driver.execute_script("return document.body.outerHTML;")
    soup = BeautifulSoup(page_source, "html.parser")

    for script in soup(["script", "style"]):
        script.extract()

    text = get_text(soup)

    # Normalize whitespace: one non-empty phrase per line.
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return driver, text


def get_text(soup):
    """Get the text from the soup

    Args:
        soup (BeautifulSoup): The soup to get the text from

    Returns:
        str: The text from the soup (headings and paragraphs only)
    """
    text = ""
    tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'p']
    for element in soup.find_all(tags):
        text += element.text + "\n\n"
    return text


def scrape_links_with_selenium(driver: WebDriver, url: str) -> list[str]:
    """Scrape links from a website using selenium

    Args:
        driver (WebDriver): The webdriver to use to scrape the links
        url (str): Base url used to resolve relative hyperlinks

    Returns:
        List[str]: The links scraped from the website
    """
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, "html.parser")

    for script in soup(["script", "style"]):
        script.extract()

    hyperlinks = extract_hyperlinks(soup, url)

    return format_hyperlinks(hyperlinks)


def close_browser(driver: WebDriver) -> None:
    """Close the browser

    Args:
        driver (WebDriver): The webdriver to close

    Returns:
        None
    """
    driver.quit()


def add_header(driver: WebDriver) -> None:
    """Inject the progress-overlay script into the page.

    Args:
        driver (WebDriver): The webdriver to use to add the header

    Returns:
        None
    """
    # Bug fix: the file handle was previously opened and never closed.
    with open(f"{FILE_DIR}/js/overlay.js", "r", encoding="utf-8") as overlay_file:
        driver.execute_script(overlay_file.read())
on a variety of tasks.** 8 | 9 | The agent can produce detailed, factual and unbiased research reports, with customization options for focusing on relevant resources, outlines, and lessons. Inspired by [AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT) and the recent [Plan-and-Solve](https://arxiv.org/abs/2305.04091) paper, GPT Researcher addresses issues of speed and determinism, offering a more stable performance and increased speed through parallelized agent work, as opposed to synchronous operations. 10 | 11 | **Our mission is to empower individuals and organizations with accurate, unbiased, and factual information by leveraging the power of AI.** 12 | 13 | ## Why GPT Researcher? 14 | 15 | - To form objective conclusions for manual research tasks can take time, sometimes weeks to find the right resources and information. 16 | - Current LLMs are trained on past and outdated information, with heavy risks of hallucinations, making them almost irrelevant for research tasks. 17 | - Solutions that enable web search (such as ChatGPT + Web Plugin), only consider limited resources that in some cases result in superficial conclusions or biased answers. 18 | - Using only a selection of resources can create bias in determining the right conclusions for research questions or tasks. 19 | 20 | ## Architecture 21 | The main idea is to run "planner" and "execution" agents, whereas the planner generates questions to research, and the execution agents seek the most related information based on each generated research question. Finally, the planner filters and aggregates all related information and creates a research report. The agents leverage both gpt3.5-turbo-16k and gpt-4 to complete a research task. 22 | 23 |

24 | 25 |
26 | 27 | 28 | More specifically: 29 | * Generate a set of research questions that together form an objective opinion on any given task. 30 | * For each research question, trigger a crawler agent that scrapes online resources for information relevant to the given task. 31 | * For each scraped resource, summarize based on relevant information and keep track of its sources. 32 | * Finally, filter and aggregate all summarized sources and generate a final research report. 33 | 34 | ## Demo 35 | https://github.com/assafelovic/gpt-researcher/assets/13554167/a00c89a6-a295-4dd0-b58d-098a31c40fda 36 | 37 | ## Features 38 | - 📝 Generate research, outlines, resources and lessons reports 39 | - 🌐 Aggregates over 20 web sources per research to form objective and factual conclusions 40 | - 🖥️ Includes an easy-to-use web interface (HTML/CSS/JS) 41 | - 🔍 Scrapes web sources with javascript support 42 | - 📂 Keeps track and context of visited and used web sources 43 | - 📄 Export research reports to PDF and more... 44 | 45 | ## Quickstart 46 | > **Step 0** - Install Python 3.11 or later. [See here](https://www.tutorialsteacher.com/python/install-python) for a step-by-step guide. 47 | 48 |
49 | 50 | > **Step 1** - Download the project 51 | 52 | ```bash 53 | $ git clone https://github.com/assafelovic/gpt-researcher.git 54 | $ cd gpt-researcher 55 | ``` 56 | 57 |
58 | 59 | > **Step 2** - Install dependencies 60 | ```bash 61 | $ pip install -r requirements.txt 62 | ``` 63 |
64 | 65 | > **Step 3** - Create .env file with your OpenAI Key or simply export it 66 | 67 | ```bash 68 | $ export OPENAI_API_KEY={Your API Key here} 69 | ``` 70 |
71 | 72 | > **Step 4** - Run the agent with FastAPI 73 | 74 | ```bash 75 | $ uvicorn main:app --reload 76 | ``` 77 |
78 | 79 | > **Step 5** - Go to http://localhost:8000 on any browser and enjoy researching! 80 | 81 | - **update:** if you are having issues with weasyprint, please visit their website and follow the installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html 82 | 83 | ## Try it with Docker 84 | 85 | > **Step 1** - Install Docker 86 | 87 | Follow instructions at https://docs.docker.com/engine/install/ 88 | 89 | > **Step 2** - Create .env file with your OpenAI Key or simply export it 90 | 91 | ```bash 92 | $ export OPENAI_API_KEY={Your API Key here} 93 | ``` 94 | 95 | > **Step 3** - Run the application 96 | 97 | ```bash 98 | $ docker-compose up 99 | ``` 100 | 101 | > **Step 4** - Go to http://localhost:8000 on any browser and enjoy researching! 102 | 103 | - **update:** if you are having issues with weasyprint, please visit their website and follow the installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html 104 | 105 | ## 🛡 Disclaimer 106 | 107 | This project, GPT Researcher, is an experimental application and is provided "as-is" without any warranty, express or implied. We are sharing codes for academic purposes under the MIT education license. Nothing herein is academic advice, and NOT a recommendation to use in academic or research papers. 108 | 109 | Our view on unbiased research claims: 110 | 1. The whole point of our scraping system is to reduce incorrect fact. How? The more sites we scrape the less chances of incorrect data. We are scraping 20 per research, the chances that they are all wrong is extremely low. 111 | 2. We do not aim to eliminate biases; we aim to reduce it as much as possible. **We are here as a community to figure out the most effective human/llm interactions.** 112 | 3. In research, people also tend towards biases as most have already opinions on the topics they research about. 
This tool scrapes many opinions and will evenly explain diverse views that a biased person would never have read. 113 | 114 | **Please note that the use of the GPT-4 language model can be expensive due to its token usage.** By utilizing this project, you acknowledge that you are responsible for monitoring and managing your own token usage and the associated costs. It is highly recommended to check your OpenAI API usage regularly and set up any necessary limits or alerts to prevent unexpected charges. 115 | 116 | ## 🔧 Troubleshooting 117 | We're constantly working to provide a more stable version. In the meantime, see here for known issues: 118 | 119 | **cannot load library 'gobject-2.0-0'** 120 | 121 | The issue relates to the library WeasyPrint (which is used to generate PDFs from the research report). Please follow this guide to resolve it: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html 122 | 123 | **Error processing the url** 124 | 125 | We're using [Selenium](https://www.selenium.dev) for site scraping. Some sites fail to be scraped. In these cases, restart and try running again. 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /agent/prompts.py: -------------------------------------------------------------------------------- 1 | def generate_agent_role_prompt(agent): 2 | """ Generates the agent role prompt. 3 | Args: agent (str): The type of the agent. 4 | Returns: str: The agent role prompt. 5 | """ 6 | prompts = { 7 | "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.", 8 | "Travel Agent": "You are a world-travelled AI tour guide assistant. 
def generate_agent_role_prompt(agent):
    """ Generates the agent role prompt.
    Args: agent (str): The type of the agent.
    Returns: str: The agent role prompt, or "No such agent" for unknown types.
    """
    prompts = {
        "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.",
        "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.",
        "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.",
        "Business Analyst": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.",
        "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.",
        "Default Agent": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."
    }

    return prompts.get(agent, "No such agent")


def generate_report_prompt(question, research_summary):
    """ Generates the report prompt for the given question and research summary.
    Args: question (str): The question to generate the report prompt for
          research_summary (str): The research summary to generate the report prompt for
    Returns: str: The report prompt for the given question and research summary
    """

    return f'"""{research_summary}""" Using the above information, answer the following'\
           f' question or topic: "{question}" in a detailed report --'\
           " The report should focus on the answer to the question, should be well structured, informative," \
           " in depth, with facts and numbers if available, a minimum of 1,200 words and with markdown syntax and apa format. "\
           "Write all source urls at the end of the report in apa format"


def generate_search_queries_prompt(question):
    """ Generates the search queries prompt for the given question.
    Args: question (str): The question to generate the search queries prompt for
    Returns: str: The search queries prompt for the given question
    """

    # Bug fix: the two literals were concatenated without a separator,
    # producing '..."{question}"You must respond...'.
    return f'Write 4 google search queries to search online that form an objective opinion from the following: "{question}"'\
           f'\nYou must respond with a list of strings in the following format: ["query 1", "query 2", "query 3", "query 4"]'


def generate_resource_report_prompt(question, research_summary):
    """Generates the resource report prompt for the given question and research summary.

    Args:
        question (str): The question to generate the resource report prompt for.
        research_summary (str): The research summary to generate the resource report prompt for.

    Returns:
        str: The resource report prompt for the given question and research summary.
    """
    return f'"""{research_summary}""" Based on the above information, generate a bibliography recommendation report for the following' \
           f' question or topic: "{question}". The report should provide a detailed analysis of each recommended resource,' \
           ' explaining how each source can contribute to finding answers to the research question.' \
           ' Focus on the relevance, reliability, and significance of each source.' \
           ' Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax.' \
           ' Include relevant facts, figures, and numbers whenever available.' \
           ' The report should have a minimum length of 1,200 words.'


def generate_outline_report_prompt(question, research_summary):
    """ Generates the outline report prompt for the given question and research summary.
    Args: question (str): The question to generate the outline report prompt for
          research_summary (str): The research summary to generate the outline report prompt for
    Returns: str: The outline report prompt for the given question and research summary
    """

    return f'"""{research_summary}""" Using the above information, generate an outline for a research report in Markdown syntax'\
           f' for the following question or topic: "{question}". The outline should provide a well-structured framework'\
           ' for the research report, including the main sections, subsections, and key points to be covered.' \
           ' The research report should be detailed, informative, in-depth, and a minimum of 1,200 words.' \
           ' Use appropriate Markdown syntax to format the outline and ensure readability.'


def generate_concepts_prompt(question, research_summary):
    """ Generates the concepts prompt for the given question.
    Args: question (str): The question to generate the concepts prompt for
          research_summary (str): The research summary to generate the concepts prompt for
    Returns: str: The concepts prompt for the given question
    """

    # Bug fix: removed a copy-pasted outline-prompt fragment that was fused
    # onto the format instruction ('...frameworkYou must respond...'), and
    # fixed the broken quoting of the example list ("concepts 4, concepts 5").
    return f'"""{research_summary}""" Using the above information, generate a list of 5 main concepts to learn for a research report'\
           f' on the following question or topic: "{question}".'\
           '\nYou must respond with a list of strings in the following format: ["concepts 1", "concepts 2", "concepts 3", "concepts 4", "concepts 5"]'


def generate_lesson_prompt(concept):
    """
    Generates the lesson prompt for the given question.
    Args:
        concept (str): The concept to generate the lesson prompt for.
    Returns:
        str: The lesson prompt for the given concept.
    """

    # Bug fix: adjacent literals lacked separating spaces, yielding
    # 'definitionof' and 'differentfields' in the rendered prompt.
    prompt = f'generate a comprehensive lesson about {concept} in Markdown syntax. This should include the definition '\
             f'of {concept}, its historical background and development, its applications or uses in different '\
             f'fields, and notable events or facts related to {concept}.'

    return prompt


def get_report_by_type(report_type):
    """Map a report-type key to its prompt-generator function.

    Raises:
        KeyError: If report_type is not one of the known keys (callers rely
        on the mapping being strict).
    """
    report_type_mapping = {
        'research_report': generate_report_prompt,
        'resource_report': generate_resource_report_prompt,
        'outline_report': generate_outline_report_prompt
    }
    return report_type_mapping[report_type]
2 | 3 | # libraries 4 | import asyncio 5 | import json 6 | from actions.web_search import web_search 7 | from actions.web_scrape import async_browse 8 | from processing.text import \ 9 | write_to_file, \ 10 | create_message, \ 11 | create_chat_completion, \ 12 | read_txt_files, \ 13 | write_md_to_pdf 14 | from config import Config 15 | from agent import prompts 16 | import os 17 | import string 18 | 19 | 20 | CFG = Config() 21 | 22 | 23 | class ResearchAgent: 24 | def __init__(self, question, agent, agent_role_prompt, websocket): 25 | """ Initializes the research assistant with the given question. 26 | Args: question (str): The question to research 27 | Returns: None 28 | """ 29 | 30 | self.question = question 31 | self.agent = agent 32 | self.agent_role_prompt = agent_role_prompt 33 | self.visited_urls = set() 34 | self.research_summary = "" 35 | self.directory_name = ''.join(c for c in question if c.isascii() and c not in string.punctuation)[:100] 36 | self.dir_path = os.path.dirname(f"./outputs/{self.directory_name}/") 37 | self.websocket = websocket 38 | 39 | async def summarize(self, text, topic): 40 | """ Summarizes the given text for the given topic. 41 | Args: text (str): The text to summarize 42 | topic (str): The topic to summarize the text for 43 | Returns: str: The summarized text 44 | """ 45 | 46 | messages = [create_message(text, topic)] 47 | await self.websocket.send_json({"type": "logs", "output": f"📝 Summarizing text for query: {text}"}) 48 | 49 | return create_chat_completion( 50 | model=CFG.fast_llm_model, 51 | messages=messages, 52 | ) 53 | 54 | async def get_new_urls(self, url_set_input): 55 | """ Gets the new urls from the given url set. 
56 | Args: url_set_input (set[str]): The url set to get the new urls from 57 | Returns: list[str]: The new urls from the given url set 58 | """ 59 | 60 | new_urls = [] 61 | for url in url_set_input: 62 | if url not in self.visited_urls: 63 | await self.websocket.send_json({"type": "logs", "output": f"✅ Adding source url to research: {url}\n"}) 64 | self.visited_urls.add(url) 65 | new_urls.append(url) 66 | 67 | return new_urls 68 | 69 | async def call_agent(self, action, stream=False, websocket=None): 70 | messages = [{ 71 | "role": "system", 72 | "content": self.agent_role_prompt if self.agent_role_prompt else prompts.generate_agent_role_prompt(self.agent) 73 | }, { 74 | "role": "user", 75 | "content": action, 76 | }] 77 | answer = create_chat_completion( 78 | model=CFG.smart_llm_model, 79 | messages=messages, 80 | stream=stream, 81 | websocket=websocket, 82 | ) 83 | return answer 84 | 85 | async def create_search_queries(self): 86 | """ Creates the search queries for the given question. 87 | Args: None 88 | Returns: list[str]: The search queries for the given question 89 | """ 90 | result = await self.call_agent(prompts.generate_search_queries_prompt(self.question)) 91 | print(result) 92 | await self.websocket.send_json({"type": "logs", "output": f"🧠 I will conduct my research based on the following queries: {result}..."}) 93 | return json.loads(result) 94 | 95 | async def async_search(self, query): 96 | """ Runs the async search for the given query. 
97 | Args: query (str): The query to run the async search for 98 | Returns: list[str]: The async search for the given query 99 | """ 100 | search_results = json.loads(web_search(query)) 101 | new_search_urls = self.get_new_urls([url.get("href") for url in search_results]) 102 | 103 | await self.websocket.send_json( 104 | {"type": "logs", "output": f"🌐 Browsing the following sites for relevant information: {new_search_urls}..."}) 105 | 106 | # Create a list to hold the coroutine objects 107 | tasks = [async_browse(url, query, self.websocket) for url in await new_search_urls] 108 | 109 | # Gather the results as they become available 110 | responses = await asyncio.gather(*tasks, return_exceptions=True) 111 | 112 | return responses 113 | 114 | async def run_search_summary(self, query): 115 | """ Runs the search summary for the given query. 116 | Args: query (str): The query to run the search summary for 117 | Returns: str: The search summary for the given query 118 | """ 119 | 120 | await self.websocket.send_json({"type": "logs", "output": f"🔎 Running research for '{query}'..."}) 121 | 122 | responses = await self.async_search(query) 123 | 124 | result = "\n".join(responses) 125 | os.makedirs(os.path.dirname(f"./outputs/{self.directory_name}/research-{query}.txt"), exist_ok=True) 126 | write_to_file(f"./outputs/{self.directory_name}/research-{query}.txt", result) 127 | return result 128 | 129 | async def conduct_research(self): 130 | """ Conducts the research for the given question. 
131 | Args: None 132 | Returns: str: The research for the given question 133 | """ 134 | try: 135 | #self.research_summary = read_txt_files(self.dir_path) if os.path.isdir(self.dir_path) else "" 136 | 137 | #if not self.research_summary: 138 | search_queries = await self.create_search_queries() 139 | for query in search_queries: 140 | research_result = await self.run_search_summary(query) 141 | self.research_summary += f"{research_result}\n\n" 142 | 143 | await self.websocket.send_json( 144 | {"type": "logs", "output": f"Total research words: {len(self.research_summary.split(' '))}"}) 145 | 146 | return self.research_summary, None 147 | except Exception as e: 148 | return None, e 149 | 150 | 151 | async def create_concepts(self): 152 | """ Creates the concepts for the given question. 153 | Args: None 154 | Returns: list[str]: The concepts for the given question 155 | """ 156 | result = self.call_agent(prompts.generate_concepts_prompt(self.question, self.research_summary)) 157 | 158 | await self.websocket.send_json({"type": "logs", "output": f"I will research based on the following concepts: {result}\n"}) 159 | return json.loads(result) 160 | 161 | async def write_report(self, report_type, websocket): 162 | """ Writes the report for the given question. 163 | Args: None 164 | Returns: str: The report for the given question 165 | """ 166 | report_type_func = prompts.get_report_by_type(report_type) 167 | await websocket.send_json( 168 | {"type": "logs", "output": f"✍️ Writing {report_type} for research task: {self.question}..."}) 169 | answer = await self.call_agent(report_type_func(self.question, self.research_summary), stream=True, 170 | websocket=websocket) 171 | 172 | encoded_path, path = await write_md_to_pdf(report_type, self.directory_name, await answer) 173 | 174 | return answer, encoded_path, path 175 | 176 | async def write_lessons(self): 177 | """ Writes lessons on essential concepts of the research. 
178 | Args: None 179 | Returns: None 180 | """ 181 | concepts = await self.create_concepts() 182 | for concept in concepts: 183 | answer = await self.call_agent(prompts.generate_lesson_prompt(concept), stream=True) 184 | write_md_to_pdf("Lesson", self.directory_name, answer) 185 | -------------------------------------------------------------------------------- /agent/llm_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | 5 | from fastapi import WebSocket 6 | import time 7 | 8 | import openai 9 | from colorama import Fore, Style 10 | from openai.error import APIError, RateLimitError 11 | 12 | from config import Config 13 | 14 | CFG = Config() 15 | 16 | #openai.api_key = CFG.openai_api_key 17 | 18 | from typing import Optional 19 | import logging 20 | 21 | 22 | def create_chat_completion( 23 | messages: list, # type: ignore 24 | model: Optional[str] = None, 25 | temperature: float = CFG.temperature, 26 | max_tokens: Optional[int] = None, 27 | stream: Optional[bool] = False, 28 | websocket: WebSocket | None = None, 29 | ) -> str: 30 | """Create a chat completion using the OpenAI API 31 | Args: 32 | messages (list[dict[str, str]]): The messages to send to the chat completion 33 | model (str, optional): The model to use. Defaults to None. 34 | temperature (float, optional): The temperature to use. Defaults to 0.9. 35 | max_tokens (int, optional): The max tokens to use. Defaults to None. 36 | stream (bool, optional): Whether to stream the response. Defaults to False. 
37 | Returns: 38 | str: The response from the chat completion 39 | """ 40 | 41 | # validate input 42 | if model is None: 43 | raise ValueError("Model cannot be None") 44 | if max_tokens is not None and max_tokens > 8001: 45 | raise ValueError(f"Max tokens cannot be more than 8001, but got {max_tokens}") 46 | if stream and websocket is None: 47 | raise ValueError("Websocket cannot be None when stream is True") 48 | 49 | # create response 50 | for attempt in range(10): # maximum of 10 attempts 51 | try: 52 | response = send_chat_completion_request( 53 | messages, model, temperature, max_tokens, stream, websocket 54 | ) 55 | return response 56 | except RateLimitError: 57 | logging.warning("Rate limit reached, backing off...") 58 | time.sleep(2 ** (attempt + 2)) # exponential backoff 59 | except APIError as e: 60 | if e.http_status != 502 or attempt == 9: # if not Bad Gateway error or final attempt 61 | raise 62 | logging.error("API Error: Bad gateway, backing off...") 63 | time.sleep(2 ** (attempt + 2)) # exponential backoff 64 | 65 | logging.error("Failed to get response after 10 attempts") 66 | raise RuntimeError("Failed to get response from OpenAI API") 67 | 68 | 69 | def send_chat_completion_request( 70 | messages, model, temperature, max_tokens, stream, websocket 71 | ): 72 | if not stream: 73 | result = openai.ChatCompletion.create( 74 | model=model, 75 | messages=messages, 76 | temperature=temperature, 77 | max_tokens=max_tokens, 78 | ) 79 | return result.choices[0].message["content"] 80 | else: 81 | return stream_response(model, messages, temperature, max_tokens, websocket) 82 | 83 | 84 | async def stream_response(model, messages, temperature, max_tokens, websocket): 85 | paragraph = "" 86 | response = "" 87 | print(f"streaming response...") 88 | 89 | for chunk in openai.ChatCompletion.create( 90 | model=model, 91 | messages=messages, 92 | temperature=temperature, 93 | max_tokens=max_tokens, 94 | stream=True, 95 | ): 96 | content = 
chunk["choices"][0].get("delta", {}).get("content") 97 | if content is not None: 98 | response += content 99 | paragraph += content 100 | if "\n" in paragraph: 101 | await websocket.send_json({"type": "report", "output": paragraph}) 102 | paragraph = "" 103 | print(f"streaming response complete") 104 | return response 105 | 106 | 107 | def choose_agent(task: str) -> str: 108 | """Determines what agent should be used 109 | Args: 110 | task (str): The research question the user asked 111 | Returns: 112 | agent - The agent that will be used 113 | agent_role_prompt (str): The prompt for the agent 114 | """ 115 | try: 116 | configuration = choose_agent_configuration() 117 | 118 | response = openai.ChatCompletion.create( 119 | model=CFG.smart_llm_model, 120 | messages=[ 121 | {"role": "user", "content": f"{task}"}], 122 | functions=configuration, 123 | temperature=0, 124 | ) 125 | message = response["choices"][0]["message"] 126 | 127 | if message.get("function_call"): 128 | function_name = message["function_call"]["name"] 129 | return {"agent": json.loads(message["function_call"]["arguments"]).get("agent"), 130 | "agent_role_prompt": json.loads(message["function_call"]["arguments"]).get("instructions")} 131 | else: 132 | return {"agent": "Default Agent", 133 | "agent_role_prompt": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."} 134 | except Exception as e: 135 | print(f"{Fore.RED}Error in choose_agent: {e}{Style.RESET_ALL}") 136 | return {"agent": "Default Agent", 137 | "agent_role_prompt": "You are an AI critical thinker research assistant. 
Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."} 138 | 139 | 140 | def choose_agent_configuration(): 141 | configuration = [ 142 | { 143 | "name": "research", 144 | "description": "Researches the given topic even if it can't be answered", 145 | "parameters": { 146 | "type": "object", 147 | "properties": { 148 | "agent": { 149 | "type": "string", 150 | "description": 151 | """ 152 | Determines the field of the topic and the name of the agent we could use in order to research 153 | about the topic provided. 154 | 155 | Example of agents: 156 | "Business Analyst Agent", "Finance Agent", "Travel Agent", 157 | "Academic Research Agent", "Computer Security Analyst Agent" 158 | 159 | if an agent for the field required doesn't exist make one up 160 | fit an emoji to every agent before the agent name 161 | """, 162 | }, 163 | "instructions": { 164 | "type": "string", 165 | "description": 166 | """ 167 | each provided agent needs instructions in order to start working, 168 | examples for agents and their instructions: 169 | "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.", 170 | "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.", 171 | "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.", 172 | "Business Analyst": "You are an experienced AI business analyst assistant. 
Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.", 173 | "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.", 174 | 175 | """, 176 | }, 177 | }, 178 | "required": ["agent", "instructions"], 179 | }, 180 | } 181 | ] 182 | return configuration 183 | 184 | 185 | --------------------------------------------------------------------------------