46 | Say Goodbye to Hours of Research
47 |
52 | Say Hello to GPT Researcher, your AI mate for rapid insights and comprehensive research. GPT Researcher
53 | takes care of everything from accurate source gathering to organizing research results - all in one
54 | platform designed to make your research process a breeze.
55 |
--------------------------------------------------------------------------------
/actions/web_scrape.py:
--------------------------------------------------------------------------------
1 | """Selenium web scraping module."""
2 | from __future__ import annotations
3 |
4 | import logging
5 | import asyncio
6 | from pathlib import Path
8 |
9 | from bs4 import BeautifulSoup
12 | from selenium import webdriver
13 | from selenium.webdriver.chrome.service import Service
14 | from selenium.webdriver.chrome.options import Options as ChromeOptions
15 | from selenium.webdriver.common.by import By
17 | from selenium.webdriver.remote.webdriver import WebDriver
19 | from selenium.webdriver.support import expected_conditions as EC
20 | from selenium.webdriver.support.wait import WebDriverWait
21 | from fastapi import WebSocket
22 |
23 | import processing.text as summary
24 |
25 | from config import Config
26 | from processing.html import extract_hyperlinks, format_hyperlinks
27 |
28 | from concurrent.futures import ThreadPoolExecutor
29 |
30 | executor = ThreadPoolExecutor()
31 |
32 | FILE_DIR = Path(__file__).parent.parent
33 | CFG = Config()
34 |
35 |
36 | async def async_browse(url: str, question: str, websocket: WebSocket) -> str:
37 | """Browse a website and return the answer and links to the user
38 |
39 | Args:
40 | url (str): The url of the website to browse
41 | question (str): The question asked by the user
42 |         websocket (WebSocket): The websocket used to stream progress logs to the client
43 |
44 | Returns:
45 | str: The answer and links to the user
46 | """
47 |     loop = asyncio.get_event_loop()
48 |     # Reuse the module-level executor instead of creating a new thread pool on every call
49 |
50 | print(f"Scraping url {url} with question {question}")
51 |     await websocket.send_json(
52 |         {"type": "logs", "output": f"🔎 Browsing the {url} for relevant information about: {question}..."})
53 |
54 | try:
55 | driver, text = await loop.run_in_executor(executor, scrape_text_with_selenium, url)
56 | await loop.run_in_executor(executor, add_header, driver)
57 |         summary_text = await loop.run_in_executor(executor, summary.summarize_text, url, text, question, driver)
58 |         await loop.run_in_executor(executor, close_browser, driver)  # quit the browser so each scraped url doesn't leak a Chrome process
59 | await websocket.send_json(
60 | {"type": "logs", "output": f"📝 Information gathered from url {url}: {summary_text}"})
61 |
62 | return f"Information gathered from url {url}: {summary_text}"
63 | except Exception as e:
64 | print(f"An error occurred while processing the url {url}: {e}")
65 | return f"Error processing the url {url}: {e}"
66 |
67 |
68 |
69 | def browse_website(url: str, question: str) -> tuple[str, WebDriver]:
70 | """Browse a website and return the answer and links to the user
71 |
72 | Args:
73 | url (str): The url of the website to browse
74 | question (str): The question asked by the user
75 |
76 | Returns:
77 | Tuple[str, WebDriver]: The answer and links to the user and the webdriver
78 | """
79 |
80 | if not url:
81 | return "A URL was not specified, cancelling request to browse website.", None
82 |
83 | driver, text = scrape_text_with_selenium(url)
84 | add_header(driver)
85 | summary_text = summary.summarize_text(url, text, question, driver)
86 |
87 | links = scrape_links_with_selenium(driver, url)
88 |
89 | # Limit links to 5
90 | if len(links) > 5:
91 | links = links[:5]
92 |
93 | # write_to_file('research-{0}.txt'.format(url), summary_text + "\nSource Links: {0}\n\n".format(links))
94 |
95 | close_browser(driver)
96 |     return f"Answer gathered from website: {summary_text} \n \n Links: {links}", driver  # note: the driver has already been quit at this point
97 |
98 |
99 | def scrape_text_with_selenium(url: str) -> tuple[WebDriver, str]:
100 | """Scrape text from a website using selenium
101 |
102 | Args:
103 | url (str): The url of the website to scrape
104 |
105 | Returns:
106 | Tuple[WebDriver, str]: The webdriver and the text scraped from the website
107 | """
108 | logging.getLogger("selenium").setLevel(logging.CRITICAL)
109 |
110 | options = ChromeOptions()
111 | options.add_argument("--headless")
112 | options.add_argument("--no-sandbox")
113 | options.add_argument("--disable-dev-shm-usage") # Overcomes limited resource problems
114 | options.add_argument(f'user-agent={CFG.user_agent}')
115 |     options.add_experimental_option("prefs", {"download_restrictions": 3})  # 3 = block all downloads
116 |
117 | driver = webdriver.Chrome(options=options)
118 | driver.get(url)
119 |
120 | WebDriverWait(driver, 10).until(
121 | EC.presence_of_element_located((By.TAG_NAME, "body"))
122 | )
123 |
124 | # Get the HTML content directly from the browser's DOM
125 | page_source = driver.execute_script("return document.body.outerHTML;")
126 | soup = BeautifulSoup(page_source, "html.parser")
127 |
128 | for script in soup(["script", "style"]):
129 | script.extract()
130 |
131 | # text = soup.get_text()
132 | text = get_text(soup)
133 |
134 | lines = (line.strip() for line in text.splitlines())
135 |     chunks = (phrase.strip() for line in lines for phrase in line.split("  "))  # split on runs of double spaces
136 | text = "\n".join(chunk for chunk in chunks if chunk)
137 | return driver, text
138 |
139 |
140 | def get_text(soup):
141 | """Get the text from the soup
142 |
143 | Args:
144 | soup (BeautifulSoup): The soup to get the text from
145 |
146 | Returns:
147 | str: The text from the soup
148 | """
149 | text = ""
150 | tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'p']
151 |     for element in soup.find_all(tags):  # find all heading and paragraph elements
152 |         text += element.text + "\n\n"
153 | return text
154 |
155 |
156 | def scrape_links_with_selenium(driver: WebDriver, url: str) -> list[str]:
157 | """Scrape links from a website using selenium
158 |
159 | Args:
160 |         driver (WebDriver): The webdriver to use to scrape the links
161 |         url (str): The base url used to resolve relative hyperlinks
162 | Returns:
163 | List[str]: The links scraped from the website
164 | """
165 | page_source = driver.page_source
166 | soup = BeautifulSoup(page_source, "html.parser")
167 |
168 | for script in soup(["script", "style"]):
169 | script.extract()
170 |
171 | hyperlinks = extract_hyperlinks(soup, url)
172 |
173 | return format_hyperlinks(hyperlinks)
174 |
175 |
176 | def close_browser(driver: WebDriver) -> None:
177 | """Close the browser
178 |
179 | Args:
180 | driver (WebDriver): The webdriver to close
181 |
182 | Returns:
183 | None
184 | """
185 | driver.quit()
186 |
187 |
188 | def add_header(driver: WebDriver) -> None:
189 | """Add a header to the website
190 |
191 | Args:
192 | driver (WebDriver): The webdriver to use to add the header
193 |
194 | Returns:
195 | None
196 | """
197 |     driver.execute_script((FILE_DIR / "js/overlay.js").read_text())
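

if __name__ == "__main__":
    # Editor-added usage sketch: scrape a single page and print a text preview
    # plus up to five links. Assumes a local Chrome install; no API keys needed.
    demo_url = "https://example.com"
    demo_driver, demo_text = scrape_text_with_selenium(demo_url)
    try:
        print(demo_text[:500])
        print(scrape_links_with_selenium(demo_driver, demo_url)[:5])
    finally:
        close_browser(demo_driver)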
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🔎 GPT Researcher
2 | [Website](https://tavily.com)
3 | [Discord](https://discord.com/invite/rqw8dnM8)
4 | [GitHub](https://github.com/assafelovic/gpt-researcher)
5 | [Twitter](https://twitter.com/assaf_elovic)
6 |
7 | **GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks.**
8 |
9 | The agent can produce detailed, factual and unbiased research reports, with customization options for focusing on relevant resources, outlines, and lessons. Inspired by [AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT) and the recent [Plan-and-Solve](https://arxiv.org/abs/2305.04091) paper, GPT Researcher addresses issues of speed and determinism, offering more stable performance and increased speed through parallelized agent work, as opposed to synchronous operations.
10 |
11 | **Our mission is to empower individuals and organizations with accurate, unbiased, and factual information by leveraging the power of AI.**
12 |
13 | ## Why GPT Researcher?
14 |
15 | - Forming objective conclusions through manual research takes time, sometimes weeks, to find the right resources and information.
16 | - Current LLMs are trained on past and outdated information, with heavy risks of hallucination, making them almost irrelevant for research tasks.
17 | - Solutions that enable web search (such as ChatGPT + Web Plugin) only consider limited resources, which in some cases results in superficial conclusions or biased answers.
18 | - Using only a selection of resources can create bias in determining the right conclusions for research questions or tasks.
19 |
20 | ## Architecture
21 | The main idea is to run "planner" and "execution" agents, where the planner generates questions to research, and the execution agents seek the most relevant information for each generated research question. Finally, the planner filters and aggregates all related information and creates a research report. The agents leverage both gpt-3.5-turbo-16k and gpt-4 to complete a research task.
22 |
23 |
24 |
25 |
26 |
27 |
28 | More specifically (a simplified sketch follows this list):
29 | * Generate a set of research questions that together form an objective opinion on any given task.
30 | * For each research question, trigger a crawler agent that scrapes online resources for information relevant to the given task.
31 | * For each scraped resource, summarize it based on the relevant information and keep track of its source.
32 | * Finally, filter and aggregate all summarized sources and generate a final research report.
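
For illustration, the control flow roughly reduces to the following (a simplified, editor-added sketch; `create_search_queries`, `run_search_summary`, and `write_report` are methods of `ResearchAgent` in `agent/research_agent.py`):

```python
import asyncio

async def research(agent) -> str:
    # Planner: derive research questions from the user's task
    queries = await agent.create_search_queries()
    # Execution agents: scrape and summarize each query concurrently
    summaries = await asyncio.gather(
        *(agent.run_search_summary(query) for query in queries)
    )
    # Planner: aggregate the summaries and write the final report
    agent.research_summary = "\n\n".join(summaries)
    report, _, _ = await agent.write_report("research_report", agent.websocket)
    return report
```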
33 |
34 | ## Demo
35 | https://github.com/assafelovic/gpt-researcher/assets/13554167/a00c89a6-a295-4dd0-b58d-098a31c40fda
36 |
37 | ## Features
38 | - 📝 Generate research, outline, resource, and lesson reports
39 | - 🌐 Aggregates over 20 web sources per research task to form objective and factual conclusions
40 | - 🖥️ Includes an easy-to-use web interface (HTML/CSS/JS)
41 | - 🔍 Scrapes web sources with JavaScript support
42 | - 📂 Keeps track of and maintains context for visited and used web sources
43 | - 📄 Export research reports to PDF and more...
44 |
45 | ## Quickstart
46 | > **Step 0** - Install Python 3.11 or later. [See here](https://www.tutorialsteacher.com/python/install-python) for a step-by-step guide.
47 |
48 |
49 |
50 | > **Step 1** - Download the project
51 |
52 | ```bash
53 | $ git clone https://github.com/assafelovic/gpt-researcher.git
54 | $ cd gpt-researcher
55 | ```
56 |
57 |
58 |
59 | > **Step 2** - Install dependencies
60 | ```bash
61 | $ pip install -r requirements.txt
62 | ```
63 |
64 |
65 | > **Step 3** - Create .env file with your OpenAI Key or simply export it
66 |
67 | ```bash
68 | $ export OPENAI_API_KEY={Your API Key here}
69 | ```
70 |
71 |
72 | > **Step 4** - Run the agent with FastAPI
73 |
74 | ```bash
75 | $ uvicorn main:app --reload
76 | ```
77 |
78 |
79 | > **Step 5** - Go to http://localhost:8000 on any browser and enjoy researching!
80 |
81 | - **Update:** If you are having issues with WeasyPrint, please visit their website and follow the installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html
82 |
83 | ## Try it with Docker
84 |
85 | > **Step 1** - Install Docker
86 |
87 | Follow instructions at https://docs.docker.com/engine/install/
88 |
89 | > **Step 2** - Create .env file with your OpenAI Key or simply export it
90 |
91 | ```bash
92 | $ export OPENAI_API_KEY={Your API Key here}
93 | ```
94 |
95 | > **Step 3** - Run the application
96 |
97 | ```bash
98 | $ docker-compose up
99 | ```
100 |
101 | > **Step 4** - Go to http://localhost:8000 on any browser and enjoy researching!
102 |
103 | - **Update:** If you are having issues with WeasyPrint, please visit their website and follow the installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html
104 |
105 | ## 🛡 Disclaimer
106 |
107 | This project, GPT Researcher, is an experimental application and is provided "as-is" without any warranty, express or implied. We are sharing code for academic purposes under the MIT license. Nothing herein is academic advice, nor a recommendation for use in academic or research papers.
108 |
109 | Our view on unbiased research claims:
110 | 1. The whole point of our scraping system is to reduce incorrect facts: the more sites we scrape, the lower the chance of incorrect data. Scraping around 20 sites per research task makes the chance that all of them are wrong extremely low (for example, if each source were independently wrong with probability 0.3, the chance that all 20 are wrong would be 0.3^20, which is vanishingly small).
111 | 2. We do not aim to eliminate biases; we aim to reduce them as much as possible. **We are here as a community to figure out the most effective human/llm interactions.**
112 | 3. In research, people also tend towards biases, as most already have opinions on the topics they research. This tool scrapes many opinions and will evenly present diverse views that a biased person would never have read.
113 |
114 | **Please note that the use of the GPT-4 language model can be expensive due to its token usage.** By utilizing this project, you acknowledge that you are responsible for monitoring and managing your own token usage and the associated costs. It is highly recommended to check your OpenAI API usage regularly and set up any necessary limits or alerts to prevent unexpected charges.
115 |
116 | ## 🔧 Troubleshooting
117 | We're constantly working to provide a more stable version. In the meantime, see here for known issues:
118 |
119 | **cannot load library 'gobject-2.0-0'**
120 |
121 | The issue relates to the library WeasyPrint (which is used to generate PDFs from the research report). Please follow this guide to resolve it: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html
122 |
123 | **Error processing the url**
124 |
125 | We're using [Selenium](https://www.selenium.dev) for site scraping. Some sites fail to be scraped. In these cases, restart and try running again.
126 |
127 |
128 |
129 |
--------------------------------------------------------------------------------
/agent/prompts.py:
--------------------------------------------------------------------------------
1 | def generate_agent_role_prompt(agent):
2 | """ Generates the agent role prompt.
3 | Args: agent (str): The type of the agent.
4 | Returns: str: The agent role prompt.
5 | """
6 | prompts = {
7 | "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.",
8 | "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.",
9 | "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.",
10 | "Business Analyst": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.",
11 | "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.",
12 | "Default Agent": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."
13 | }
14 |
15 | return prompts.get(agent, "No such agent")
16 |
17 |
18 | def generate_report_prompt(question, research_summary):
19 | """ Generates the report prompt for the given question and research summary.
20 | Args: question (str): The question to generate the report prompt for
21 | research_summary (str): The research summary to generate the report prompt for
22 | Returns: str: The report prompt for the given question and research summary
23 | """
24 |
25 | return f'"""{research_summary}""" Using the above information, answer the following'\
26 | f' question or topic: "{question}" in a detailed report --'\
27 | " The report should focus on the answer to the question, should be well structured, informative," \
28 |            " in depth, with facts and numbers if available, a minimum of 1,200 words, and with Markdown syntax and APA format. "\
29 |            "Write all source urls at the end of the report in APA format."
30 |
31 | def generate_search_queries_prompt(question):
32 | """ Generates the search queries prompt for the given question.
33 | Args: question (str): The question to generate the search queries prompt for
34 | Returns: str: The search queries prompt for the given question
35 | """
36 |
37 |     return f'Write 4 google search queries to search online that form an objective opinion from the following: "{question}" '\
38 | f'You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3", "query 4"]'
39 |
40 |
41 | def generate_resource_report_prompt(question, research_summary):
42 | """Generates the resource report prompt for the given question and research summary.
43 |
44 | Args:
45 | question (str): The question to generate the resource report prompt for.
46 | research_summary (str): The research summary to generate the resource report prompt for.
47 |
48 | Returns:
49 | str: The resource report prompt for the given question and research summary.
50 | """
51 | return f'"""{research_summary}""" Based on the above information, generate a bibliography recommendation report for the following' \
52 | f' question or topic: "{question}". The report should provide a detailed analysis of each recommended resource,' \
53 | ' explaining how each source can contribute to finding answers to the research question.' \
54 | ' Focus on the relevance, reliability, and significance of each source.' \
55 | ' Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax.' \
56 | ' Include relevant facts, figures, and numbers whenever available.' \
57 | ' The report should have a minimum length of 1,200 words.'
58 |
59 |
60 | def generate_outline_report_prompt(question, research_summary):
61 | """ Generates the outline report prompt for the given question and research summary.
62 | Args: question (str): The question to generate the outline report prompt for
63 | research_summary (str): The research summary to generate the outline report prompt for
64 | Returns: str: The outline report prompt for the given question and research summary
65 | """
66 |
67 | return f'"""{research_summary}""" Using the above information, generate an outline for a research report in Markdown syntax'\
68 | f' for the following question or topic: "{question}". The outline should provide a well-structured framework'\
69 | ' for the research report, including the main sections, subsections, and key points to be covered.' \
70 | ' The research report should be detailed, informative, in-depth, and a minimum of 1,200 words.' \
71 | ' Use appropriate Markdown syntax to format the outline and ensure readability.'
72 |
73 | def generate_concepts_prompt(question, research_summary):
74 | """ Generates the concepts prompt for the given question.
75 | Args: question (str): The question to generate the concepts prompt for
76 | research_summary (str): The research summary to generate the concepts prompt for
77 | Returns: str: The concepts prompt for the given question
78 | """
79 |
80 |     return f'"""{research_summary}""" Using the above information, generate a list of 5 main concepts to learn for a research report'\
81 |            f' on the following question or topic: "{question}". '\
82 |            'You must respond with a list of strings in the following format: ["concepts 1", "concepts 2", "concepts 3", "concepts 4", "concepts 5"]'
83 |
84 |
85 | def generate_lesson_prompt(concept):
86 | """
87 | Generates the lesson prompt for the given question.
88 | Args:
89 | concept (str): The concept to generate the lesson prompt for.
90 | Returns:
91 | str: The lesson prompt for the given concept.
92 | """
93 |
94 |     prompt = f'generate a comprehensive lesson about {concept} in Markdown syntax. This should include the definition '\
95 |              f'of {concept}, its historical background and development, its applications or uses in different '\
96 |              f'fields, and notable events or facts related to {concept}.'
97 |
98 | return prompt
99 |
100 | def get_report_by_type(report_type):
101 | report_type_mapping = {
102 | 'research_report': generate_report_prompt,
103 | 'resource_report': generate_resource_report_prompt,
104 | 'outline_report': generate_outline_report_prompt
105 | }
106 |     return report_type_mapping.get(report_type, generate_report_prompt)  # fall back to a standard research report
107 |
108 |
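if __name__ == "__main__":
    # Editor-added sanity check: render a sample role prompt and report prompt
    # so the templates can be inspected without running the full agent.
    print(generate_agent_role_prompt("Finance Agent"))
    print(get_report_by_type("research_report")("What is quantum computing?", "(research summary goes here)"))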
--------------------------------------------------------------------------------
/agent/research_agent.py:
--------------------------------------------------------------------------------
1 | # Description: Research assistant class that handles the research process for a given question.
2 |
3 | # libraries
4 | import asyncio
5 | import json
6 | from actions.web_search import web_search
7 | from actions.web_scrape import async_browse
8 | from processing.text import \
9 | write_to_file, \
10 | create_message, \
11 | create_chat_completion, \
12 | read_txt_files, \
13 | write_md_to_pdf
14 | from config import Config
15 | from agent import prompts
16 | import os
17 | import string
18 |
19 |
20 | CFG = Config()
21 |
22 |
23 | class ResearchAgent:
24 | def __init__(self, question, agent, agent_role_prompt, websocket):
25 | """ Initializes the research assistant with the given question.
26 | Args: question (str): The question to research
27 | Returns: None
28 | """
29 |
30 | self.question = question
31 | self.agent = agent
32 | self.agent_role_prompt = agent_role_prompt
33 | self.visited_urls = set()
34 | self.research_summary = ""
35 | self.directory_name = ''.join(c for c in question if c.isascii() and c not in string.punctuation)[:100]
36 | self.dir_path = os.path.dirname(f"./outputs/{self.directory_name}/")
37 | self.websocket = websocket
38 |
39 | async def summarize(self, text, topic):
40 | """ Summarizes the given text for the given topic.
41 | Args: text (str): The text to summarize
42 | topic (str): The topic to summarize the text for
43 | Returns: str: The summarized text
44 | """
45 |
46 | messages = [create_message(text, topic)]
47 |         await self.websocket.send_json({"type": "logs", "output": f"📝 Summarizing text for query: {topic}"})
48 |
49 | return create_chat_completion(
50 | model=CFG.fast_llm_model,
51 | messages=messages,
52 | )
53 |
54 | async def get_new_urls(self, url_set_input):
55 | """ Gets the new urls from the given url set.
56 | Args: url_set_input (set[str]): The url set to get the new urls from
57 | Returns: list[str]: The new urls from the given url set
58 | """
59 |
60 | new_urls = []
61 | for url in url_set_input:
62 | if url not in self.visited_urls:
63 | await self.websocket.send_json({"type": "logs", "output": f"✅ Adding source url to research: {url}\n"})
64 | self.visited_urls.add(url)
65 | new_urls.append(url)
66 |
67 | return new_urls
68 |
69 | async def call_agent(self, action, stream=False, websocket=None):
70 | messages = [{
71 | "role": "system",
72 | "content": self.agent_role_prompt if self.agent_role_prompt else prompts.generate_agent_role_prompt(self.agent)
73 | }, {
74 | "role": "user",
75 | "content": action,
76 | }]
77 |         answer = create_chat_completion(
78 |             model=CFG.smart_llm_model,
79 |             messages=messages,
80 |             stream=stream,
81 |             websocket=websocket,
82 |         )
83 |         return await answer if asyncio.iscoroutine(answer) else answer  # the streaming path returns a coroutine
84 |
85 | async def create_search_queries(self):
86 | """ Creates the search queries for the given question.
87 | Args: None
88 | Returns: list[str]: The search queries for the given question
89 | """
90 | result = await self.call_agent(prompts.generate_search_queries_prompt(self.question))
91 | print(result)
92 | await self.websocket.send_json({"type": "logs", "output": f"🧠 I will conduct my research based on the following queries: {result}..."})
93 | return json.loads(result)
94 |
95 | async def async_search(self, query):
96 | """ Runs the async search for the given query.
97 | Args: query (str): The query to run the async search for
98 | Returns: list[str]: The async search for the given query
99 | """
100 |         search_results = json.loads(web_search(query))
101 |         new_search_urls = await self.get_new_urls([url.get("href") for url in search_results])
102 |
103 |         await self.websocket.send_json(
104 |             {"type": "logs", "output": f"🌐 Browsing the following sites for relevant information: {new_search_urls}..."})
105 |
106 |         # Create a list to hold the coroutine objects
107 |         tasks = [async_browse(url, query, self.websocket) for url in new_search_urls]
108 |
109 | # Gather the results as they become available
110 | responses = await asyncio.gather(*tasks, return_exceptions=True)
111 |
112 | return responses
113 |
114 | async def run_search_summary(self, query):
115 | """ Runs the search summary for the given query.
116 | Args: query (str): The query to run the search summary for
117 | Returns: str: The search summary for the given query
118 | """
119 |
120 | await self.websocket.send_json({"type": "logs", "output": f"🔎 Running research for '{query}'..."})
121 |
122 | responses = await self.async_search(query)
123 |
124 |         result = "\n".join(str(response) for response in responses)  # tolerate exceptions surfaced by gather
125 | os.makedirs(os.path.dirname(f"./outputs/{self.directory_name}/research-{query}.txt"), exist_ok=True)
126 | write_to_file(f"./outputs/{self.directory_name}/research-{query}.txt", result)
127 | return result
128 |
129 | async def conduct_research(self):
130 | """ Conducts the research for the given question.
131 | Args: None
132 | Returns: str: The research for the given question
133 | """
134 | try:
135 | #self.research_summary = read_txt_files(self.dir_path) if os.path.isdir(self.dir_path) else ""
136 |
137 | #if not self.research_summary:
138 | search_queries = await self.create_search_queries()
139 | for query in search_queries:
140 | research_result = await self.run_search_summary(query)
141 | self.research_summary += f"{research_result}\n\n"
142 |
143 | await self.websocket.send_json(
144 |             {"type": "logs", "output": f"Total research words: {len(self.research_summary.split())}"})
145 |
146 | return self.research_summary, None
147 | except Exception as e:
148 | return None, e
149 |
150 |
151 | async def create_concepts(self):
152 | """ Creates the concepts for the given question.
153 | Args: None
154 | Returns: list[str]: The concepts for the given question
155 | """
156 |         result = await self.call_agent(prompts.generate_concepts_prompt(self.question, self.research_summary))
157 |
158 | await self.websocket.send_json({"type": "logs", "output": f"I will research based on the following concepts: {result}\n"})
159 | return json.loads(result)
160 |
161 | async def write_report(self, report_type, websocket):
162 | """ Writes the report for the given question.
163 | Args: None
164 | Returns: str: The report for the given question
165 | """
166 | report_type_func = prompts.get_report_by_type(report_type)
167 | await websocket.send_json(
168 | {"type": "logs", "output": f"✍️ Writing {report_type} for research task: {self.question}..."})
169 | answer = await self.call_agent(report_type_func(self.question, self.research_summary), stream=True,
170 | websocket=websocket)
171 |
172 |         encoded_path, path = await write_md_to_pdf(report_type, self.directory_name, answer)
173 |
174 | return answer, encoded_path, path
175 |
176 | async def write_lessons(self):
177 | """ Writes lessons on essential concepts of the research.
178 | Args: None
179 | Returns: None
180 | """
181 | concepts = await self.create_concepts()
182 | for concept in concepts:
183 |             answer = await self.call_agent(prompts.generate_lesson_prompt(concept), stream=True, websocket=self.websocket)
184 |             await write_md_to_pdf("Lesson", self.directory_name, answer)
185 |
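
if __name__ == "__main__":
    # Editor-added smoke-test sketch: run the agent end to end with a stub
    # websocket that just prints the log messages. Assumes OPENAI_API_KEY is set.
    class _PrintSocket:
        async def send_json(self, data):
            print(data.get("output", data))

    async def _demo():
        agent = ResearchAgent("What is quantum computing?", "Default Agent", None, _PrintSocket())
        research_summary, error = await agent.conduct_research()
        print(error if error else research_summary[:500])

    asyncio.run(_demo())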
--------------------------------------------------------------------------------
/agent/llm_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import annotations
2 |
3 | import json
4 |
5 | from fastapi import WebSocket
6 | import time
7 |
8 | import openai
9 | from colorama import Fore, Style
10 | from openai.error import APIError, RateLimitError
11 |
12 | from config import Config
13 |
14 | CFG = Config()
15 |
16 | #openai.api_key = CFG.openai_api_key
17 |
18 | from typing import Optional
19 | import logging
20 |
21 |
22 | def create_chat_completion(
23 | messages: list, # type: ignore
24 | model: Optional[str] = None,
25 | temperature: float = CFG.temperature,
26 | max_tokens: Optional[int] = None,
27 | stream: Optional[bool] = False,
28 | websocket: WebSocket | None = None,
29 | ) -> str:
30 | """Create a chat completion using the OpenAI API
31 | Args:
32 | messages (list[dict[str, str]]): The messages to send to the chat completion
33 | model (str, optional): The model to use. Defaults to None.
34 | temperature (float, optional): The temperature to use. Defaults to 0.9.
35 | max_tokens (int, optional): The max tokens to use. Defaults to None.
36 | stream (bool, optional): Whether to stream the response. Defaults to False.
37 | Returns:
38 | str: The response from the chat completion
39 | """
40 |
41 | # validate input
42 | if model is None:
43 | raise ValueError("Model cannot be None")
44 | if max_tokens is not None and max_tokens > 8001:
45 | raise ValueError(f"Max tokens cannot be more than 8001, but got {max_tokens}")
46 | if stream and websocket is None:
47 | raise ValueError("Websocket cannot be None when stream is True")
48 |
49 | # create response
50 | for attempt in range(10): # maximum of 10 attempts
51 | try:
52 | response = send_chat_completion_request(
53 | messages, model, temperature, max_tokens, stream, websocket
54 | )
55 | return response
56 | except RateLimitError:
57 | logging.warning("Rate limit reached, backing off...")
58 | time.sleep(2 ** (attempt + 2)) # exponential backoff
59 | except APIError as e:
60 | if e.http_status != 502 or attempt == 9: # if not Bad Gateway error or final attempt
61 | raise
62 | logging.error("API Error: Bad gateway, backing off...")
63 | time.sleep(2 ** (attempt + 2)) # exponential backoff
64 |
65 | logging.error("Failed to get response after 10 attempts")
66 | raise RuntimeError("Failed to get response from OpenAI API")
67 |
68 |
69 | def send_chat_completion_request(
70 | messages, model, temperature, max_tokens, stream, websocket
71 | ):
72 | if not stream:
73 | result = openai.ChatCompletion.create(
74 | model=model,
75 | messages=messages,
76 | temperature=temperature,
77 | max_tokens=max_tokens,
78 | )
79 | return result.choices[0].message["content"]
80 | else:
81 |         return stream_response(model, messages, temperature, max_tokens, websocket)  # async: returns a coroutine the caller must await
82 |
83 |
84 | async def stream_response(model, messages, temperature, max_tokens, websocket):
85 | paragraph = ""
86 | response = ""
87 |     print("streaming response...")
88 |
89 | for chunk in openai.ChatCompletion.create(
90 | model=model,
91 | messages=messages,
92 | temperature=temperature,
93 | max_tokens=max_tokens,
94 | stream=True,
95 | ):
96 | content = chunk["choices"][0].get("delta", {}).get("content")
97 | if content is not None:
98 | response += content
99 | paragraph += content
100 | if "\n" in paragraph:
101 | await websocket.send_json({"type": "report", "output": paragraph})
102 | paragraph = ""
103 |     print("streaming response complete")
104 | return response
105 |
106 |
107 | def choose_agent(task: str) -> dict:
108 | """Determines what agent should be used
109 | Args:
110 | task (str): The research question the user asked
111 | Returns:
112 |         dict: The chosen agent ("agent") and its role prompt ("agent_role_prompt")
114 | """
115 | try:
116 | configuration = choose_agent_configuration()
117 |
118 | response = openai.ChatCompletion.create(
119 | model=CFG.smart_llm_model,
120 | messages=[
121 | {"role": "user", "content": f"{task}"}],
122 | functions=configuration,
123 | temperature=0,
124 | )
125 | message = response["choices"][0]["message"]
126 |
127 | if message.get("function_call"):
128 |             arguments = json.loads(message["function_call"]["arguments"])
129 |             return {"agent": arguments.get("agent"),
130 |                     "agent_role_prompt": arguments.get("instructions")}
131 | else:
132 | return {"agent": "Default Agent",
133 | "agent_role_prompt": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."}
134 | except Exception as e:
135 | print(f"{Fore.RED}Error in choose_agent: {e}{Style.RESET_ALL}")
136 | return {"agent": "Default Agent",
137 | "agent_role_prompt": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."}
138 |
139 |
140 | def choose_agent_configuration():
141 | configuration = [
142 | {
143 | "name": "research",
144 | "description": "Researches the given topic even if it can't be answered",
145 | "parameters": {
146 | "type": "object",
147 | "properties": {
148 | "agent": {
149 | "type": "string",
150 | "description":
151 | """
152 | Determines the field of the topic and the name of the agent we could use in order to research
153 | about the topic provided.
154 |
155 | Example of agents:
156 | "Business Analyst Agent", "Finance Agent", "Travel Agent",
157 | "Academic Research Agent", "Computer Security Analyst Agent"
158 |
159 | if an agent for the field required doesn't exist make one up
160 | fit an emoji to every agent before the agent name
161 | """,
162 | },
163 | "instructions": {
164 | "type": "string",
165 | "description":
166 | """
167 | each provided agent needs instructions in order to start working,
168 | examples for agents and their instructions:
169 | "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.",
170 | "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.",
171 | "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.",
172 | "Business Analyst": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.",
173 | "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.",
174 |
175 | """,
176 | },
177 | },
178 | "required": ["agent", "instructions"],
179 | },
180 | }
181 | ]
182 | return configuration
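

if __name__ == "__main__":
    # Editor-added usage sketch: pick an agent configuration for a sample task.
    # Assumes OPENAI_API_KEY is set in the environment (see the README Quickstart).
    print(choose_agent("Should I invest in the S&P 500?"))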
183 |
184 |
185 |
--------------------------------------------------------------------------------