├── .dockerignore ├── .env.example ├── .github ├── dependabot.yml └── workflows │ └── docker-bulid.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README-zh_CN.md ├── README.md ├── backend ├── __init__.py ├── report_type │ ├── __init__.py │ ├── basic_report │ │ ├── __init__.py │ │ └── basic_report.py │ └── detailed_report │ │ ├── README.md │ │ ├── __init__.py │ │ └── detailed_report.py ├── server.py ├── utils.py └── websocket_manager.py ├── cli.py ├── docker-compose.yml ├── docs ├── CNAME ├── README.md ├── babel.config.js ├── blog │ ├── 2023-09-22-gpt-researcher │ │ ├── architecture.png │ │ ├── index.md │ │ └── planner.jpeg │ ├── 2023-11-12-openai-assistant │ │ ├── diagram-1.png │ │ ├── diagram-assistant.jpeg │ │ └── index.md │ ├── 2024-05-19-gptr-langgraph │ │ ├── architecture.jpeg │ │ ├── blog-langgraph.jpeg │ │ └── index.md │ └── authors.yml ├── docs │ ├── contribute.md │ ├── examples │ │ ├── examples.ipynb │ │ └── examples.md │ ├── faq.md │ ├── gpt-researcher │ │ ├── config.md │ │ ├── example.md │ │ ├── getting-started.md │ │ ├── introduction.md │ │ ├── langgraph.md │ │ ├── llms.md │ │ ├── pip-package.md │ │ ├── roadmap.md │ │ ├── tailored-research.md │ │ └── troubleshooting.md │ ├── reference │ │ ├── config │ │ │ ├── config.md │ │ │ └── singleton.md │ │ ├── processing │ │ │ ├── html.md │ │ │ └── text.md │ │ └── sidebar.json │ └── welcome.md ├── docusaurus.config.js ├── package.json ├── pydoc-markdown.yml ├── sidebars.js ├── src │ ├── components │ │ ├── HomepageFeatures.js │ │ └── HomepageFeatures.module.css │ ├── css │ │ └── custom.css │ └── pages │ │ ├── index.js │ │ └── index.module.css ├── static │ ├── .nojekyll │ └── img │ │ ├── architecture.png │ │ ├── banner1.jpg │ │ ├── examples.png │ │ ├── favicon.ico │ │ ├── gptr-logo.png │ │ ├── gptresearcher.png │ │ ├── leaderboard.png │ │ ├── multi-agent.png │ │ └── tavily.png └── yarn.lock ├── examples ├── pip-run.ipynb └── sample_report.py ├── frontend ├── index.html ├── pdf_styles.css ├── scripts.js ├── static │ ├── academicResearchAgentAvatar.png │ ├── businessAnalystAgentAvatar.png │ ├── computerSecurityanalystAvatar.png │ ├── defaultAgentAvatar.JPG │ ├── favicon.ico │ ├── financeAgentAvatar.png │ ├── gptr-logo.png │ ├── mathAgentAvatar.png │ └── travelAgentAvatar.png └── styles.css ├── gpt_researcher ├── README.md ├── __init__.py ├── config │ ├── __init__.py │ └── config.py ├── context │ ├── __init__.py │ ├── compression.py │ └── retriever.py ├── document │ ├── __init__.py │ └── document.py ├── llm_provider │ ├── __init__.py │ ├── azureopenai │ │ ├── __init__.py │ │ └── azureopenai.py │ ├── google │ │ ├── __init__.py │ │ └── google.py │ └── openai │ │ ├── __init__.py │ │ └── openai.py ├── master │ ├── __init__.py │ ├── agent.py │ ├── functions.py │ └── prompts.py ├── memory │ ├── __init__.py │ └── embeddings.py ├── retrievers │ ├── __init__.py │ ├── bing │ │ ├── __init__.py │ │ └── bing.py │ ├── duckduckgo │ │ ├── __init__.py │ │ └── duckduckgo.py │ ├── google │ │ ├── __init__.py │ │ └── google.py │ ├── searx │ │ ├── __init__.py │ │ └── searx.py │ ├── serpapi │ │ ├── __init__.py │ │ └── serpapi.py │ ├── serper │ │ ├── __init__.py │ │ └── serper.py │ ├── tavily_search │ │ ├── __init__.py │ │ └── tavily_search.py │ └── yahoo │ │ ├── __init__.py │ │ └── yahoo.py ├── scraper │ ├── __init__.py │ ├── arxiv │ │ ├── __init__.py │ │ └── arxiv.py │ ├── beautiful_soup │ │ ├── __init__.py │ │ └── beautiful_soup.py │ ├── newspaper │ │ ├── __init__.py │ │ └── newspaper.py │ ├── pymupdf │ │ ├── 
__init__.py │ │ └── pymupdf.py │ ├── scraper.py │ └── web_base_loader │ │ ├── __init__.py │ │ └── web_base_loader.py └── utils │ ├── __init__.py │ ├── enum.py │ ├── llm.py │ └── validators.py ├── main.py ├── multi_agents ├── README.md ├── agent.py ├── agents │ ├── __init__.py │ ├── editor.py │ ├── master.py │ ├── publisher.py │ ├── researcher.py │ ├── reviewer.py │ ├── reviser.py │ ├── utils │ │ ├── __init__.py │ │ ├── file_formats.py │ │ ├── llms.py │ │ ├── pdf_styles.css │ │ └── views.py │ └── writer.py ├── langgraph.json ├── main.py ├── memory │ ├── __init__.py │ ├── draft.py │ └── research.py ├── requirements.txt └── task.json ├── poetry.lock ├── poetry.toml ├── pyproject.toml ├── requirements.txt ├── scraping ├── js │ └── overlay.js ├── processing │ ├── __init__.py │ ├── html.py │ └── text.py ├── scrape_skills.py └── web_scrape.py ├── setup.py └── tests ├── all-6-report-types.py └── documents-report-source.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | TAVILY_API_KEY= 3 | LANGCHAIN_API_KEY= 4 | DOC_PATH=./my-docs -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | - package-ecosystem: "docker" 13 | directory: "/" 14 | schedule: 15 | interval: "weekly" 16 | -------------------------------------------------------------------------------- /.github/workflows/docker-bulid.yml: -------------------------------------------------------------------------------- 1 | name: GitHub Actions Workflow 2 | run-name: ${{ github.actor }} has started docker build workflow. 
3 | on: 4 | pull_request: 5 | types: [opened, edited, ready_for_review] 6 | jobs: 7 | docker: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Git checkout 11 | uses: actions/checkout@master 12 | - name: Set up QEMU 13 | uses: docker/setup-qemu-action@v2 14 | - name: Set up Docker Buildx 15 | uses: docker/setup-buildx-action@v2 16 | with: 17 | driver: docker 18 | - name: Build Dockerfile 19 | uses: docker/build-push-action@v4 20 | with: 21 | push: false 22 | tags: assafelovic/gpt-researcher:latest 23 | file: Dockerfile 24 | 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #Ignore env containing secrets 2 | .env 3 | .venv 4 | .envrc 5 | 6 | #Ignore Virtual Env 7 | env/ 8 | venv/ 9 | .venv/ 10 | 11 | # Other Environments 12 | ENV/ 13 | env.bak/ 14 | venv.bak/ 15 | 16 | #Ignore generated outputs 17 | outputs/ 18 | 19 | #Ignore my local docs 20 | my-docs/ 21 | 22 | #Ignore pycache 23 | **/__pycache__/ 24 | 25 | #Ignore mypy cache 26 | .mypy_cache/ 27 | node_modules 28 | .idea 29 | .DS_Store 30 | .docusaurus 31 | build 32 | docs/build 33 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 
45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | Assaf.elovic@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 
123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to GPT Researcher 2 | First off, we'd like to welcome and thank you for your interest and effort in contributing to our open source project ❤️. Contributions of all forms are welcome, from new features and bug fixes to documentation and more. 3 | 4 | We are on a mission to build the #1 AI agent for comprehensive, unbiased, and factual research online. And we need your support to achieve this grand vision. 5 | 6 | Please take a moment to review this document in order to make the contribution process easy and effective for everyone involved. 7 | 8 | ## Reporting Issues 9 | 10 | If you come across any issue or have an idea for an improvement, don't hesitate to create an issue on GitHub. Describe your problem in sufficient detail, providing as much relevant information as possible. This way, we can reproduce the issue before attempting to fix it or respond appropriately. 11 | 12 | ## Contributing Code 13 | 14 | 1. **Fork the repository and create your branch from `master`.** 15 | If it's not an urgent bug fix, you should branch from `master` and work on the feature or fix there. 16 | 17 | 2. **Make your changes.** 18 | Implement your changes following best practices for coding in the project's language. 19 | 20 | 3. **Test your changes.** 21 | Make sure your changes pass all the tests if there are any. If the project doesn't have automated testing infrastructure, test your changes manually to confirm they behave as expected. 22 | 23 | 4. **Follow the coding style.** 24 | Ensure your code adheres to the coding conventions used throughout the project, including indentation, accurate comments, etc. 25 | 26 | 5. **Commit your changes.** 27 | Make your git commits informative and concise. This is very helpful for others when they look at the git log. 28 | 29 | 6. **Push to your fork and submit a pull request.** 30 | When your work is ready and passes tests, push your branch to your fork of the repository and submit a pull request from there. 31 | 32 | 7. **Pat yourself on the back and wait for the review.** 33 | Your work is done, congratulations! Now sit tight. The project maintainers will review your submission as soon as possible. They might suggest changes or ask for improvements. Both constructive conversation and patience are key to the collaboration process. 34 | 35 | 36 | ## Documentation 37 | 38 | If you would like to contribute to the project's documentation, please follow the same steps: fork the repository, make your changes, test them, and submit a pull request. 39 | 40 | Documentation is a vital part of any software. It's not just about having good code. Ensuring that users and contributors understand what's going on, how to use the software, and how to contribute is crucial. 41 | 42 | We're grateful for all our contributors, and we look forward to building the world's leading AI research agent hand-in-hand with you. Let's harness the power of Open Source and AI to change the world together! 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11.4-slim-bullseye as install-browser 2 | 3 | RUN apt-get update \ 4 | && apt-get satisfy -y \ 5 | "chromium, chromium-driver (>= 115.0)" \ 6 | && chromium --version && chromedriver --version 7 | 8 | RUN apt-get update \ 9 | && apt-get install -y --fix-missing firefox-esr wget \ 10 | && wget https://github.com/mozilla/geckodriver/releases/download/v0.33.0/geckodriver-v0.33.0-linux64.tar.gz \ 11 | && tar -xvzf geckodriver* \ 12 | && chmod +x geckodriver \ 13 | && mv geckodriver /usr/local/bin/ 14 | 15 | FROM install-browser as gpt-researcher-install 16 | 17 | ENV PIP_ROOT_USER_ACTION=ignore 18 | 19 | RUN mkdir /usr/src/app 20 | WORKDIR /usr/src/app 21 | 22 | COPY ./requirements.txt ./requirements.txt 23 | RUN pip install -r requirements.txt 24 | 25 | FROM gpt-researcher-install AS gpt-researcher 26 | 27 | RUN useradd -ms /bin/bash gpt-researcher \ 28 | && chown -R gpt-researcher:gpt-researcher /usr/src/app 29 | 30 | USER gpt-researcher 31 | 32 | COPY --chown=gpt-researcher:gpt-researcher ./ ./ 33 | 34 | EXPOSE 8000 35 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] 36 | 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Assaf Elovic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/backend/__init__.py -------------------------------------------------------------------------------- /backend/report_type/__init__.py: -------------------------------------------------------------------------------- 1 | from .basic_report.basic_report import BasicReport 2 | from .detailed_report.detailed_report import DetailedReport 3 | 4 | __all__ = [ 5 | "BasicReport", 6 | "DetailedReport" 7 | ] -------------------------------------------------------------------------------- /backend/report_type/basic_report/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/backend/report_type/basic_report/__init__.py -------------------------------------------------------------------------------- /backend/report_type/basic_report/basic_report.py: -------------------------------------------------------------------------------- 1 | from gpt_researcher.master.agent import GPTResearcher 2 | from fastapi import WebSocket 3 | 4 | class BasicReport(): 5 | def __init__(self, query: str, report_type: str, report_source:str, source_urls, config_path: str, websocket: WebSocket): 6 | self.query = query 7 | self.report_type = report_type 8 | self.report_source = report_source 9 | self.source_urls = source_urls 10 | self.config_path = config_path 11 | self.websocket = websocket 12 | 13 | async def run(self): 14 | # Initialize researcher 15 | researcher = GPTResearcher(self.query, self.report_type, self.report_source, self.source_urls, self.config_path, self.websocket) 16 | 17 | # Run research 18 | await researcher.conduct_research() 19 | 20 | # and generate report 21 | report = await researcher.write_report() 22 | 23 | return report -------------------------------------------------------------------------------- /backend/report_type/detailed_report/README.md: -------------------------------------------------------------------------------- 1 | ## Detailed Reports 2 | 3 | Introducing long and detailed reports, with a completely new architecture inspired by the latest [STORM](https://arxiv.org/abs/2402.14207) paper. 4 | 5 | In this method we do the following: 6 | 7 | 1. Trigger Initial GPT Researcher report based on task 8 | 2. Generate subtopics from research summary 9 | 3. For each subtopic the headers of the subtopic report are extracted and accumulated 10 | 4. For each subtopic a report is generated making sure that any information about the headers accumulated until now are not re-generated. 11 | 5. An additional introduction section is written along with a table of contents constructed from the entire report. 12 | 6. 
The final report is constructed by appending these : Intro + Table of contents + Subsection reports -------------------------------------------------------------------------------- /backend/report_type/detailed_report/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/backend/report_type/detailed_report/__init__.py -------------------------------------------------------------------------------- /backend/server.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect 2 | from fastapi.staticfiles import StaticFiles 3 | from fastapi.templating import Jinja2Templates 4 | from pydantic import BaseModel 5 | from backend.websocket_manager import WebSocketManager 6 | from backend.utils import write_md_to_pdf, write_md_to_word, write_text_to_md 7 | import time 8 | import json 9 | import os 10 | 11 | 12 | class ResearchRequest(BaseModel): 13 | task: str 14 | report_type: str 15 | agent: str 16 | 17 | 18 | app = FastAPI() 19 | 20 | app.mount("/site", StaticFiles(directory="./frontend"), name="site") 21 | app.mount("/static", StaticFiles(directory="./frontend/static"), name="static") 22 | 23 | templates = Jinja2Templates(directory="./frontend") 24 | 25 | manager = WebSocketManager() 26 | 27 | 28 | # Dynamic directory for outputs once first research is run 29 | @app.on_event("startup") 30 | def startup_event(): 31 | if not os.path.isdir("outputs"): 32 | os.makedirs("outputs") 33 | app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs") 34 | 35 | @app.get("/") 36 | async def read_root(request: Request): 37 | return templates.TemplateResponse('index.html', {"request": request, "report": None}) 38 | 39 | 40 | @app.websocket("/ws") 41 | async def websocket_endpoint(websocket: WebSocket): 42 | await manager.connect(websocket) 43 | try: 44 | while True: 45 | data = await websocket.receive_text() 46 | if data.startswith("start"): 47 | json_data = json.loads(data[6:]) 48 | task = json_data.get("task") 49 | report_type = json_data.get("report_type") 50 | filename = f"task_{int(time.time())}_{task}" 51 | report_source = json_data.get("report_source") 52 | if task and report_type: 53 | report = await manager.start_streaming(task, report_type, report_source, websocket) 54 | # Saving report as pdf 55 | pdf_path = await write_md_to_pdf(report, filename) 56 | # Saving report as docx 57 | docx_path = await write_md_to_word(report, filename) 58 | # Returning the path of saved report files 59 | md_path = await write_text_to_md(report, filename) 60 | await websocket.send_json({"type": "path", "output": {"pdf": pdf_path, "docx": docx_path, "md": md_path}}) 61 | else: 62 | print("Error: not enough parameters provided.") 63 | 64 | except WebSocketDisconnect: 65 | await manager.disconnect(websocket) 66 | 67 | -------------------------------------------------------------------------------- /backend/utils.py: -------------------------------------------------------------------------------- 1 | import aiofiles 2 | import urllib 3 | import mistune 4 | 5 | 6 | async def write_to_file(filename: str, text: str) -> None: 7 | """Asynchronously write text to a file in UTF-8 encoding. 8 | 9 | Args: 10 | filename (str): The filename to write to. 11 | text (str): The text to write. 
12 | """ 13 | # Convert text to UTF-8, replacing any problematic characters 14 | text_utf8 = text.encode('utf-8', errors='replace').decode('utf-8') 15 | 16 | async with aiofiles.open(filename, "w", encoding='utf-8') as file: 17 | await file.write(text_utf8) 18 | 19 | 20 | async def write_text_to_md(text: str, filename: str = "") -> str: 21 | """Writes text to a Markdown file and returns the file path. 22 | 23 | Args: 24 | text (str): Text to write to the Markdown file. 25 | 26 | Returns: 27 | str: The file path of the generated Markdown file. 28 | """ 29 | file_path = f"outputs/{filename[:60]}.md" 30 | await write_to_file(file_path, text) 31 | return file_path 32 | 33 | 34 | async def write_md_to_pdf(text: str, filename: str = "") -> str: 35 | """Converts Markdown text to a PDF file and returns the file path. 36 | 37 | Args: 38 | text (str): Markdown text to convert. 39 | 40 | Returns: 41 | str: The encoded file path of the generated PDF. 42 | """ 43 | file_path = f"outputs/{filename[:60]}.pdf" 44 | 45 | try: 46 | from md2pdf.core import md2pdf 47 | md2pdf(file_path, 48 | md_content=text, 49 | #md_file_path=f"{file_path}.md", 50 | css_file_path="./frontend/pdf_styles.css", 51 | base_url=None) 52 | print(f"Report written to {file_path}.pdf") 53 | except Exception as e: 54 | print(f"Error in converting Markdown to PDF: {e}") 55 | return "" 56 | 57 | encoded_file_path = urllib.parse.quote(file_path) 58 | return encoded_file_path 59 | 60 | 61 | async def write_md_to_word(text: str, filename: str = "") -> str: 62 | """Converts Markdown text to a DOCX file and returns the file path. 63 | 64 | Args: 65 | text (str): Markdown text to convert. 66 | 67 | Returns: 68 | str: The encoded file path of the generated DOCX. 69 | """ 70 | file_path = f"outputs/{filename[:60]}.docx" 71 | 72 | try: 73 | from docx import Document 74 | from htmldocx import HtmlToDocx 75 | # Convert report markdown to HTML 76 | html = mistune.html(text) 77 | # Create a document object 78 | doc = Document() 79 | # Convert the html generated from the report to document format 80 | HtmlToDocx().add_html_to_document(html, doc) 81 | 82 | # Saving the docx document to file_path 83 | doc.save(file_path) 84 | 85 | print(f"Report written to {file_path}") 86 | 87 | encoded_file_path = urllib.parse.quote(file_path) 88 | return encoded_file_path 89 | 90 | except Exception as e: 91 | print(f"Error in converting Markdown to DOCX: {e}") 92 | return "" 93 | -------------------------------------------------------------------------------- /backend/websocket_manager.py: -------------------------------------------------------------------------------- 1 | # connect any client to gpt-researcher using websocket 2 | import asyncio 3 | import datetime 4 | from typing import Dict, List 5 | 6 | from fastapi import WebSocket 7 | 8 | from backend.report_type import BasicReport, DetailedReport 9 | 10 | from gpt_researcher.utils.enum import ReportType 11 | 12 | 13 | class WebSocketManager: 14 | """Manage websockets""" 15 | 16 | def __init__(self): 17 | """Initialize the WebSocketManager class.""" 18 | self.active_connections: List[WebSocket] = [] 19 | self.sender_tasks: Dict[WebSocket, asyncio.Task] = {} 20 | self.message_queues: Dict[WebSocket, asyncio.Queue] = {} 21 | 22 | async def start_sender(self, websocket: WebSocket): 23 | """Start the sender task.""" 24 | queue = self.message_queues.get(websocket) 25 | if not queue: 26 | return 27 | 28 | while True: 29 | message = await queue.get() 30 | if websocket in self.active_connections: 31 | try: 32 | await 
websocket.send_text(message) 33 | except: 34 | break 35 | else: 36 | break 37 | 38 | async def connect(self, websocket: WebSocket): 39 | """Connect a websocket.""" 40 | await websocket.accept() 41 | self.active_connections.append(websocket) 42 | self.message_queues[websocket] = asyncio.Queue() 43 | self.sender_tasks[websocket] = asyncio.create_task( 44 | self.start_sender(websocket)) 45 | 46 | async def disconnect(self, websocket: WebSocket): 47 | """Disconnect a websocket.""" 48 | if websocket in self.active_connections: 49 | self.active_connections.remove(websocket) 50 | self.sender_tasks[websocket].cancel() 51 | await self.message_queues[websocket].put(None) 52 | del self.sender_tasks[websocket] 53 | del self.message_queues[websocket] 54 | 55 | async def start_streaming(self, task, report_type, report_source, websocket): 56 | """Start streaming the output.""" 57 | report = await run_agent(task, report_type, report_source, websocket) 58 | return report 59 | 60 | 61 | async def run_agent(task, report_type, report_source, websocket): 62 | """Run the agent.""" 63 | # measure time 64 | start_time = datetime.datetime.now() 65 | # add customized JSON config file path here 66 | config_path = "" 67 | # Instead of running the agent directly run it through the different report type classes 68 | if report_type == ReportType.DetailedReport.value: 69 | researcher = DetailedReport(query=task, report_type=report_type, report_source=report_source, 70 | source_urls=None, config_path=config_path, websocket=websocket) 71 | else: 72 | researcher = BasicReport(query=task, report_type=report_type, report_source=report_source, 73 | source_urls=None, config_path=config_path, websocket=websocket) 74 | 75 | report = await researcher.run() 76 | # measure time 77 | end_time = datetime.datetime.now() 78 | await websocket.send_json({"type": "logs", "output": f"\nTotal run time: {end_time - start_time}\n"}) 79 | 80 | return report 81 | -------------------------------------------------------------------------------- /cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides a command line interface for the GPTResearcher class. 
3 | 4 | Usage: 5 | 6 | ```shell 7 | python cli.py "<query>" --report_type <report_type> 8 | ``` 9 | 10 | """ 11 | import asyncio 12 | import argparse 13 | from argparse import RawTextHelpFormatter 14 | from uuid import uuid4 15 | 16 | from dotenv import load_dotenv 17 | 18 | from gpt_researcher import GPTResearcher 19 | from gpt_researcher.utils.enum import ReportType 20 | 21 | # ============================================================================= 22 | # CLI 23 | # ============================================================================= 24 | 25 | cli = argparse.ArgumentParser( 26 | description="Generate a research report.", 27 | # Enables the use of newlines in the help message 28 | formatter_class=RawTextHelpFormatter) 29 | 30 | # ===================================== 31 | # Arg: Query 32 | # ===================================== 33 | 34 | cli.add_argument( 35 | # Position 0 argument 36 | "query", 37 | type=str, 38 | help="The query to conduct research on.") 39 | 40 | # ===================================== 41 | # Arg: Report Type 42 | # ===================================== 43 | 44 | choices = [report_type.value for report_type in ReportType] 45 | 46 | report_type_descriptions = { 47 | ReportType.ResearchReport.value: "Summary - Short and fast (~2 min)", 48 | ReportType.DetailedReport.value: "Detailed - In depth and longer (~5 min)", 49 | ReportType.ResourceReport.value: "", 50 | ReportType.OutlineReport.value: "", 51 | ReportType.CustomReport.value: "", 52 | ReportType.SubtopicReport.value: "" 53 | } 54 | 55 | cli.add_argument( 56 | "--report_type", 57 | type=str, 58 | help="The type of report to generate. Options:\n" + "\n".join( 59 | f" {choice}: {report_type_descriptions[choice]}" for choice in choices 60 | ), 61 | # Deserialize ReportType as a List of strings: 62 | choices=choices, 63 | required=True) 64 | 65 | # ============================================================================= 66 | # Main 67 | # ============================================================================= 68 | 69 | 70 | async def main(args): 71 | """ 72 | Conduct research on the given query, generate the report, and write 73 | it as a markdown file to the output directory. 
74 | """ 75 | researcher = GPTResearcher( 76 | query=args.query, 77 | report_type=args.report_type) 78 | 79 | await researcher.conduct_research() 80 | 81 | report = await researcher.write_report() 82 | 83 | # Write the report to a file 84 | artifact_filepath = f"outputs/{uuid4()}.md" 85 | with open(artifact_filepath, "w") as f: 86 | f.write(report) 87 | 88 | print(f"Report written to '{artifact_filepath}'") 89 | 90 | if __name__ == "__main__": 91 | load_dotenv() 92 | args = cli.parse_args() 93 | asyncio.run(main(args)) 94 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | gpt-researcher: 4 | image: kramer1346/gpt-researcher 5 | build: ./ 6 | environment: 7 | OPENAI_API_KEY: ${OPENAI_API_KEY} 8 | TAVILY_API_KEY: ${TAVILY_API_KEY} 9 | LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} 10 | ports: 11 | - 8000:8000 -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | docs.gptr.dev -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator. 4 | 5 | ## Prerequisites 6 | 7 | To build and test documentation locally, begin by downloading and installing [Node.js](https://nodejs.org/en/download/), and then installing [Yarn](https://classic.yarnpkg.com/en/). 8 | On Windows, you can install via the npm package manager (npm) which comes bundled with Node.js: 9 | 10 | ```console 11 | npm install --global yarn 12 | ``` 13 | 14 | ## Installation 15 | 16 | ```console 17 | pip install pydoc-markdown 18 | cd website 19 | yarn install 20 | ``` 21 | 22 | ## Local Development 23 | 24 | Navigate to the website folder and run: 25 | 26 | ```console 27 | pydoc-markdown 28 | yarn start 29 | ``` 30 | 31 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 
32 | -------------------------------------------------------------------------------- /docs/babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')], 3 | }; 4 | -------------------------------------------------------------------------------- /docs/blog/2023-09-22-gpt-researcher/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/blog/2023-09-22-gpt-researcher/architecture.png -------------------------------------------------------------------------------- /docs/blog/2023-09-22-gpt-researcher/planner.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/blog/2023-09-22-gpt-researcher/planner.jpeg -------------------------------------------------------------------------------- /docs/blog/2023-11-12-openai-assistant/diagram-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/blog/2023-11-12-openai-assistant/diagram-1.png -------------------------------------------------------------------------------- /docs/blog/2023-11-12-openai-assistant/diagram-assistant.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/blog/2023-11-12-openai-assistant/diagram-assistant.jpeg -------------------------------------------------------------------------------- /docs/blog/2024-05-19-gptr-langgraph/architecture.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/blog/2024-05-19-gptr-langgraph/architecture.jpeg -------------------------------------------------------------------------------- /docs/blog/2024-05-19-gptr-langgraph/blog-langgraph.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/blog/2024-05-19-gptr-langgraph/blog-langgraph.jpeg -------------------------------------------------------------------------------- /docs/blog/authors.yml: -------------------------------------------------------------------------------- 1 | assafe: 2 | name: Assaf Elovic 3 | title: Creator @ GPT Researcher 4 | url: https://github.com/assafelovic 5 | image_url: https://lh3.googleusercontent.com/a/ACg8ocJtrLku69VG_2Y0sJa5mt66gIGNaEBX5r_mgE6CRPEb7A=s96-c 6 | -------------------------------------------------------------------------------- /docs/docs/contribute.md: -------------------------------------------------------------------------------- 1 | # Contribute 2 | 3 | We highly welcome contributions! Please check out [contributing](https://github.com/assafelovic/gpt-researcher/blob/master/CONTRIBUTING.md) if you're interested. 4 | 5 | Please check out our [roadmap](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) page and reach out to us via our [Discord community](https://discord.gg/2pFkc83fRq) if you're interested in joining our mission. 
-------------------------------------------------------------------------------- /docs/docs/examples/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ### Run PIP Package 4 | ```python 5 | from gpt_researcher import GPTResearcher 6 | import asyncio 7 | 8 | 9 | async def main(): 10 | """ 11 | This is a sample script that shows how to run a research report. 12 | """ 13 | # Query 14 | query = "What happened in the latest burning man floods?" 15 | 16 | # Report Type 17 | report_type = "research_report" 18 | 19 | # Initialize the researcher 20 | researcher = GPTResearcher(query=query, report_type=report_type, config_path=None) 21 | # Conduct research on the given query 22 | await researcher.conduct_research() 23 | # Write the report 24 | report = await researcher.write_report() 25 | 26 | return report 27 | 28 | 29 | if __name__ == "__main__": 30 | asyncio.run(main()) 31 | ``` -------------------------------------------------------------------------------- /docs/docs/faq.md: -------------------------------------------------------------------------------- 1 | # Frequently Asked Questions 2 | 3 | ### How do I get started? 4 | It really depends on what you're aiming for. 5 | 6 | If you're looking to connect your AI application to the internet with Tavily tailored API, check out the [Tavily API](https://docs.tavily.com/docs/tavily-api/introductionn) documentation. 7 | If you're looking to build and deploy our open source autonomous research agent GPT Researcher, please see [GPT Researcher](/docs/gpt-researcher/introduction) documentation. 8 | You can also check out demos and examples for inspiration [here](/docs/examples/examples). 9 | ### What is GPT Researcher? 10 | GPT Researcher is a popular open source autonomous research agent that takes care of the tedious task of research for you, by scraping, filtering and aggregating over 20+ web sources per a single research task. 11 | 12 | GPT Researcher is built with best practices for leveraging LLMs (prompt engineering, RAG, chains, embeddings, etc), and is optimized for quick and efficient research. It is also fully customizable and can be tailored to your specific needs. 13 | 14 | To learn more about GPT Researcher, check out the [documentation page](/docs/gpt-researcher/introduction). 15 | ### How much does each research run cost? 16 | A research task using GPT Researcher costs around $0.01 per a single run (for GPT-4 usage). We're constantly optimizing LLM calls to reduce costs and improve performance. 17 | ### How do you ensure the report is factual and accurate? 18 | we do our best to ensure that the information we provide is factual and accurate. We do this by using multiple sources, and by using proprietary AI to score and rank the most relevant and accurate information. We also use proprietary AI to filter out irrelevant information and sources. 19 | 20 | Lastly, by using RAG and other techniques, we ensure that the information is relevant to the context of the research task, leading to more accurate generative AI content and reduced hallucinations. 21 | 22 | ### What are your plans for the future? 23 | We're constantly working on improving our products and services. We're currently working on improving our search API together with design partners, and adding more data sources to our search engine. We're also working on improving our research agent GPT Researcher, and adding more features to it while growing our amazing open source community. 
 24 | 25 | If you're interested in our roadmap or looking to collaborate, check out our [roadmap page](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap). 26 | 27 | Feel free to [contact us](mailto:assafelovic@gmail.com) if you have any further questions or suggestions! -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/config.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | The config.py file enables you to customize GPT Researcher to your specific needs and preferences. 4 | 5 | Thanks to our amazing community and contributions, GPT Researcher supports multiple LLMs and Retrievers. 6 | In addition, GPT Researcher can be tailored to various report formats (such as APA), word count, research iteration depth, etc. 7 | 8 | GPT Researcher defaults to our recommended suite of integrations: [OpenAI](https://platform.openai.com/docs/overview) for LLM calls and [Tavily API](https://app.tavily.com) for retrieving realtime online information. 9 | 10 | As seen below, OpenAI still stands as the superior LLM. We assume it will stay this way for some time, and that prices will only continue to decrease, while performance and speed increase over time. 11 | 12 |
13 | 14 |
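Every setting shown in the config below is read from an environment variable, so you can override any default by adding a line to your `.env` file. For example, a few illustrative overrides (the values here are examples only):

```bash
RETRIEVER=duckduckgo
REPORT_FORMAT=APA
TOTAL_WORDS=1200
MAX_ITERATIONS=4
```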
15 | 16 | Here is an example of the default config.py file found in `/gpt_researcher/config/`: 17 | 18 | ```python 19 | import os 20 | def __init__(self, config_file: str = None): 21 | """Initialize the config class.""" 22 | self.config_file = os.path.expanduser(config_file) if config_file else os.getenv('CONFIG_FILE') 23 | self.retriever = os.getenv('RETRIEVER', "tavily") 24 | self.embedding_provider = os.getenv('EMBEDDING_PROVIDER', 'openai') 25 | self.llm_provider = os.getenv('LLM_PROVIDER', "openai") 26 | self.fast_llm_model = os.getenv('FAST_LLM_MODEL', "gpt-3.5-turbo-16k") 27 | self.smart_llm_model = os.getenv('SMART_LLM_MODEL', "gpt-4o") 28 | self.fast_token_limit = int(os.getenv('FAST_TOKEN_LIMIT', 2000)) 29 | self.smart_token_limit = int(os.getenv('SMART_TOKEN_LIMIT', 4000)) 30 | self.browse_chunk_max_length = int(os.getenv('BROWSE_CHUNK_MAX_LENGTH', 8192)) 31 | self.summary_token_limit = int(os.getenv('SUMMARY_TOKEN_LIMIT', 700)) 32 | self.temperature = float(os.getenv('TEMPERATURE', 0.55)) 33 | self.user_agent = os.getenv('USER_AGENT', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " 34 | "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0") 35 | self.max_search_results_per_query = int(os.getenv('MAX_SEARCH_RESULTS_PER_QUERY', 5)) 36 | self.memory_backend = os.getenv('MEMORY_BACKEND', "local") 37 | self.total_words = int(os.getenv('TOTAL_WORDS', 800)) 38 | self.report_format = os.getenv('REPORT_FORMAT', "APA") 39 | self.max_iterations = int(os.getenv('MAX_ITERATIONS', 3)) 40 | self.agent_role = os.getenv('AGENT_ROLE', None) 41 | self.scraper = os.getenv("SCRAPER", "bs") 42 | self.max_subtopics = os.getenv("MAX_SUBTOPICS", 3) 43 | self.doc_path = os.getenv("DOC_PATH", "") 44 | ``` 45 | To change the default configurations, you can simply add env variables to your `.env` file as named in the config.py file. 46 | 47 | Please note that you can also include your own external JSON file `config.json` by adding the path in the `config_file` param. 48 | 49 | To learn more about additional LLM support you can check out the docs [here](/docs/gpt-researcher/llms). 50 | 51 | You can also change the search engine by modifying the `retriever` param to others such as `duckduckgo`, `bing`, `google`, `serper`, `searx` and more. [Check here](https://github.com/assafelovic/gpt-researcher/tree/master/gpt_researcher/retrievers) for supported retrievers. 52 | 53 | Please note that you might need to sign up and obtain an API key for any of the other supported retrievers and LLM providers. -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/example.md: -------------------------------------------------------------------------------- 1 | # Agent Example 2 | 3 | If you're interested in using GPT Researcher as a standalone agent, you can easily import it into any existing Python project. Below, is an example of calling the agent to generate a research report: 4 | 5 | ```python 6 | from gpt_researcher import GPTResearcher 7 | import asyncio 8 | 9 | # It is best to define global constants at the top of your script 10 | QUERY = "What happened in the latest burning man floods?" 11 | REPORT_TYPE = "research_report" 12 | 13 | async def fetch_report(query, report_type): 14 | """ 15 | Fetch a research report based on the provided query and report type. 
16 | """ 17 | researcher = GPTResearcher(query=query, report_type=report_type, config_path=None) 18 | await researcher.conduct_research() 19 | report = await researcher.write_report() 20 | return report 21 | 22 | async def generate_research_report(): 23 | """ 24 | This is a sample script that executes an async main function to run a research report. 25 | """ 26 | report = await fetch_report(QUERY, REPORT_TYPE) 27 | print(report) 28 | 29 | if __name__ == "__main__": 30 | asyncio.run(generate_research_report()) 31 | ``` 32 | 33 | You can further enhance this example to use the returned report as context for generating valuable content such as news article, marketing content, email templates, newsletters, etc. 34 | 35 | You can also use GPT Researcher to gather information about code documentation, business analysis, financial information and more. All of which can be used to complete much more complex tasks that require factual and high quality realtime information. 36 | -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/getting-started.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | > **Step 0** - Install Python 3.11 or later. [See here](https://www.tutorialsteacher.com/python/install-python) for a step-by-step guide. 3 | 4 | > **Step 1** - Download the project and navigate to its directory 5 | 6 | ```bash 7 | $ git clone https://github.com/assafelovic/gpt-researcher.git 8 | $ cd gpt-researcher 9 | ``` 10 | 11 | > **Step 3** - Set up API keys using two methods: exporting them directly or storing them in a `.env` file. 12 | 13 | For Linux/Temporary Windows Setup, use the export method: 14 | 15 | ```bash 16 | export OPENAI_API_KEY={Your OpenAI API Key here} 17 | export TAVILY_API_KEY={Your Tavily API Key here} 18 | ``` 19 | 20 | For a more permanent setup, create a `.env` file in the current `gpt-researcher` folder and input the keys as follows: 21 | 22 | ```bash 23 | OPENAI_API_KEY={Your OpenAI API Key here} 24 | TAVILY_API_KEY={Your Tavily API Key here} 25 | ``` 26 | 27 | - **For LLM, we recommend [OpenAI GPT](https://platform.openai.com/docs/guides/gpt)**, but you can use any other LLM model (including open sources) supported by [Langchain Adapter](https://python.langchain.com/docs/guides/adapters/openai), simply change the llm model and provider in config/config.py. 28 | - **For search engine, we recommend [Tavily Search API](https://app.tavily.com)**, but you can also refer to other search engines of your choice by changing the search provider in config/config.py to `"duckduckgo"`, `"googleAPI"`, `"bing"`, `"googleSerp"`, or `"searx"`. Then add the corresponding env API key as seen in the config.py file. 29 | 30 | ## Quickstart 31 | 32 | > **Step 1** - Install dependencies 33 | 34 | ```bash 35 | $ pip install -r requirements.txt 36 | ``` 37 | 38 | > **Step 2** - Run the agent with FastAPI 39 | 40 | ```bash 41 | $ uvicorn main:app --reload 42 | ``` 43 | 44 | > **Step 3** - Go to http://localhost:8000 on any browser and enjoy researching! 45 | 46 | ## Using Virtual Environment or Poetry 47 | Select either based on your familiarity with each: 48 | 49 | ### Virtual Environment 50 | 51 | #### *Establishing the Virtual Environment with Activate/Deactivate configuration* 52 | 53 | Create a virtual environment using the `venv` package with the environment name ``, for example, `env`. 
Execute the following command in the PowerShell/CMD terminal: 54 | 55 | ```bash 56 | python -m venv env 57 | ``` 58 | 59 | To activate the virtual environment, use the following activation script in PowerShell/CMD terminal: 60 | 61 | ```bash 62 | .\env\Scripts\activate 63 | ``` 64 | 65 | To deactivate the virtual environment, run the following deactivation script in PowerShell/CMD terminal: 66 | 67 | ```bash 68 | deactivate 69 | ``` 70 | 71 | #### *Install the dependencies for a Virtual environment* 72 | 73 | After activating the `env` environment, install dependencies using the `requirements.txt` file with the following command: 74 | 75 | ```bash 76 | python -m pip install -r requirements.txt 77 | ``` 78 | 79 |
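The activation commands above target PowerShell/CMD on Windows; on macOS or Linux, the equivalent commands to create and activate the same environment are:

```bash
python3 -m venv env
source env/bin/activate
```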
80 | 81 | ### Poetry 82 | 83 | #### *Establishing the Poetry dependencies and virtual environment with Poetry version `~1.7.1`* 84 | 85 | Install project dependencies and simultaneously create a virtual environment for the specified project. By executing this command, Poetry reads the project's "pyproject.toml" file to determine the required dependencies and their versions, ensuring a consistent and isolated development environment. The virtual environment allows for a clean separation of project-specific dependencies, preventing conflicts with system-wide packages and enabling more straightforward dependency management throughout the project's lifecycle. 86 | 87 | ```bash 88 | poetry install 89 | ``` 90 | 91 | #### *Activate the virtual environment associated with a Poetry project* 92 | 93 | By running this command, the user enters a shell session within the isolated environment associated with the project, providing a dedicated space for development and execution. This virtual environment ensures that the project dependencies are encapsulated, avoiding conflicts with system-wide packages. Activating the Poetry shell is essential for seamlessly working on a project, as it ensures that the correct versions of dependencies are used and provides a controlled environment conducive to efficient development and testing. 94 | 95 | ```bash 96 | poetry shell 97 | ``` 98 | 99 | ### *Run the app* 100 | > Launch the FastAPI application agent on a *Virtual Environment or Poetry* setup by executing the following command: 101 | ```bash 102 | python -m uvicorn main:app --reload 103 | ``` 104 | > Visit http://localhost:8000 in any web browser and explore your research! 105 | 106 |
 107 | 108 | 109 | ## Try it with Docker 110 | 111 | > **Step 1** - Install Docker 112 | 113 | Follow instructions at https://docs.docker.com/engine/install/ 114 | 115 | > **Step 2** - Create a .env file with your OpenAI key, or simply export it 116 | 117 | ```bash 118 | $ export OPENAI_API_KEY={Your API Key here} 119 | $ export TAVILY_API_KEY={Your Tavily API Key here} 120 | ``` 121 | 122 | > **Step 3** - Run the application 123 | 124 | ```bash 125 | $ docker-compose up 126 | ``` 127 | 128 | > **Step 4** - Go to http://localhost:8000 on any browser and enjoy researching! 129 | -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/introduction.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | **[GPT Researcher](https://gptr.dev) is an autonomous agent designed for comprehensive online research on a variety of tasks.** 4 | 5 | The agent can produce detailed, factual and unbiased research reports, with customization options for focusing on relevant resources, outlines, and lessons. Inspired by the recent [Plan-and-Solve](https://arxiv.org/abs/2305.04091) and [RAG](https://arxiv.org/abs/2005.11401) papers, GPT Researcher addresses issues of speed, determinism and reliability, offering more stable performance and increased speed through parallelized agent work, as opposed to synchronous operations. 6 | 7 | ## Why GPT Researcher? 8 | 9 | - Forming objective conclusions through manual research can take time, sometimes weeks, to find the right resources and information. 10 | - Current LLMs are trained on past and outdated information, with heavy risks of hallucinations, making them almost irrelevant for research tasks. 11 | - Solutions that enable web search (such as ChatGPT + Web Plugin) only consider limited resources and content that in some cases results in superficial conclusions or biased answers. 12 | - Using only a selection of resources can create bias in determining the right conclusions for research questions or tasks. 13 | 14 | ## Architecture 15 | The main idea is to run "planner" and "execution" agents, where the planner generates questions to research, and the execution agents seek the most related information based on each generated research question. Finally, the planner filters and aggregates all related information and creates a research report.

 16 | The agents leverage both gpt-3.5-turbo and gpt-4-turbo (128K context) to complete a research task. We optimize for costs by using each only when necessary. **The average research task takes around 3 minutes to complete, and costs ~$0.1.** 17 | 18 |
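To make that flow concrete, here is a minimal, illustrative sketch of the planner/execution loop in Python. The function names and bodies below are placeholders for explanation only; they are not the project's actual API:

```python
import asyncio

# Illustrative placeholders only; these are NOT the project's real functions.
async def plan_questions(task: str) -> list[str]:
    # Planner: an LLM call would generate the research questions for the task.
    return [f"{task} (question {i})" for i in range(1, 4)]

async def search_and_summarize(question: str) -> str:
    # Execution agent: search the web for the question and summarize the sources found.
    return f"summary of findings for: {question}"

async def write_report(task: str, findings: list[str]) -> str:
    # Planner: filter and aggregate all findings into a single report.
    return f"# Report on {task}\n\n" + "\n\n".join(findings)

async def run_research(task: str) -> str:
    questions = await plan_questions(task)
    # Execution agents run in parallel, one per generated research question.
    findings = await asyncio.gather(*(search_and_summarize(q) for q in questions))
    return await write_report(task, findings)

# Example: print(asyncio.run(run_research("Is AI in a hype cycle?")))
```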
19 | 20 |
 21 | 22 | 23 | More specifically: 24 | * Create a domain-specific agent based on the research query or task. 25 | * Generate a set of research questions that together form an objective opinion on any given task. 26 | * For each research question, trigger a crawler agent that scrapes online resources for information relevant to the given task. 27 | * For each scraped resource, summarize the relevant information and keep track of its sources. 28 | * Finally, filter and aggregate all summarized sources and generate a final research report. 29 | 30 | ## Demo 31 | 32 | 33 | ## Tutorials 34 | - [How it Works](https://medium.com/better-programming/how-i-built-an-autonomous-ai-agent-for-online-research-93435a97c6c) 35 | - [How to Install](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea) 36 | - [Live Demo](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8) 37 | - [Homepage](https://gptr.dev) 38 | 39 | ## Features 40 | - 📝 Generate research, outline, resource and lesson reports 41 | - 📜 Can generate long and detailed research reports (over 2K words) 42 | - 🌐 Aggregates over 20 web sources per research task to form objective and factual conclusions 43 | - 🖥️ Includes an easy-to-use web interface (HTML/CSS/JS) 44 | - 🔍 Scrapes web sources with JavaScript support 45 | - 📂 Keeps track and context of visited and used web sources 46 | - 📄 Export research reports to PDF, Word and more... 47 | 48 | 49 | ## Disclaimer 50 | 51 | This project, GPT Researcher, is an experimental application and is provided "as-is" without any warranty, express or implied. We are sharing the code for academic purposes under the MIT license. Nothing herein is academic advice, and it is NOT a recommendation for use in academic or research papers. 52 | 53 | Our view on unbiased research claims: 54 | 1. The whole point of our scraping system is to reduce incorrect facts. How? The more sites we scrape, the lower the chance of incorrect data. We scrape 20 sources per research task, so the chance that they are all wrong is extremely low. 55 | 2. We do not aim to eliminate biases; we aim to reduce them as much as possible. **We are here as a community to figure out the most effective human/llm interactions.** 56 | 3. In research, people also tend towards biases, as most already have opinions on the topics they research. This tool scrapes many opinions and will evenly explain diverse views that a biased person would never have read. 57 | 58 | **Please note that the use of the GPT-4 language model can be expensive due to its token usage.** By utilizing this project, you acknowledge that you are responsible for monitoring and managing your own token usage and the associated costs. It is highly recommended to check your OpenAI API usage regularly and set up any necessary limits or alerts to prevent unexpected charges. 59 | -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/langgraph.md: -------------------------------------------------------------------------------- 1 | # GPTR x LangGraph 2 | 3 | We are strong advocates for the future of AI agents, envisioning a world where autonomous agents communicate and collaborate as a cohesive team to undertake and complete complex tasks. 4 | 5 | We hold the belief that research is a pivotal element in successfully tackling these complex tasks, ensuring superior outcomes. 
6 | 7 | Consider the scenario of developing a coding agent responsible for coding tasks using the latest API documentation and best practices. It would be wise to integrate an agent specializing in research to curate the most recent and relevant documentation, before crafting a technical design that would subsequently be handed off to the coding assistant tasked with generating the code. This approach is applicable across various sectors, including finance, business analysis, healthcare, marketing, and legal, among others. 8 | 9 | One multi-agent framework that we're excited about is [LangGraph](https://python.langchain.com/docs/langgraph/), built by the team at [Langchain](https://www.langchain.com/). 10 | LangGraph is a Python library for building stateful, multi-actor applications with LLMs. It extends the [LangChain Expression Language](https://python.langchain.com/docs/expression_language/) with the ability to coordinate multiple chains (or actors) across multiple steps of computation. 11 | 12 | What's great about LangGraph is that it follows a DAG architecture, enabling each specialized agent to communicate with one another, and subsequently trigger actions among other agents within the graph. 13 | 14 | We've added an example for leveraging [GPT Researcher with LangGraph](https://github.com/assafelovic/gpt-researcher/tree/master/multi_agents) which can be found in `/multi_agents`. 15 | 16 | The example demonstrates a generic use case for an editorial agent team that works together to complete a research report on a given task. 17 | 18 | ## The Multi Agent Team 19 | The research team is made up of 7 AI agents: 20 | - **Chief Editor** - Oversees the research process and manages the team. This is the "master" agent that coordinates the other agents using Langgraph. 21 | - **Researcher** (gpt-researcher) - A specialized autonomous agent that conducts in depth research on a given topic. 22 | - **Editor** - Responsible for planning the research outline and structure. 23 | - **Reviewer** - Validates the correctness of the research results given a set of criteria. 24 | - **Revisor** - Revises the research results based on the feedback from the reviewer. 25 | - **Writer** - Responsible for compiling and writing the final report. 26 | - **Publisher** - Responsible for publishing the final report in various formats. 27 | 28 | ## How it works 29 | Generally, the process is based on the following stages: 30 | 1. Planning stage 31 | 2. Data collection and analysis 32 | 3. Writing and submission 33 | 4. Review and revision 34 | 5. Publication 35 | 36 | ### Architecture 37 |
38 | 39 |
40 |
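The architecture above maps onto a fairly compact LangGraph program. The sketch below is illustrative only: the node names, stub functions and state schema are hypothetical simplifications, and the real graph lives in `/multi_agents`. It shows the general shape of wiring such a team together with `StateGraph`.

```python
from typing import TypedDict, List
from langgraph.graph import StateGraph, END

# Simplified shared state (hypothetical; the real schema differs).
class ResearchState(TypedDict, total=False):
    task: str
    initial_research: str
    outline: List[str]
    report: str

# Stub node functions standing in for the real agents.
def browser(state: ResearchState) -> dict:
    return {"initial_research": f"initial findings for: {state['task']}"}

def editor(state: ResearchState) -> dict:
    return {"outline": ["introduction", "analysis", "conclusion"]}

def writer(state: ResearchState) -> dict:
    return {"report": f"report on {state['task']} covering {state['outline']}"}

def publisher(state: ResearchState) -> dict:
    print("publishing:", state["report"])
    return {}

workflow = StateGraph(ResearchState)
workflow.add_node("browser", browser)      # initial research (gpt-researcher)
workflow.add_node("editor", editor)        # plans the outline
workflow.add_node("writer", writer)        # compiles the final report
workflow.add_node("publisher", publisher)  # exports to the requested formats

workflow.set_entry_point("browser")
workflow.add_edge("browser", "editor")
workflow.add_edge("editor", "writer")
workflow.add_edge("writer", "publisher")
workflow.add_edge("publisher", END)

app = workflow.compile()
app.invoke({"task": "Is AI in a hype cycle?"})
```

The actual graph in `/multi_agents` builds on this backbone by adding the reviewer and reviser loop and by running the per-topic research in parallel, as described in the steps below.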
41 | 42 | ### Steps 43 | More specifically (as seen in the architecture diagram) the process is as follows: 44 | - Browser (gpt-researcher) - Browses the internet for initial research based on the given research task. 45 | - Editor - Plans the report outline and structure based on the initial research. 46 | - For each outline topic (in parallel): 47 | - Researcher (gpt-researcher) - Runs an in depth research on the subtopics and writes a draft. 48 | - Reviewer - Validates the correctness of the draft given a set of criteria and provides feedback. 49 | - Revisor - Revises the draft until it is satisfactory based on the reviewer feedback. 50 | - Writer - Compiles and writes the final report including an introduction, conclusion and references section from the given research findings. 51 | - Publisher - Publishes the final report to multi formats such as PDF, Docx, Markdown, etc. 52 | 53 | ## How to run 54 | 1. Install required packages: 55 | ```bash 56 | pip install -r requirements.txt 57 | ``` 58 | 2. Run the application: 59 | ```bash 60 | python main.py 61 | ``` 62 | 63 | ## Usage 64 | To change the research query and customize the report, edit the `task.json` file in the main directory. 65 | #### Task.json contains the following fields: 66 | - `query` - The research query or task. 67 | - `model` - The OpenAI LLM to use for the agents. 68 | - `max_sections` - The maximum number of sections in the report. Each section is a subtopic of the research query. 69 | - `publish_formats` - The formats to publish the report in. The reports will be written in the `output` directory. 70 | - `follow_guidelines` - If true, the research report will follow the guidelines below. It will take longer to complete. If false, the report will be generated faster but may not follow the guidelines. 71 | - `guidelines` - A list of guidelines that the report must follow. 72 | - `verbose` - If true, the application will print detailed logs to the console. 73 | 74 | #### For example: 75 | ```json 76 | { 77 | "query": "Is AI in a hype cycle?", 78 | "model": "gpt-4o", 79 | "max_sections": 3, 80 | "publish_formats": { 81 | "markdown": true, 82 | "pdf": true, 83 | "docx": true 84 | }, 85 | "follow_guidelines": true, 86 | "guidelines": [ 87 | "The report MUST fully answer the original question", 88 | "The report MUST be written in apa format", 89 | "The report MUST be written in english" 90 | ], 91 | "verbose": true 92 | } 93 | ``` 94 | -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/llms.md: -------------------------------------------------------------------------------- 1 | # Configure LLM 2 | As described in the [introduction](/docs/gpt-researcher/config), the default LLM is OpenAI due to its superior performance and speed. 3 | However, GPT Researcher supports various open/closed source LLMs, and you can easily switch between them by adding the `LLM_PROVIDER` env variable and corresponding configuration params. 4 | 5 | Below you can find how to configure the various supported LLMs. 6 | 7 | ## OpenAI 8 | 9 | ## Ollama 10 | 11 | ## Groq 12 | 13 | GroqCloud provides advanced AI hardware and software solutions designed to deliver amazingly fast AI inference performance. 14 | To leverage Groq in GPT-Researcher, you will need a GroqCloud account and an API Key. (__NOTE:__ Groq has a very _generous free tier_.) 
15 | 16 | - You can signup here: [https://console.groq.com/login](https://console.groq.com/login) 17 | - Once you are logged in, you can get an API Key here: [https://console.groq.com/keys](https://console.groq.com/keys) 18 | 19 | - Once you have an API key, you will need to add it to your `systems environment` using the variable name: 20 | `GROQ_API_KEY="*********************"` 21 | 22 | 23 | And finally, you will need to configure the GPT-Researcher Provider and Model variables: 24 | 25 | ```bash 26 | # To use Groq set the llm provider to groq 27 | LLM_PROVIDER=groq 28 | 29 | # Set one of the LLM models supported by Groq 30 | FAST_LLM_MODEL=Mixtral-8x7b-32768 31 | 32 | # Set one of the LLM models supported by Groq 33 | SMART_LLM_MODEL=Mixtral-8x7b-32768 34 | 35 | # The temperature to use defaults to 0.55 36 | TEMPERATURE=0.55 37 | ``` 38 | 39 | __NOTE:__ As of the writing of this Doc (May 2024), the available Language Models from Groq are: 40 | 41 | * Llama3-70b-8192 42 | * Llama3-8b-8192 43 | * Mixtral-8x7b-32768 44 | * Gemma-7b-it 45 | 46 | ## Anthropic 47 | 48 | ... 49 | -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/pip-package.md: -------------------------------------------------------------------------------- 1 | # PIP Package 2 | [![PyPI version](https://badge.fury.io/py/gpt-researcher.svg)](https://badge.fury.io/py/gpt-researcher) 3 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/examples/pip-run.ipynb) 4 | 5 | 🌟 **Exciting News!** Now, you can integrate `gpt-researcher` with your apps seamlessly! 6 | 7 | ## Steps to Install GPT Researcher 🛠️ 8 | 9 | Follow these easy steps to get started: 10 | 11 | 0. **Pre-requisite**: Ensure Python 3.10+ is installed on your machine 💻 12 | 1. **Install gpt-researcher**: Grab the official package from [PyPi](https://pypi.org/project/gpt-researcher/). 13 | 14 | ```bash 15 | pip install gpt-researcher 16 | ``` 17 | 18 | 2. **Environment Variables:** Create a .env file with your OpenAI API key or simply export it 19 | 20 | ```bash 21 | export OPENAI_API_KEY={Your OpenAI API Key here} 22 | ``` 23 | 24 | ```bash 25 | export TAVILY_API_KEY={Your Tavily API Key here} 26 | ``` 27 | 28 | 3. **Start using GPT Researcher in your own codebase** 29 | 30 | ## Example Usage 📝 31 | 32 | ```python 33 | from gpt_researcher import GPTResearcher 34 | import asyncio 35 | 36 | 37 | from gpt_researcher import GPTResearcher 38 | import asyncio 39 | 40 | 41 | async def get_report(query: str, report_type: str) -> str: 42 | researcher = GPTResearcher(query, report_type) 43 | research_result = await researcher.conduct_research() 44 | report = await researcher.write_report() 45 | return report 46 | 47 | if __name__ == "__main__": 48 | query = "what team may win the NBA finals?" 
49 | report_type = "research_report" 50 | 51 | report = asyncio.run(get_report(query, report_type)) 52 | print(report) 53 | ``` 54 | 55 | ## Specific Examples 🌐 56 | 57 | ### Example 1: Research Report 📚 58 | 59 | ```python 60 | query = "Latest developments in renewable energy technologies" 61 | report_type = "research_report" 62 | ``` 63 | 64 | ### Example 2: Resource Report 📋 65 | 66 | ```python 67 | query = "List of top AI conferences in 2023" 68 | report_type = "resource_report" 69 | ``` 70 | 71 | ### Example 3: Outline Report 📝 72 | 73 | ```python 74 | query = "Outline for an article on the impact of AI in education" 75 | report_type = "outline_report" 76 | ``` 77 | 78 | ## Integration with Web Frameworks 🌍 79 | 80 | ### FastAPI Example 81 | 82 | ```python 83 | from fastapi import FastAPI 84 | from gpt_researcher import GPTResearcher 85 | import asyncio 86 | 87 | app = FastAPI() 88 | 89 | @app.get("/report/{report_type}") 90 | async def get_report(query: str, report_type: str) -> dict: 91 | researcher = GPTResearcher(query, report_type) 92 | research_result = await researcher.conduct_research() 93 | report = await researcher.write_report() 94 | return {"report": report} 95 | 96 | # Run the server 97 | # uvicorn main:app --reload 98 | ``` 99 | 100 | ### Flask Example 101 | 102 | **Pre-requisite**: Install flask with the async extra. 103 | 104 | ```bash 105 | pip install 'flask[async]' 106 | ``` 107 | 108 | ```python 109 | from flask import Flask, request 110 | from gpt_researcher import GPTResearcher 111 | 112 | app = Flask(__name__) 113 | 114 | @app.route('/report/', methods=['GET']) 115 | async def get_report(report_type): 116 | query = request.args.get('query') 117 | researcher = GPTResearcher(query, report_type) 118 | research_result = await researcher.conduct_research() 119 | report = await researcher.write_report() 120 | return report 121 | 122 | # Run the server 123 | # flask run 124 | ``` 125 | **Run the server** 126 | 127 | ```bash 128 | flask run 129 | ``` 130 | 131 | **Example Request** 132 | 133 | ```bash 134 | curl -X GET "http://localhost:5000/report/research_report?query=what team may win the nba finals?" 135 | ``` 136 | 137 | **Note**: The above code snippets are just examples. You can customize them as per your requirements. 138 | -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/roadmap.md: -------------------------------------------------------------------------------- 1 | # Roadmap 2 | 3 | We're constantly working on additional features and improvements to our products and services. We're also working on new products and services to help you build better AI applications using [GPT Researcher](https://gptr.dev). 4 | 5 | Our vision is to build the #1 autonomous research agent for AI developers and researchers, and we're excited to have you join us on this journey! 6 | 7 | The roadmap is prioritized based on the following goals: Performance, Quality, Modularity and Conversational flexibility. The roadmap is public and can be found [here](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap). 8 | 9 | Interested in collaborating or contributing? Check out our [contributing page](/docs/contribute) for more information. 
-------------------------------------------------------------------------------- /docs/docs/gpt-researcher/tailored-research.md: -------------------------------------------------------------------------------- 1 | # Tailored Research 2 | The GPT Researcher package allows you to tailor the research to your needs such as researching on specific sources or local documents, and even specify the agent prompt instruction upon which the research is conducted. 3 | 4 | ### Research on Specific Sources 📚 5 | 6 | You can specify the sources you want the GPT Researcher to research on by providing a list of URLs. The GPT Researcher will then conduct research on the provided sources. 7 | 8 | ```python 9 | from gpt_researcher import GPTResearcher 10 | import asyncio 11 | 12 | async def get_report(query: str, report_type: str, sources: list) -> str: 13 | researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources) 14 | await researcher.conduct_research() 15 | report = await researcher.write_report() 16 | return report 17 | 18 | if __name__ == "__main__": 19 | query = "What are the latest advancements in AI?" 20 | report_type = "research_report" 21 | sources = ["https://en.wikipedia.org/wiki/Artificial_intelligence", "https://www.ibm.com/watson/ai"] 22 | 23 | report = asyncio.run(get_report(query, report_type, sources)) 24 | print(report) 25 | ``` 26 | 27 | ### Specify Agent Prompt 📝 28 | 29 | You can specify the agent prompt instruction upon which the research is conducted. This allows you to guide the research in a specific direction and tailor the report layout. 30 | Simply pass the prompt as the `query` argument to the `GPTResearcher` class and the "custom_report" `report_type`. 31 | 32 | ```python 33 | from gpt_researcher import GPTResearcher 34 | import asyncio 35 | 36 | async def get_report(prompt: str, report_type: str) -> str: 37 | researcher = GPTResearcher(query=prompt, report_type=report_type) 38 | await researcher.conduct_research() 39 | report = await researcher.write_report() 40 | return report 41 | 42 | if __name__ == "__main__": 43 | report_type = "custom_report" 44 | prompt = "Research the latest advancements in AI and provide a detailed report in APA format including sources." 45 | 46 | report = asyncio.run(get_report(prompt=prompt, report_type=report_type)) 47 | print(report) 48 | ``` 49 | 50 | ### Research on Local Documents 📄 51 | You can instruct the GPT Researcher to research on local documents by providing the path to those documents. Currently supported file formats are: PDF, plain text, CSV, Excel, Markdown, PowerPoint, and Word documents. 52 | 53 | *Step 1*: Add the env variable `DOC_PATH` pointing to the folder where your documents are located. 54 | 55 | For example: 56 | 57 | ```bash 58 | export DOC_PATH="./my-docs" 59 | ``` 60 | 61 | *Step 2*: When you create an instance of the `GPTResearcher` class, pass the `report_source` argument as `"local"`. 62 | 63 | GPT Researcher will then conduct research on the provided documents. 64 | 65 | ```python 66 | from gpt_researcher import GPTResearcher 67 | import asyncio 68 | 69 | async def get_report(query: str, report_type: str, report_source: str) -> str: 70 | researcher = GPTResearcher(query=query, report_type=report_type, report_source=report_source) 71 | await researcher.conduct_research() 72 | report = await researcher.write_report() 73 | return report 74 | 75 | if __name__ == "__main__": 76 | query = "What can you tell me about myself based on my documents?" 
77 | report_type = "research_report" 78 | report_source = "local" # "local" or "web" 79 | 80 | report = asyncio.run(get_report(query=query, report_type=report_type, report_source=report_source)) 81 | print(report) 82 | ``` -------------------------------------------------------------------------------- /docs/docs/gpt-researcher/troubleshooting.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | We're constantly working to provide a more stable version. If you're running into any issues, please first check out the resolved issues or ask us via our [Discord community](https://discord.gg/2pFkc83fRq). 3 | 4 | ### model: gpt-4 does not exist 5 | This relates to not having permission to use gpt-4 yet. Based on OpenAI, it will be [widely available for all by end of July](https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4). 6 | 7 | ### cannot load library 'gobject-2.0-0' 8 | 9 | The issue relates to the library WeasyPrint (which is used to generate PDFs from the research report). Please follow this guide to resolve it: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html 10 | 11 | Or you can install this package manually 12 | 13 | In case of MacOS you can install this lib using 14 | `brew install glib gobject-introspection` 15 | 16 | In case of Linux you can install this lib using 17 | `sudo apt install libglib2.0-dev` 18 | 19 | **cannot load library 'pango'** 20 | 21 | In case of MacOS you can install this lib using 22 | `brew install pango` 23 | 24 | In case of Linux you can install this lib using 25 | `sudo apt install libpango-1.0-0` 26 | 27 | **Workaround for Mac M chip users** 28 | 29 | If the above solutions don't work, you can try the following: 30 | - Install a fresh version of Python 3.11 pointed to brew: 31 | `brew install python@3.11` 32 | - Install the required libraries: 33 | `brew install pango glib gobject-introspection` 34 | - Install the required GPT Researcher Python packages: 35 | `pip3.11 install -r requirements.txt` 36 | - Run the app with Python 3.11 (using brew): 37 | `python3.11 -m uvicorn main:app --reload` 38 | 39 | **Error processing the url** 40 | 41 | We're using [Selenium](https://www.selenium.dev) for site scraping. Some sites fail to be scraped. In these cases, restart and try running again. 42 | 43 | 44 | **Chrome version issues** 45 | 46 | Many users have an issue with their chromedriver because the latest chrome browser version doesn't have a compatible chrome driver yet. 47 | 48 | To downgrade your Chrome web browser using [slimjet](https://www.slimjet.com/chrome/google-chrome-old-version.php), follow these steps. First, visit the website and scroll down to find the list of available older Chrome versions. Choose the version you wish to install 49 | making sure it's compatible with your operating system. 50 | Once you've selected the desired version, click on the corresponding link to download the installer. Before proceeding with the installation, it's crucial to uninstall your current version of Chrome to avoid conflicts. 
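Once the older Chrome build is installed, a quick sanity check (assuming both binaries are on your `PATH`; on macOS the Chrome binary sits inside the app bundle) is to compare the reported versions:

```bash
# The major version numbers printed by these two commands should match
google-chrome --version
chromedriver --version
```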
51 | 52 | It's important to check if the version you downgrade to, has a chromedriver available in the official [chrome driver website](https://chromedriver.chromium.org/downloads) 53 | 54 | **If none of the above work, you can [try out our hosted beta](https://app.tavily.com)** -------------------------------------------------------------------------------- /docs/docs/reference/config/config.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_label: config 3 | title: config.config 4 | --- 5 | 6 | Configuration class to store the state of bools for different scripts access. 7 | 8 | ## Config Objects 9 | 10 | ```python 11 | class Config(metaclass=Singleton) 12 | ``` 13 | 14 | Configuration class to store the state of bools for different scripts access. 15 | 16 | #### \_\_init\_\_ 17 | 18 | ```python 19 | def __init__() -> None 20 | ``` 21 | 22 | Initialize the Config class 23 | 24 | #### set\_fast\_llm\_model 25 | 26 | ```python 27 | def set_fast_llm_model(value: str) -> None 28 | ``` 29 | 30 | Set the fast LLM model value. 31 | 32 | #### set\_smart\_llm\_model 33 | 34 | ```python 35 | def set_smart_llm_model(value: str) -> None 36 | ``` 37 | 38 | Set the smart LLM model value. 39 | 40 | #### set\_fast\_token\_limit 41 | 42 | ```python 43 | def set_fast_token_limit(value: int) -> None 44 | ``` 45 | 46 | Set the fast token limit value. 47 | 48 | #### set\_smart\_token\_limit 49 | 50 | ```python 51 | def set_smart_token_limit(value: int) -> None 52 | ``` 53 | 54 | Set the smart token limit value. 55 | 56 | #### set\_browse\_chunk\_max\_length 57 | 58 | ```python 59 | def set_browse_chunk_max_length(value: int) -> None 60 | ``` 61 | 62 | Set the browse_website command chunk max length value. 63 | 64 | #### set\_openai\_api\_key 65 | 66 | ```python 67 | def set_openai_api_key(value: str) -> None 68 | ``` 69 | 70 | Set the OpenAI API key value. 71 | 72 | #### set\_debug\_mode 73 | 74 | ```python 75 | def set_debug_mode(value: bool) -> None 76 | ``` 77 | 78 | Set the debug mode value. 79 | 80 | ## APIKeyError Objects 81 | 82 | ```python 83 | class APIKeyError(Exception) 84 | ``` 85 | 86 | Exception raised when an API key is not set in config.py or as an environment variable. 87 | 88 | #### check\_openai\_api\_key 89 | 90 | ```python 91 | def check_openai_api_key(cfg) -> None 92 | ``` 93 | 94 | Check if the OpenAI API key is set in config.py or as an environment variable. 95 | 96 | #### check\_tavily\_api\_key 97 | 98 | ```python 99 | def check_tavily_api_key(cfg) -> None 100 | ``` 101 | 102 | Check if the Tavily Search API key is set in config.py or as an environment variable. 103 | 104 | #### check\_google\_api\_key 105 | 106 | ```python 107 | def check_google_api_key(cfg) -> None 108 | ``` 109 | 110 | Check if the Google API key is set in config.py or as an environment variable. 111 | 112 | #### check\_serp\_api\_key 113 | 114 | ```python 115 | def check_serp_api_key(cfg) -> None 116 | ``` 117 | 118 | Check if the SERP API key is set in config.py or as an environment variable. 119 | 120 | #### check\_searx\_url 121 | 122 | ```python 123 | def check_searx_url(cfg) -> None 124 | ``` 125 | 126 | Check if the Searx URL is set in config.py or as an environment variable. 
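As a quick orientation, the snippet below sketches how these objects fit together. The import path follows this page's title (`config.config`), and the exact failure behaviour of the `check_*` helpers is not shown here, so treat both as assumptions and consult the source for details.

```python
# Hypothetical usage of the Config singleton documented above.
from config.config import Config, check_openai_api_key

cfg = Config()                     # Singleton metaclass: every call returns the same instance
cfg.set_smart_llm_model("gpt-4")   # override defaults before other scripts read them
cfg.set_fast_token_limit(3000)
cfg.set_debug_mode(True)

check_openai_api_key(cfg)          # complains if no OpenAI key is configured (see APIKeyError above)
```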
127 | 128 | -------------------------------------------------------------------------------- /docs/docs/reference/config/singleton.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_label: singleton 3 | title: config.singleton 4 | --- 5 | 6 | The singleton metaclass for ensuring only one instance of a class. 7 | 8 | ## Singleton Objects 9 | 10 | ```python 11 | class Singleton(abc.ABCMeta, type) 12 | ``` 13 | 14 | Singleton metaclass for ensuring only one instance of a class. 15 | 16 | #### \_\_call\_\_ 17 | 18 | ```python 19 | def __call__(cls, *args, **kwargs) 20 | ``` 21 | 22 | Call method for the singleton metaclass. 23 | 24 | ## AbstractSingleton Objects 25 | 26 | ```python 27 | class AbstractSingleton(abc.ABC, metaclass=Singleton) 28 | ``` 29 | 30 | Abstract singleton class for ensuring only one instance of a class. 31 | 32 | -------------------------------------------------------------------------------- /docs/docs/reference/processing/html.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_label: html 3 | title: processing.html 4 | --- 5 | 6 | HTML processing functions 7 | 8 | #### extract\_hyperlinks 9 | 10 | ```python 11 | def extract_hyperlinks(soup: BeautifulSoup, 12 | base_url: str) -> list[tuple[str, str]] 13 | ``` 14 | 15 | Extract hyperlinks from a BeautifulSoup object 16 | 17 | **Arguments**: 18 | 19 | - `soup` _BeautifulSoup_ - The BeautifulSoup object 20 | - `base_url` _str_ - The base URL 21 | 22 | 23 | **Returns**: 24 | 25 | List[Tuple[str, str]]: The extracted hyperlinks 26 | 27 | #### format\_hyperlinks 28 | 29 | ```python 30 | def format_hyperlinks(hyperlinks: list[tuple[str, str]]) -> list[str] 31 | ``` 32 | 33 | Format hyperlinks to be displayed to the user 34 | 35 | **Arguments**: 36 | 37 | - `hyperlinks` _List[Tuple[str, str]]_ - The hyperlinks to format 38 | 39 | 40 | **Returns**: 41 | 42 | - `List[str]` - The formatted hyperlinks 43 | 44 | -------------------------------------------------------------------------------- /docs/docs/reference/processing/text.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_label: text 3 | title: processing.text 4 | --- 5 | 6 | Text processing functions 7 | 8 | #### split\_text 9 | 10 | ```python 11 | def split_text(text: str, 12 | max_length: int = 8192) -> Generator[str, None, None] 13 | ``` 14 | 15 | Split text into chunks of a maximum length 16 | 17 | **Arguments**: 18 | 19 | - `text` _str_ - The text to split 20 | - `max_length` _int, optional_ - The maximum length of each chunk. Defaults to 8192. 
21 | 22 | 23 | **Yields**: 24 | 25 | - `str` - The next chunk of text 26 | 27 | 28 | **Raises**: 29 | 30 | - `ValueError` - If the text is longer than the maximum length 31 | 32 | #### summarize\_text 33 | 34 | ```python 35 | def summarize_text(url: str, 36 | text: str, 37 | question: str, 38 | driver: Optional[WebDriver] = None) -> str 39 | ``` 40 | 41 | Summarize text using the OpenAI API 42 | 43 | **Arguments**: 44 | 45 | - `url` _str_ - The url of the text 46 | - `text` _str_ - The text to summarize 47 | - `question` _str_ - The question to ask the model 48 | - `driver` _WebDriver_ - The webdriver to use to scroll the page 49 | 50 | 51 | **Returns**: 52 | 53 | - `str` - The summary of the text 54 | 55 | #### scroll\_to\_percentage 56 | 57 | ```python 58 | def scroll_to_percentage(driver: WebDriver, ratio: float) -> None 59 | ``` 60 | 61 | Scroll to a percentage of the page 62 | 63 | **Arguments**: 64 | 65 | - `driver` _WebDriver_ - The webdriver to use 66 | - `ratio` _float_ - The percentage to scroll to 67 | 68 | 69 | **Raises**: 70 | 71 | - `ValueError` - If the ratio is not between 0 and 1 72 | 73 | #### create\_message 74 | 75 | ```python 76 | def create_message(chunk: str, question: str) -> Dict[str, str] 77 | ``` 78 | 79 | Create a message for the chat completion 80 | 81 | **Arguments**: 82 | 83 | - `chunk` _str_ - The chunk of text to summarize 84 | - `question` _str_ - The question to answer 85 | 86 | 87 | **Returns**: 88 | 89 | Dict[str, str]: The message to send to the chat completion 90 | 91 | #### write\_to\_file 92 | 93 | ```python 94 | def write_to_file(filename: str, text: str) -> None 95 | ``` 96 | 97 | Write text to a file 98 | 99 | **Arguments**: 100 | 101 | - `text` _str_ - The text to write 102 | - `filename` _str_ - The filename to write to 103 | 104 | -------------------------------------------------------------------------------- /docs/docs/reference/sidebar.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "items": [ 5 | "reference/config/config", 6 | "reference/config/singleton" 7 | ], 8 | "label": "config", 9 | "type": "category" 10 | }, 11 | { 12 | "items": [ 13 | "reference/processing/html", 14 | "reference/processing/text" 15 | ], 16 | "label": "processing", 17 | "type": "category" 18 | } 19 | ], 20 | "label": "Reference", 21 | "type": "category" 22 | } -------------------------------------------------------------------------------- /docs/docs/welcome.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Hey there! 👋 4 | 5 | We're a team of AI researchers and developers who are passionate about building the next generation of AI assistants. 6 | Our mission is to empower individuals and organizations with accurate, unbiased, and factual information. 7 | 8 | ### GPT Researcher 9 | In this digital age, quickly accessing relevant and trustworthy information is more crucial than ever. However, we've learned that none of today's search engines provide a suitable tool that provides factual, explicit and objective answers without the need to continuously click and explore multiple sites for a given research task. 10 | 11 | This is why we've built the trending open source **[GPT Researcher](https://github.com/assafelovic/gpt-researcher)**. GPT Researcher is an autonomous agent that takes care of the tedious task of research for you, by scraping, filtering and aggregating over 20+ web sources per a single research task. 
12 | 13 | To learn more about GPT Researcher, check out the [documentation page](/docs/gpt-researcher/introduction). 14 | 15 | ### Tavily Search API 16 | Tavily Search API is a search engine optimized for LLMs, aimed at efficient, quick and persistent search results. 17 | 18 | To learn how to build your AI application with Tavily Search API, check out the [documentation page](https://docs.tavily.com/docs/tavily-api/introduction). 19 | 20 | To try our API in action, you can now use GPT Researcher on our hosted version [here](https://app.tavily.com/chat) or on our [API Playground](https://app.tavily.com/playground). 21 | 22 | If you're an AI developer looking to integrate your application with our API or seek increased API limits, **[please reach out!](mailto:support@tavily.com)** 23 | -------------------------------------------------------------------------------- /docs/docusaurus.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('@docusaurus/types').DocusaurusConfig} */ 2 | const math = require('remark-math'); 3 | const katex = require('rehype-katex'); 4 | 5 | module.exports = { 6 | title: 'GPT Researcher', 7 | tagline: 'The leading autonomous AI research agent', 8 | url: 'https://docs.gptr.dev', 9 | baseUrl: '/', 10 | onBrokenLinks: 'ignore', 11 | //deploymentBranch: 'master', 12 | onBrokenMarkdownLinks: 'warn', 13 | favicon: 'img/favicon.ico', 14 | organizationName: 'assafelovic', 15 | trailingSlash: false, 16 | projectName: 'gpt-researcher', 17 | themeConfig: { 18 | navbar: { 19 | title: 'GPT Researcher', 20 | logo: { 21 | alt: 'GPT Researcher', 22 | src: 'img/gptr-logo.png', 23 | }, 24 | items: [ 25 | { 26 | type: 'doc', 27 | docId: 'welcome', 28 | position: 'left', 29 | label: 'Docs', 30 | }, 31 | 32 | {to: 'blog', label: 'Blog', position: 'left'}, 33 | { 34 | type: 'doc', 35 | docId: 'faq', 36 | position: 'left', 37 | label: 'FAQ', 38 | }, 39 | { 40 | href: 'mailto:assaf.elovic@gmail.com', 41 | position: 'left', 42 | label: 'Contact', 43 | }, 44 | { 45 | href: 'https://github.com/assafelovic/gpt-researcher', 46 | label: 'GitHub', 47 | position: 'right', 48 | }, 49 | ], 50 | }, 51 | footer: { 52 | style: 'dark', 53 | links: [ 54 | { 55 | title: 'Community', 56 | items: [ 57 | { 58 | label: 'Discord', 59 | href: 'https://discord.gg/8YkBcCED5y', 60 | }, 61 | { 62 | label: 'Twitter', 63 | href: 'https://twitter.com/assaf_elovic', 64 | }, 65 | { 66 | label: 'LinkedIn', 67 | href: 'https://www.linkedin.com/in/assafe/', 68 | }, 69 | ], 70 | }, 71 | { 72 | title: 'Company', 73 | items: [ 74 | { 75 | label: 'Homepage', 76 | href: 'https://gptr.dev', 77 | }, 78 | { 79 | label: 'Contact', 80 | href: 'mailto:assafelovic@gmail.com', 81 | }, 82 | ], 83 | }, 84 | ], 85 | copyright: `Copyright © ${new Date().getFullYear()} GPT Researcher.`, 86 | }, 87 | }, 88 | presets: [ 89 | [ 90 | '@docusaurus/preset-classic', 91 | { 92 | docs: { 93 | sidebarPath: require.resolve('./sidebars.js'), 94 | // Please change this to your repo. 
95 | editUrl: 96 | 'https://github.com/assafelovic/gpt-researcher/tree/master/docs', 97 | remarkPlugins: [math], 98 | rehypePlugins: [katex], 99 | }, 100 | theme: { 101 | customCss: require.resolve('./src/css/custom.css'), 102 | }, 103 | }, 104 | ], 105 | ], 106 | stylesheets: [ 107 | { 108 | href: "https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/katex.min.css", 109 | integrity: "sha384-Um5gpz1odJg5Z4HAmzPtgZKdTBHZdw8S29IecapCSB31ligYPhHQZMIlWLYQGVoc", 110 | crossorigin: "anonymous", 111 | }, 112 | ], 113 | 114 | plugins: [ 115 | // ... Your other plugins. 116 | [ 117 | require.resolve("@easyops-cn/docusaurus-search-local"), 118 | { 119 | // ... Your options. 120 | // `hashed` is recommended as long-term-cache of index file is possible. 121 | hashed: true, 122 | blogDir:"./blog/" 123 | // For Docs using Chinese, The `language` is recommended to set to: 124 | // ``` 125 | // language: ["en", "zh"], 126 | // ``` 127 | // When applying `zh` in language, please install `nodejieba` in your project. 128 | }, 129 | ], 130 | ], 131 | }; 132 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "website", 3 | "version": "0.0.0", 4 | "private": true, 5 | "resolutions" :{ 6 | "nth-check":"2.0.1", 7 | "trim":"0.0.3", 8 | "got": "11.8.5", 9 | "node-forge": "1.3.0", 10 | "minimatch": "3.0.5", 11 | "loader-utils": "2.0.4", 12 | "eta": "2.0.0", 13 | "@sideway/formula": "3.0.1", 14 | "http-cache-semantics": "4.1.1" 15 | }, 16 | "scripts": { 17 | "docusaurus": "docusaurus", 18 | "start": "docusaurus start", 19 | "build": "docusaurus build", 20 | "swizzle": "docusaurus swizzle", 21 | "deploy": "docusaurus deploy", 22 | "clear": "docusaurus clear", 23 | "serve": "docusaurus serve", 24 | "write-translations": "docusaurus write-translations", 25 | "write-heading-ids": "docusaurus write-heading-ids" 26 | }, 27 | "dependencies": { 28 | "@docusaurus/core": "0.0.0-4193", 29 | "@docusaurus/preset-classic": "0.0.0-4193", 30 | "@easyops-cn/docusaurus-search-local": "^0.21.1", 31 | "@mdx-js/react": "^1.6.21", 32 | "@svgr/webpack": "^5.5.0", 33 | "clsx": "^1.1.1", 34 | "file-loader": "^6.2.0", 35 | "hast-util-is-element": "1.1.0", 36 | "react": "^17.0.1", 37 | "react-dom": "^17.0.1", 38 | "rehype-katex": "4", 39 | "remark-math": "3", 40 | "trim": "^0.0.3", 41 | "url-loader": "^4.1.1", 42 | "minimatch": "3.0.5" 43 | }, 44 | "browserslist": { 45 | "production": [ 46 | ">0.5%", 47 | "not dead", 48 | "not op_mini all" 49 | ], 50 | "development": [ 51 | "last 1 chrome version", 52 | "last 1 firefox version", 53 | "last 1 safari version" 54 | ] 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /docs/pydoc-markdown.yml: -------------------------------------------------------------------------------- 1 | loaders: 2 | - type: python 3 | search_path: [../docs] 4 | processors: 5 | - type: filter 6 | skip_empty_modules: true 7 | - type: smart 8 | - type: crossref 9 | renderer: 10 | type: docusaurus 11 | docs_base_path: docs 12 | relative_output_path: reference 13 | relative_sidebar_path: sidebar.json 14 | sidebar_top_level_label: Reference 15 | markdown: 16 | escape_html_in_docstring: false 17 | -------------------------------------------------------------------------------- /docs/sidebars.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Creating a sidebar enables you to: 3 | - create an 
ordered group of docs 4 | - render a sidebar for each doc of that group 5 | - provide next/previous navigation 6 | 7 | The sidebars can be generated from the filesystem, or explicitly defined here. 8 | 9 | Create as many sidebars as you want. 10 | */ 11 | 12 | module.exports = { 13 | docsSidebar: [ 14 | 'welcome', 15 | { 16 | type: 'category', 17 | label: 'GPT Researcher', 18 | collapsible: true, 19 | collapsed: false, 20 | items: [ 21 | 'gpt-researcher/introduction', 22 | 'gpt-researcher/getting-started', 23 | 'gpt-researcher/pip-package', 24 | 'gpt-researcher/example', 25 | 'gpt-researcher/troubleshooting', 26 | ], 27 | }, 28 | { 29 | type: 'category', 30 | label: 'Customization', 31 | collapsible: true, 32 | collapsed: false, 33 | items: [ 34 | 'gpt-researcher/config', 35 | 'gpt-researcher/tailored-research', 36 | 'gpt-researcher/llms', 37 | ] 38 | }, 39 | { 40 | type: 'category', 41 | label: 'Multi-Agent Frameworks', 42 | collapsible: true, 43 | collapsed: false, 44 | items: [ 45 | 'gpt-researcher/langgraph', 46 | ] 47 | }, 48 | {'Examples': [{type: 'autogenerated', dirName: 'examples'}]}, 49 | 'contribute', 50 | ], 51 | // pydoc-markdown auto-generated markdowns from docstrings 52 | referenceSideBar: [require("./docs/reference/sidebar.json")] 53 | }; 54 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | import { Link } from 'react-router-dom'; 4 | import styles from './HomepageFeatures.module.css'; 5 | 6 | const FeatureList = [ 7 | { 8 | title: 'GPT Researcher', 9 | Svg: require('../../static/img/gptr-logo.png').default, 10 | docLink: './docs/gpt-researcher/getting-started', 11 | description: ( 12 | <> 13 | GPT Researcher is an open source autonomous agent designed for comprehensive online research on a variety of tasks. 14 | 15 | ), 16 | }, 17 | /*{ 18 | title: 'Tavily Search API', 19 | Svg: require('../../static/img/tavily.png').default, 20 | docLink: './docs/tavily-api/introduction', 21 | description: ( 22 | <> 23 | Tavily Search API is a search engine optimized for LLMs, optimized for a factual, efficient, and persistent search experience 24 | 25 | ), 26 | },*/ 27 | { 28 | title: 'Multi-Agent Assistant', 29 | Svg: require('../../static/img/multi-agent.png').default, 30 | docLink: './docs/gpt-researcher/langgraph', 31 | description: ( 32 | <> 33 | Learn how a team of AI agents can work together to conduct research on a given topic, from planning to publication. 34 | 35 | ), 36 | }, 37 | { 38 | title: 'Examples and Demos', 39 | Svg: require('../../static/img/examples.png').default, 40 | docLink: './docs/examples/examples', 41 | description: ( 42 | <> 43 | Check out Tavily API in action across multiple frameworks and use cases 44 | 45 | ), 46 | }, 47 | ]; 48 | 49 | function Feature({Svg, title, description, docLink}) { 50 | return ( 51 |
52 |
53 | {/**/} 54 | {title} 55 |
56 |
57 | 58 |

{title}

59 | 60 |

{description}

61 |
62 |
63 | ); 64 | } 65 | 66 | export default function HomepageFeatures() { 67 | return ( 68 |
69 |
70 |
71 | {FeatureList.map((props, idx) => ( 72 | 73 | ))} 74 |
75 |
76 |
77 | ); 78 | } 79 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures.module.css: -------------------------------------------------------------------------------- 1 | /* stylelint-disable docusaurus/copyright-header */ 2 | 3 | .features { 4 | display: flex; 5 | align-items: center; 6 | padding: 2rem 0; 7 | width: 100%; 8 | } 9 | 10 | .featureSvg { 11 | height: 120px; 12 | width: 200px; 13 | } 14 | -------------------------------------------------------------------------------- /docs/src/css/custom.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --ifm-font-size-base: 17px; 3 | --ifm-code-font-size: 90%; 4 | 5 | --ifm-color-primary: #0c4da2; 6 | --ifm-color-primary-dark: rgb(11, 69, 146); 7 | --ifm-color-primary-darker: #0a418a; 8 | --ifm-color-primary-darkest: #083671; 9 | --ifm-color-primary-light: #0d55b2; 10 | --ifm-color-primary-lighter: #0e59ba; 11 | --ifm-color-primary-lightest: #1064d3; 12 | 13 | --ifm-color-emphasis-300: #1064d3; 14 | --ifm-link-color: #1064d3; 15 | --ifm-menu-color-active: #1064d3; 16 | } 17 | 18 | .docusaurus-highlight-code-line { 19 | background-color: rgba(0, 0, 0, 0.1); 20 | display: block; 21 | margin: 0 calc(-1 * var(--ifm-pre-padding)); 22 | padding: 0 var(--ifm-pre-padding); 23 | } 24 | html[data-theme='dark'] .docusaurus-highlight-code-line { 25 | background-color: rgb(0, 0, 0, 0.3); 26 | } 27 | 28 | .admonition-content a { 29 | text-decoration: underline; 30 | font-weight: 600; 31 | color: inherit; 32 | } 33 | 34 | a { 35 | font-weight: 600; 36 | } 37 | 38 | blockquote { 39 | /* samsung blue with lots of transparency */ 40 | background-color: #0c4da224; 41 | } 42 | @media (prefers-color-scheme: dark) { 43 | :root { 44 | --ifm-hero-text-color: white; 45 | } 46 | } 47 | @media (prefers-color-scheme: dark) { 48 | .hero.hero--primary { --ifm-hero-text-color: white;} 49 | } 50 | 51 | @media (prefers-color-scheme: dark) { 52 | blockquote { 53 | --ifm-color-emphasis-300: var(--ifm-color-primary); 54 | /* border-left: 6px solid var(--ifm-color-emphasis-300); */ 55 | } 56 | } 57 | @media (prefers-color-scheme: dark) { 58 | code { 59 | /* background-color: rgb(41, 45, 62); */ 60 | } 61 | } 62 | 63 | 64 | /* Docusaurus still defaults to their green! 
*/ 65 | @media (prefers-color-scheme: dark) { 66 | .react-toggle-thumb { 67 | border-color: var(--ifm-color-primary) !important; 68 | } 69 | } 70 | 71 | 72 | .header-github-link:hover { 73 | opacity: 0.6; 74 | } 75 | 76 | .header-github-link:before { 77 | content: ''; 78 | width: 24px; 79 | height: 24px; 80 | display: flex; 81 | background: url("data:image/svg+xml,%3Csvg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E") 82 | no-repeat; 83 | } 84 | 85 | html[data-theme='dark'] .header-github-link:before { 86 | background: url("data:image/svg+xml,%3Csvg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath fill='white' d='M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12'/%3E%3C/svg%3E") 87 | no-repeat; 88 | } 89 | -------------------------------------------------------------------------------- /docs/src/pages/index.js: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | import Layout from '@theme/Layout'; 4 | import Link from '@docusaurus/Link'; 5 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; 6 | import styles from './index.module.css'; 7 | import HomepageFeatures from '../components/HomepageFeatures'; 8 | 9 | function HomepageHeader() { 10 | const {siteConfig} = useDocusaurusContext(); 11 | return ( 12 |
13 |
14 |

{siteConfig.title}

15 |

{siteConfig.tagline}

16 |
17 | 20 | Getting Started - 5 min ⏱️ 21 | 22 |
23 |
24 |
25 | ); 26 | } 27 | 28 | export default function Home() { 29 | const {siteConfig} = useDocusaurusContext(); 30 | return ( 31 | 34 | 35 |
36 | 37 |
38 |
39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /docs/src/pages/index.module.css: -------------------------------------------------------------------------------- 1 | /* stylelint-disable docusaurus/copyright-header */ 2 | 3 | /** 4 | * CSS files with the .module.css suffix will be treated as CSS modules 5 | * and scoped locally. 6 | */ 7 | 8 | .heroBanner { 9 | padding: 5rem 0; 10 | text-align: center; 11 | position: relative; 12 | overflow: hidden; 13 | } 14 | 15 | @media screen and (max-width: 966px) { 16 | .heroBanner { 17 | padding: 2rem; 18 | } 19 | } 20 | 21 | .buttons { 22 | display: flex; 23 | align-items: center; 24 | justify-content: center; 25 | } 26 | -------------------------------------------------------------------------------- /docs/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/.nojekyll -------------------------------------------------------------------------------- /docs/static/img/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/architecture.png -------------------------------------------------------------------------------- /docs/static/img/banner1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/banner1.jpg -------------------------------------------------------------------------------- /docs/static/img/examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/examples.png -------------------------------------------------------------------------------- /docs/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/favicon.ico -------------------------------------------------------------------------------- /docs/static/img/gptr-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/gptr-logo.png -------------------------------------------------------------------------------- /docs/static/img/gptresearcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/gptresearcher.png -------------------------------------------------------------------------------- /docs/static/img/leaderboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/leaderboard.png -------------------------------------------------------------------------------- /docs/static/img/multi-agent.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/multi-agent.png -------------------------------------------------------------------------------- /docs/static/img/tavily.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/docs/static/img/tavily.png -------------------------------------------------------------------------------- /examples/pip-run.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "id": "byPgKYhAE6gn" 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "import os\n", 26 | "os.environ['OPENAI_API_KEY'] = 'your_openai_api_key'\n", 27 | "os.environ['TAVILY_API_KEY'] = 'your_tavily_api_key' # Get a free key here: https://app.tavily.com" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "source": [ 33 | "!pip install -U gpt-researcher nest_asyncio" 34 | ], 35 | "metadata": { 36 | "id": "-rXET3OZLxwH" 37 | }, 38 | "execution_count": null, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "source": [ 44 | "import nest_asyncio # required for notebooks\n", 45 | "nest_asyncio.apply()\n", 46 | "\n", 47 | "from gpt_researcher import GPTResearcher\n", 48 | "import asyncio\n", 49 | "\n", 50 | "async def get_report(query: str, report_type: str) -> str:\n", 51 | " researcher = GPTResearcher(query, report_type)\n", 52 | " research_result = await researcher.conduct_research()\n", 53 | " report = await researcher.write_report()\n", 54 | " return report\n", 55 | "\n", 56 | "if __name__ == \"__main__\":\n", 57 | " query = \"which team may win the NBA finals?\"\n", 58 | " report_type = \"research_report\"\n", 59 | "\n", 60 | " report = asyncio.run(get_report(query, report_type))\n", 61 | " print(report)" 62 | ], 63 | "metadata": { 64 | "id": "KWZe2InrL0ji" 65 | }, 66 | "execution_count": null, 67 | "outputs": [] 68 | } 69 | ] 70 | } 71 | -------------------------------------------------------------------------------- /examples/sample_report.py: -------------------------------------------------------------------------------- 1 | from gpt_researcher import GPTResearcher 2 | import asyncio 3 | 4 | 5 | async def main(): 6 | """ 7 | This is a sample script that shows how to run a research report. 8 | """ 9 | # Query 10 | query = "What happened in the latest burning man floods?" 
11 | 12 | # Report Type 13 | report_type = "research_report" 14 | 15 | # Initialize the researcher 16 | researcher = GPTResearcher(query=query, report_type=report_type, config_path=None) 17 | # Conduct research on the given query 18 | await researcher.conduct_research() 19 | # Write the report 20 | report = await researcher.write_report() 21 | 22 | return report 23 | 24 | 25 | if __name__ == "__main__": 26 | asyncio.run(main()) 27 | -------------------------------------------------------------------------------- /frontend/pdf_styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Libre Baskerville', serif; 3 | font-size: 12pt; /* standard size for academic papers */ 4 | line-height: 1.6; /* for readability */ 5 | color: #333; /* softer on the eyes than black */ 6 | background-color: #fff; /* white background */ 7 | margin: 0; 8 | padding: 0; 9 | } 10 | 11 | h1, h2, h3, h4, h5, h6 { 12 | font-family: 'Libre Baskerville', serif; 13 | color: #000; /* darker than the body text */ 14 | margin-top: 1em; /* space above headers */ 15 | } 16 | 17 | h1 { 18 | font-size: 2em; /* make h1 twice the size of the body text */ 19 | } 20 | 21 | h2 { 22 | font-size: 1.5em; 23 | } 24 | 25 | /* Add some space between paragraphs */ 26 | p { 27 | margin-bottom: 1em; 28 | } 29 | 30 | /* Style for blockquotes, often used in academic papers */ 31 | blockquote { 32 | font-style: italic; 33 | margin: 1em 0; 34 | padding: 1em; 35 | background-color: #f9f9f9; /* a light grey background */ 36 | } 37 | 38 | /* You might want to style tables, figures, etc. too */ 39 | table { 40 | border-collapse: collapse; 41 | width: 100%; 42 | } 43 | 44 | table, th, td { 45 | border: 1px solid #ddd; 46 | text-align: left; 47 | padding: 8px; 48 | } 49 | 50 | th { 51 | background-color: #f2f2f2; 52 | color: black; 53 | } -------------------------------------------------------------------------------- /frontend/static/academicResearchAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/academicResearchAgentAvatar.png -------------------------------------------------------------------------------- /frontend/static/businessAnalystAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/businessAnalystAgentAvatar.png -------------------------------------------------------------------------------- /frontend/static/computerSecurityanalystAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/computerSecurityanalystAvatar.png -------------------------------------------------------------------------------- /frontend/static/defaultAgentAvatar.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/defaultAgentAvatar.JPG -------------------------------------------------------------------------------- /frontend/static/favicon.ico: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/favicon.ico -------------------------------------------------------------------------------- /frontend/static/financeAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/financeAgentAvatar.png -------------------------------------------------------------------------------- /frontend/static/gptr-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/gptr-logo.png -------------------------------------------------------------------------------- /frontend/static/mathAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/mathAgentAvatar.png -------------------------------------------------------------------------------- /frontend/static/travelAgentAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/frontend/static/travelAgentAvatar.png -------------------------------------------------------------------------------- /frontend/styles.css: -------------------------------------------------------------------------------- 1 | @keyframes gradientBG { 2 | 0% {background-position: 0% 50%;} 3 | 50% {background-position: 100% 50%;} 4 | 100% {background-position: 0% 50%;} 5 | } 6 | 7 | html { 8 | scroll-behavior: smooth; 9 | } 10 | 11 | body { 12 | font-family: 'Montserrat', sans-serif; 13 | color: #fff; 14 | line-height: 1.6; 15 | background-size: 200% 200%; 16 | background-image: linear-gradient(45deg, #151A2D, #2D284D, #151A2D); 17 | animation: gradientBG 10s ease infinite; 18 | } 19 | 20 | .landing { 21 | display: flex; 22 | justify-content: center; 23 | align-items: center; 24 | height: 100vh; 25 | text-align: center; 26 | } 27 | 28 | .landing h1 { 29 | font-size: 3.5rem; 30 | font-weight: 700; 31 | margin-bottom: 2rem; 32 | } 33 | 34 | .landing p { 35 | font-size: 1.5rem; 36 | font-weight: 400; 37 | max-width: 500px; 38 | margin: auto; 39 | margin-bottom: 2rem; 40 | } 41 | 42 | .container { 43 | max-width: 900px; 44 | margin: auto; 45 | padding: 20px; 46 | background-color: rgba(255, 255, 255, 0.1); 47 | border-radius: 12px; 48 | box-shadow: 0px 10px 25px rgba(0, 0, 0, 0.1); 49 | transition: all .3s ease-in-out; 50 | margin-bottom: 180px; 51 | } 52 | 53 | .container:hover { 54 | transform: scale(1.01); 55 | box-shadow: 0px 15px 30px rgba(0, 0, 0, 0.2); 56 | } 57 | 58 | input, select, #output, #reportContainer { 59 | background-color: rgba(255,255,255,0.1); 60 | border: none; 61 | color: #fff; 62 | transition: all .3s ease-in-out; 63 | } 64 | 65 | input:hover, input:focus, select:hover, select:focus { 66 | background-color: #dfe4ea; 67 | border: 1px solid rgba(255, 255, 255, 0.5); 68 | box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1); 69 | transition: all 0.3s ease-in-out; 70 | } 71 | 72 | .btn-primary { 73 | background: linear-gradient(to right, #0062cc, #007bff); 74 | border: none; 75 | transition: all .3s ease-in-out; 76 | } 77 | 78 | .btn-secondary { 79 | background: linear-gradient(to 
right, #6c757d, #6c757d); 80 | border: none; 81 | transition: all .3s ease-in-out; 82 | } 83 | 84 | .btn:hover { 85 | opacity: 0.8; 86 | transform: scale(1.1); 87 | box-shadow: 0px 10px 20px rgba(0, 0, 0, 0.3); 88 | } 89 | 90 | .agent_question { 91 | font-size: 1.4rem; 92 | font-weight: 500; 93 | margin-bottom: 0.2rem; 94 | } 95 | 96 | footer { 97 | position: fixed; 98 | left: 0; 99 | bottom: 0; 100 | width: 100%; 101 | background: linear-gradient(to right, #151A2D, #111827); 102 | color: white; 103 | text-align: center; 104 | padding: 10px 0; 105 | } 106 | 107 | .margin-div { 108 | margin-top: 20px; 109 | margin-bottom: 20px; 110 | padding: 10px; 111 | } 112 | 113 | .agent_response { 114 | background-color: #747d8c; 115 | margin: 10px; 116 | padding: 10px; 117 | border-radius: 12px; 118 | } 119 | 120 | #output { 121 | height: 300px; 122 | font-family: 'Times New Roman', Times, , "Courier New", serif; 123 | overflow: auto; 124 | padding: 10px; 125 | margin-bottom: 10px; 126 | margin-top: 10px; 127 | } 128 | 129 | #reportContainer { 130 | background-color: rgba(255,255,255,0.1); 131 | border: none; 132 | color: #fff; 133 | transition: all .3s ease-in-out; 134 | padding: 10px; 135 | border-radius: 12px; 136 | } 137 | -------------------------------------------------------------------------------- /gpt_researcher/README.md: -------------------------------------------------------------------------------- 1 | # 🔎 GPT Researcher 2 | [![Official Website](https://img.shields.io/badge/Official%20Website-tavily.com-blue?style=for-the-badge&logo=world&logoColor=white)](https://tavily.com) 3 | [![Discord Follow](https://dcbadge.vercel.app/api/server/2pFkc83fRq?style=for-the-badge)](https://discord.com/invite/2pFkc83fRq) 4 | 5 | [![GitHub Repo stars](https://img.shields.io/github/stars/assafelovic/gpt-researcher?style=social)](https://github.com/assafelovic/gpt-researcher) 6 | [![Twitter Follow](https://img.shields.io/twitter/follow/tavilyai?style=social)](https://twitter.com/tavilyai) 7 | [![PyPI version](https://badge.fury.io/py/gpt-researcher.svg)](https://badge.fury.io/py/gpt-researcher) 8 | 9 | **GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks.** 10 | 11 | The agent can produce detailed, factual and unbiased research reports, with customization options for focusing on relevant resources, outlines, and lessons. Inspired by the recent [Plan-and-Solve](https://arxiv.org/abs/2305.04091) and [RAG](https://arxiv.org/abs/2005.11401) papers, GPT Researcher addresses issues of speed, determinism and reliability, offering a more stable performance and increased speed through parallelized agent work, as opposed to synchronous operations. 12 | 13 | **Our mission is to empower individuals and organizations with accurate, unbiased, and factual information by leveraging the power of AI.** 14 | 15 | #### PIP Package 16 | > **Step 0** - Install Python 3.11 or later. [See here](https://www.tutorialsteacher.com/python/install-python) for a step-by-step guide. 
17 | > **Step 1** - Install the GPT Researcher package from [PyPI](https://pypi.org/project/gpt-researcher/) 18 | ```bash 19 | $ pip install gpt-researcher 20 | ``` 21 | > **Step 2** - Create a .env file with your OpenAI and Tavily API keys, or simply export them 22 | ```bash 23 | $ export OPENAI_API_KEY={Your OpenAI API Key here} 24 | ``` 25 | ```bash 26 | $ export TAVILY_API_KEY={Your Tavily API Key here} 27 | ``` 28 | > **Step 3** - Start using GPT Researcher in your own code, for example: 29 | ```python 30 | from gpt_researcher import GPTResearcher 31 | import asyncio 32 | 33 | 34 | async def get_report(query: str, report_type: str) -> str: 35 | researcher = GPTResearcher(query, report_type) 36 | report = await researcher.run() 37 | return report 38 | 39 | if __name__ == "__main__": 40 | query = "what team may win the NBA finals?" 41 | report_type = "research_report" 42 | 43 | report = asyncio.run(get_report(query, report_type)) 44 | print(report) 45 | 46 | ``` 47 | 48 | 49 | -------------------------------------------------------------------------------- /gpt_researcher/__init__.py: -------------------------------------------------------------------------------- 1 | from .master import GPTResearcher 2 | from .config import Config 3 | 4 | __all__ = ['GPTResearcher', 'Config'] 5 | -------------------------------------------------------------------------------- /gpt_researcher/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import Config 2 | 3 | __all__ = ['Config'] -------------------------------------------------------------------------------- /gpt_researcher/config/config.py: -------------------------------------------------------------------------------- 1 | # config file 2 | import json 3 | import os 4 | 5 | 6 | class Config: 7 | """Config class for GPT Researcher.""" 8 | 9 | def __init__(self, config_file: str = None): 10 | """Initialize the config class.""" 11 | self.config_file = os.path.expanduser(config_file) if config_file else os.getenv('CONFIG_FILE') 12 | self.retriever = os.getenv('RETRIEVER', "tavily") 13 | self.embedding_provider = os.getenv('EMBEDDING_PROVIDER', 'openai') 14 | self.llm_provider = os.getenv('LLM_PROVIDER', "openai") 15 | self.fast_llm_model = os.getenv('FAST_LLM_MODEL', "gpt-3.5-turbo-16k") 16 | self.smart_llm_model = os.getenv('SMART_LLM_MODEL', "gpt-4o") 17 | self.fast_token_limit = int(os.getenv('FAST_TOKEN_LIMIT', 2000)) 18 | self.smart_token_limit = int(os.getenv('SMART_TOKEN_LIMIT', 4000)) 19 | self.browse_chunk_max_length = int(os.getenv('BROWSE_CHUNK_MAX_LENGTH', 8192)) 20 | self.summary_token_limit = int(os.getenv('SUMMARY_TOKEN_LIMIT', 700)) 21 | self.temperature = float(os.getenv('TEMPERATURE', 0.55)) 22 | self.user_agent = os.getenv('USER_AGENT', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " 23 | "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0") 24 | self.max_search_results_per_query = int(os.getenv('MAX_SEARCH_RESULTS_PER_QUERY', 5)) 25 | self.memory_backend = os.getenv('MEMORY_BACKEND', "local") 26 | self.total_words = int(os.getenv('TOTAL_WORDS', 800)) 27 | self.report_format = os.getenv('REPORT_FORMAT', "APA") 28 | self.max_iterations = int(os.getenv('MAX_ITERATIONS', 3)) 29 | self.agent_role = os.getenv('AGENT_ROLE', None) 30 | self.scraper = os.getenv("SCRAPER", "bs") 31 | self.max_subtopics = int(os.getenv("MAX_SUBTOPICS", 3)) 32 | self.doc_path = os.getenv("DOC_PATH", "") 33 | 34 | self.load_config_file() 35 | 36 | if self.doc_path: 37 |
self.validate_doc_path() 38 | 39 | def validate_doc_path(self): 40 | """Ensure that the folder exists at the doc path""" 41 | os.makedirs(self.doc_path, exist_ok=True) 42 | 43 | def load_config_file(self) -> None: 44 | """Load the config file.""" 45 | if self.config_file is None: 46 | return None 47 | with open(self.config_file, "r") as f: 48 | config = json.load(f) 49 | for key, value in config.items(): 50 | setattr(self, key.lower(), value) -------------------------------------------------------------------------------- /gpt_researcher/context/__init__.py: -------------------------------------------------------------------------------- 1 | from .compression import ContextCompressor 2 | from .retriever import SearchAPIRetriever 3 | 4 | __all__ = ['ContextCompressor', 'SearchAPIRetriever'] 5 | -------------------------------------------------------------------------------- /gpt_researcher/context/compression.py: -------------------------------------------------------------------------------- 1 | from .retriever import SearchAPIRetriever 2 | from langchain.retrievers import ( 3 | ContextualCompressionRetriever, 4 | ) 5 | from langchain.retrievers.document_compressors import ( 6 | DocumentCompressorPipeline, 7 | EmbeddingsFilter, 8 | ) 9 | from langchain.text_splitter import RecursiveCharacterTextSplitter 10 | 11 | 12 | class ContextCompressor: 13 | def __init__(self, documents, embeddings, max_results=5, **kwargs): 14 | self.max_results = max_results 15 | self.documents = documents 16 | self.kwargs = kwargs 17 | self.embeddings = embeddings 18 | self.similarity_threshold = 0.38 19 | 20 | def _get_contextual_retriever(self): 21 | splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100) 22 | relevance_filter = EmbeddingsFilter(embeddings=self.embeddings, 23 | similarity_threshold=self.similarity_threshold) 24 | pipeline_compressor = DocumentCompressorPipeline( 25 | transformers=[splitter, relevance_filter] 26 | ) 27 | base_retriever = SearchAPIRetriever( 28 | pages=self.documents 29 | ) 30 | contextual_retriever = ContextualCompressionRetriever( 31 | base_compressor=pipeline_compressor, base_retriever=base_retriever 32 | ) 33 | return contextual_retriever 34 | 35 | def _pretty_print_docs(self, docs, top_n): 36 | return f"\n".join(f"Source: {d.metadata.get('source')}\n" 37 | f"Title: {d.metadata.get('title')}\n" 38 | f"Content: {d.page_content}\n" 39 | for i, d in enumerate(docs) if i < top_n) 40 | 41 | def get_context(self, query, max_results=5): 42 | compressed_docs = self._get_contextual_retriever() 43 | relevant_docs = compressed_docs.invoke(query) 44 | return self._pretty_print_docs(relevant_docs, max_results) -------------------------------------------------------------------------------- /gpt_researcher/context/retriever.py: -------------------------------------------------------------------------------- 1 | import os 2 | from enum import Enum 3 | from typing import Any, Dict, List, Optional 4 | 5 | from langchain.callbacks.manager import CallbackManagerForRetrieverRun 6 | from langchain.schema import Document 7 | from langchain.schema.retriever import BaseRetriever 8 | 9 | 10 | class SearchAPIRetriever(BaseRetriever): 11 | """Search API retriever.""" 12 | pages: List[Dict] = [] 13 | 14 | def _get_relevant_documents( 15 | self, query: str, *, run_manager: CallbackManagerForRetrieverRun 16 | ) -> List[Document]: 17 | 18 | docs = [ 19 | Document( 20 | page_content=page.get("raw_content", ""), 21 | metadata={ 22 | "title": page.get("title", ""), 23 | "source": 
page.get("url", ""), 24 | }, 25 | ) 26 | for page in self.pages 27 | ] 28 | 29 | return docs 30 | -------------------------------------------------------------------------------- /gpt_researcher/document/__init__.py: -------------------------------------------------------------------------------- 1 | from .document import DocumentLoader 2 | 3 | __all__ = ['DocumentLoader'] 4 | -------------------------------------------------------------------------------- /gpt_researcher/document/document.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | 4 | from langchain_community.document_loaders import ( 5 | PyMuPDFLoader, 6 | TextLoader, 7 | UnstructuredCSVLoader, 8 | UnstructuredExcelLoader, 9 | UnstructuredMarkdownLoader, 10 | UnstructuredPowerPointLoader, 11 | UnstructuredWordDocumentLoader 12 | ) 13 | 14 | 15 | class DocumentLoader: 16 | 17 | def __init__(self, path): 18 | self.path = path 19 | 20 | async def load(self) -> list: 21 | tasks = [] 22 | for root, dirs, files in os.walk(self.path): 23 | for file in files: 24 | file_path = os.path.join(root, file) 25 | file_name, file_extension_with_dot = os.path.splitext(file_path) 26 | file_extension = file_extension_with_dot.strip(".") 27 | tasks.append(self._load_document(file_path, file_extension)) 28 | 29 | docs = [] 30 | for pages in await asyncio.gather(*tasks): 31 | for page in pages: 32 | if page.page_content: 33 | docs.append({ 34 | "raw_content": page.page_content, 35 | "url": os.path.basename(page.metadata['source']) 36 | }) 37 | 38 | if not docs: 39 | raise ValueError("🤷 Failed to load any documents!") 40 | 41 | return docs 42 | 43 | async def _load_document(self, file_path: str, file_extension: str) -> list: 44 | try: 45 | loader_dict = { 46 | "pdf": PyMuPDFLoader(file_path), 47 | "txt": TextLoader(file_path), 48 | "doc": UnstructuredWordDocumentLoader(file_path), 49 | "docx": UnstructuredWordDocumentLoader(file_path), 50 | "pptx": UnstructuredPowerPointLoader(file_path), 51 | "csv": UnstructuredCSVLoader(file_path, mode="elements"), 52 | "xls": UnstructuredExcelLoader(file_path, mode="elements"), 53 | "xlsx": UnstructuredExcelLoader(file_path, mode="elements"), 54 | "md": UnstructuredMarkdownLoader(file_path) 55 | } 56 | 57 | loader = loader_dict.get(file_extension, None) 58 | if loader: 59 | data = loader.load() 60 | return data 61 | 62 | except Exception as e: 63 | print(f"Failed to load document : {file_path}") 64 | print(e) 65 | return [] 66 | -------------------------------------------------------------------------------- /gpt_researcher/llm_provider/__init__.py: -------------------------------------------------------------------------------- 1 | from .google.google import GoogleProvider 2 | from .openai.openai import OpenAIProvider 3 | from .azureopenai.azureopenai import AzureOpenAIProvider 4 | 5 | __all__ = [ 6 | "GoogleProvider", 7 | "OpenAIProvider", 8 | "AzureOpenAIProvider" 9 | ] 10 | -------------------------------------------------------------------------------- /gpt_researcher/llm_provider/azureopenai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/llm_provider/azureopenai/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/llm_provider/azureopenai/azureopenai.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | 3 | from colorama import Fore, Style 4 | from langchain_openai import AzureChatOpenAI 5 | 6 | ''' 7 | Please note: 8 | Needs additional env vars such as: 9 | AZURE_OPENAI_ENDPOINT e.g. https://xxxx.openai.azure.com/", 10 | AZURE_OPENAI_API_KEY e.g "xxxxxxxxxxxxxxxxxxxxx", 11 | OPENAI_API_VERSION, e.g. "2024-03-01-preview", but needs to be updated over time as the API version changes, 12 | AZURE_EMBEDDING_MODEL e.g. "ada2" The Azure OpenAI embedding model deployment name. 13 | 14 | config.py settings for Azure OpenAI should look like: 15 | self.embedding_provider = os.getenv('EMBEDDING_PROVIDER', 'azureopenai') 16 | self.llm_provider = os.getenv('LLM_PROVIDER', "azureopenai") 17 | self.fast_llm_model = os.getenv('FAST_LLM_MODEL', "gpt-3.5-turbo-16k") #Deployment name of your GPT3.5T model as per azure OpenAI studio deployment section 18 | self.smart_llm_model = os.getenv('SMART_LLM_MODEL', "gpt4") #Deployment name of your GPT4 1106-Preview+ (GPT4T) model as per azure OpenAI studio deployment section 19 | ''' 20 | class AzureOpenAIProvider: 21 | 22 | def __init__( 23 | self, 24 | deployment_name, 25 | temperature, 26 | max_tokens 27 | ): 28 | self.deployment_name = deployment_name 29 | self.temperature = temperature 30 | self.max_tokens = max_tokens 31 | self.api_key = self.get_api_key() 32 | self.llm = self.get_llm_model() 33 | 34 | def get_api_key(self): 35 | """ 36 | Gets the Azure OpenAI API key 37 | Returns: 38 | 39 | """ 40 | try: 41 | api_key = os.environ["AZURE_OPENAI_API_KEY"] 42 | except KeyError: 43 | raise Exception( 44 | "Azure OpenAI API key not found. Please set the AZURE_OPENAI_API_KEY environment variable.") 45 | return api_key 46 | 47 | def get_llm_model(self): 48 | # Initializing the chat model 49 | llm = AzureChatOpenAI( 50 | deployment_name=self.deployment_name, 51 | temperature=self.temperature, 52 | max_tokens=self.max_tokens, 53 | api_key=self.api_key 54 | ) 55 | 56 | return llm 57 | 58 | async def get_chat_response(self, messages, stream, websocket=None): 59 | if not stream: 60 | # Getting output from the model chain using ainvoke for asynchronous invoking 61 | output = await self.llm.ainvoke(messages) 62 | 63 | return output.content 64 | 65 | else: 66 | return await self.stream_response(messages, websocket) 67 | 68 | async def stream_response(self, messages, websocket=None): 69 | paragraph = "" 70 | response = "" 71 | 72 | # Streaming the response using the chain astream method from langchain 73 | async for chunk in self.llm.astream(messages): 74 | content = chunk.content 75 | if content is not None: 76 | response += content 77 | paragraph += content 78 | if "\n" in paragraph: 79 | if websocket is not None: 80 | await websocket.send_json({"type": "report", "output": paragraph}) 81 | else: 82 | print(f"{Fore.GREEN}{paragraph}{Style.RESET_ALL}") 83 | paragraph = "" 84 | 85 | return response 86 | -------------------------------------------------------------------------------- /gpt_researcher/llm_provider/google/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/llm_provider/google/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/llm_provider/google/google.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from colorama import
Fore, Style 4 | from langchain_core.messages import HumanMessage, SystemMessage 5 | from langchain_google_genai import ChatGoogleGenerativeAI 6 | 7 | 8 | class GoogleProvider: 9 | 10 | def __init__( 11 | self, 12 | model, 13 | temperature, 14 | max_tokens 15 | ): 16 | # May be extended to support more google models in the future 17 | self.model = "gemini-pro" 18 | self.temperature = temperature 19 | self.max_tokens = max_tokens 20 | self.api_key = self.get_api_key() 21 | self.llm = self.get_llm_model() 22 | 23 | def get_api_key(self): 24 | """ 25 | Gets the GEMINI_API_KEY 26 | Returns: 27 | 28 | """ 29 | try: 30 | api_key = os.environ["GEMINI_API_KEY"] 31 | except: 32 | raise Exception( 33 | "GEMINI API key not found. Please set the GEMINI_API_KEY environment variable.") 34 | return api_key 35 | 36 | def get_llm_model(self): 37 | # Initializing the chat model 38 | llm = ChatGoogleGenerativeAI( 39 | convert_system_message_to_human=True, 40 | model=self.model, 41 | temperature=self.temperature, 42 | max_output_tokens=self.max_tokens, 43 | google_api_key=self.api_key 44 | ) 45 | 46 | return llm 47 | 48 | def convert_messages(self, messages): 49 | """ 50 | The function `convert_messages` converts messages based on their role into either SystemMessage 51 | or HumanMessage objects. 52 | 53 | Args: 54 | messages: It looks like the code snippet you provided is a function called `convert_messages` 55 | that takes a list of messages as input and converts each message based on its role into either a 56 | `SystemMessage` or a `HumanMessage`. 57 | 58 | Returns: 59 | The `convert_messages` function is returning a list of converted messages based on the input 60 | `messages`. The function checks the role of each message in the input list and creates a new 61 | `SystemMessage` object if the role is "system" or a new `HumanMessage` object if the role is 62 | "user". The function then returns a list of these converted messages. 
63 | """ 64 | converted_messages = [] 65 | for message in messages: 66 | if message["role"] == "system": 67 | converted_messages.append( 68 | SystemMessage(content=message["content"])) 69 | elif message["role"] == "user": 70 | converted_messages.append( 71 | HumanMessage(content=message["content"])) 72 | 73 | return converted_messages 74 | 75 | async def get_chat_response(self, messages, stream, websocket=None): 76 | if not stream: 77 | # Getting output from the model chain using ainvoke for asynchronous invoking 78 | converted_messages = self.convert_messages(messages) 79 | output = await self.llm.ainvoke(converted_messages) 80 | 81 | return output.content 82 | 83 | else: 84 | return await self.stream_response(messages, websocket) 85 | 86 | async def stream_response(self, messages, websocket=None): 87 | paragraph = "" 88 | response = "" 89 | 90 | # Streaming the response using the chain astream method from langchain 91 | async for chunk in self.llm.astream(messages): 92 | content = chunk.content 93 | if content is not None: 94 | response += content 95 | paragraph += content 96 | if "\n" in paragraph: 97 | if websocket is not None: 98 | await websocket.send_json({"type": "report", "output": paragraph}) 99 | else: 100 | print(f"{Fore.GREEN}{paragraph}{Style.RESET_ALL}") 101 | paragraph = "" 102 | 103 | return response 104 | -------------------------------------------------------------------------------- /gpt_researcher/llm_provider/openai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/llm_provider/openai/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/llm_provider/openai/openai.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from colorama import Fore, Style 4 | from langchain_openai import ChatOpenAI 5 | 6 | 7 | class OpenAIProvider: 8 | 9 | def __init__( 10 | self, 11 | model, 12 | temperature, 13 | max_tokens 14 | ): 15 | self.model = model 16 | self.temperature = temperature 17 | self.max_tokens = max_tokens 18 | self.api_key = self.get_api_key() 19 | self.base_url = self.get_base_url() 20 | self.llm = self.get_llm_model() 21 | 22 | def get_api_key(self): 23 | """ 24 | Gets the OpenAI API key 25 | Returns: 26 | 27 | """ 28 | try: 29 | api_key = os.environ["OPENAI_API_KEY"] 30 | except KeyError: 31 | raise Exception( 32 | "OpenAI API key not found. 
Please set the OPENAI_API_KEY environment variable.") 33 | return api_key 34 | 35 | def get_base_url(self): 36 | """ 37 | Gets the OpenAI Base URL from the environment variable if defined otherwise use the default one 38 | Returns: 39 | 40 | """ 41 | base_url = os.environ.get("OPENAI_BASE_URL", None) 42 | return base_url 43 | 44 | 45 | def get_llm_model(self): 46 | # Initializing the chat model 47 | llm = ChatOpenAI( 48 | model=self.model, 49 | temperature=self.temperature, 50 | max_tokens=self.max_tokens, 51 | api_key=self.api_key 52 | ) 53 | if self.base_url: 54 | llm.openai_api_base = self.base_url 55 | 56 | return llm 57 | 58 | async def get_chat_response(self, messages, stream, websocket=None): 59 | if not stream: 60 | # Getting output from the model chain using ainvoke for asynchronous invoking 61 | output = await self.llm.ainvoke(messages) 62 | 63 | return output.content 64 | 65 | else: 66 | return await self.stream_response(messages, websocket) 67 | 68 | async def stream_response(self, messages, websocket=None): 69 | paragraph = "" 70 | response = "" 71 | 72 | # Streaming the response using the chain astream method from langchain 73 | async for chunk in self.llm.astream(messages): 74 | content = chunk.content 75 | if content is not None: 76 | response += content 77 | paragraph += content 78 | if "\n" in paragraph: 79 | if websocket is not None: 80 | await websocket.send_json({"type": "report", "output": paragraph}) 81 | else: 82 | print(f"{Fore.GREEN}{paragraph}{Style.RESET_ALL}") 83 | paragraph = "" 84 | 85 | return response 86 | -------------------------------------------------------------------------------- /gpt_researcher/master/__init__.py: -------------------------------------------------------------------------------- 1 | from .agent import GPTResearcher 2 | 3 | __all__ = ['GPTResearcher'] -------------------------------------------------------------------------------- /gpt_researcher/memory/__init__.py: -------------------------------------------------------------------------------- 1 | from .embeddings import Memory 2 | -------------------------------------------------------------------------------- /gpt_researcher/memory/embeddings.py: -------------------------------------------------------------------------------- 1 | from langchain_community.vectorstores import FAISS 2 | import os 3 | 4 | 5 | class Memory: 6 | def __init__(self, embedding_provider, **kwargs): 7 | 8 | _embeddings = None 9 | match embedding_provider: 10 | case "ollama": 11 | from langchain.embeddings import OllamaEmbeddings 12 | _embeddings = OllamaEmbeddings(model="llama2") 13 | case "openai": 14 | from langchain_openai import OpenAIEmbeddings 15 | _embeddings = OpenAIEmbeddings(model="text-embedding-3-small") 16 | case "azureopenai": 17 | from langchain_openai import AzureOpenAIEmbeddings 18 | _embeddings = AzureOpenAIEmbeddings(deployment=os.environ["AZURE_EMBEDDING_MODEL"], chunk_size=16) 19 | case "huggingface": 20 | from langchain.embeddings import HuggingFaceEmbeddings 21 | _embeddings = HuggingFaceEmbeddings() 22 | 23 | case _: 24 | raise Exception("Embedding provider not found.") 25 | 26 | self._embeddings = _embeddings 27 | 28 | def get_embeddings(self): 29 | return self._embeddings 30 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/__init__.py: -------------------------------------------------------------------------------- 1 | from .tavily_search.tavily_search import TavilySearch 2 | from .duckduckgo.duckduckgo import Duckduckgo 3 | 
from .google.google import GoogleSearch 4 | from .serper.serper import SerperSearch 5 | from .serpapi.serpapi import SerpApiSearch 6 | from .searx.searx import SearxSearch 7 | from .bing.bing import BingSearch 8 | from .yahoo.yahoo import YahooSearch 9 | 10 | __all__ = [ 11 | "TavilySearch", 12 | "Duckduckgo", 13 | "SerperSearch", 14 | "SerpApiSearch", 15 | "GoogleSearch", 16 | "SearxSearch", 17 | "BingSearch", 18 | "YahooSearch" 19 | ] 20 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/bing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/bing/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/bing/bing.py: -------------------------------------------------------------------------------- 1 | # Bing Search Retriever 2 | 3 | # libraries 4 | import os 5 | import requests 6 | import json 7 | 8 | 9 | class BingSearch(): 10 | """ 11 | Bing Search Retriever 12 | """ 13 | def __init__(self, query): 14 | """ 15 | Initializes the BingSearch object 16 | Args: 17 | query: 18 | """ 19 | self.query = query 20 | self.api_key = self.get_api_key() 21 | 22 | def get_api_key(self): 23 | """ 24 | Gets the Bing API key 25 | Returns: 26 | 27 | """ 28 | try: 29 | api_key = os.environ["BING_API_KEY"] 30 | except: 31 | raise Exception("Bing API key not found. Please set the BING_API_KEY environment variable.") 32 | return api_key 33 | 34 | def search(self, max_results=7): 35 | """ 36 | Searches the query 37 | Returns: 38 | 39 | """ 40 | print("Searching with query {0}...".format(self.query)) 41 | """Useful for general internet search queries using the Bing API.""" 42 | 43 | 44 | # Search the query 45 | url = "https://api.bing.microsoft.com/v7.0/search" 46 | 47 | headers = { 48 | 'Ocp-Apim-Subscription-Key': self.api_key, 49 | 'Content-Type': 'application/json' 50 | } 51 | params = { 52 | "responseFilter" : "Webpages", 53 | "q": self.query, 54 | "count": max_results, 55 | "setLang": "en-GB", 56 | "textDecorations": False, 57 | "textFormat": "HTML", 58 | "safeSearch": "Strict" 59 | } 60 | 61 | resp = requests.get(url, headers=headers, params=params) 62 | 63 | # Preprocess the results 64 | if resp is None: 65 | return 66 | try: 67 | search_results = json.loads(resp.text) 68 | except Exception: 69 | return 70 | if search_results is None: 71 | return 72 | 73 | results = search_results["webPages"]["value"] 74 | search_results = [] 75 | 76 | # Normalize the results to match the format of the other search APIs 77 | for result in results: 78 | # skip youtube results 79 | if "youtube.com" in result["url"]: 80 | continue 81 | search_result = { 82 | "title": result["name"], 83 | "href": result["url"], 84 | "body": result["snippet"], 85 | } 86 | search_results.append(search_result) 87 | 88 | return search_results 89 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/duckduckgo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/duckduckgo/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/duckduckgo/duckduckgo.py: 
-------------------------------------------------------------------------------- 1 | from itertools import islice 2 | from duckduckgo_search import DDGS 3 | 4 | 5 | class Duckduckgo: 6 | """ 7 | Duckduckgo API Retriever 8 | """ 9 | def __init__(self, query): 10 | self.ddg = DDGS() 11 | self.query = query 12 | 13 | def search(self, max_results=5): 14 | """ 15 | Performs the search 16 | :param query: 17 | :param max_results: 18 | :return: 19 | """ 20 | ddgs_gen = self.ddg.text(self.query, region='wt-wt', max_results=max_results) 21 | return ddgs_gen -------------------------------------------------------------------------------- /gpt_researcher/retrievers/google/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/google/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/google/google.py: -------------------------------------------------------------------------------- 1 | # Google Custom Search API Retriever 2 | 3 | # libraries 4 | import os 5 | import requests 6 | import json 7 | from tavily import TavilyClient 8 | 9 | 10 | class GoogleSearch: 11 | """ 12 | Google Custom Search API Retriever 13 | """ 14 | def __init__(self, query): 15 | """ 16 | Initializes the GoogleSearch object 17 | Args: 18 | query: 19 | """ 20 | self.query = query 21 | self.api_key = self.get_api_key() #GOOGLE_API_KEY 22 | self.cx_key = self.get_cx_key() #GOOGLE_CX_KEY 23 | self.client = TavilyClient(self.api_key) 24 | 25 | def get_api_key(self): 26 | """ 27 | Gets the Google API key 28 | Returns: 29 | 30 | """ 31 | # Get the API key 32 | try: 33 | api_key = os.environ["GOOGLE_API_KEY"] 34 | except KeyError: 35 | raise Exception("Google API key not found. Please set the GOOGLE_API_KEY environment variable. " 36 | "You can get a key at https://developers.google.com/custom-search/v1/overview") 37 | return api_key 38 | 39 | def get_cx_key(self): 40 | """ 41 | Gets the Google CX key 42 | Returns: 43 | 44 | """ 45 | # Get the API key 46 | try: 47 | api_key = os.environ["GOOGLE_CX_KEY"] 48 | except KeyError: 49 | raise Exception("Google CX key not found. Please set the GOOGLE_CX_KEY environment variable.
" 50 | "You can get a key at https://developers.google.com/custom-search/v1/overview") 51 | return api_key 52 | 53 | def search(self, max_results=7): 54 | """ 55 | Searches the query 56 | Returns: 57 | 58 | """ 59 | """Useful for general internet search queries using the Google API.""" 60 | print("Searching with query {0}...".format(self.query)) 61 | url = f"https://www.googleapis.com/customsearch/v1?key={self.api_key}&cx={self.cx_key}&q={self.query}&start=1" 62 | resp = requests.get(url) 63 | 64 | if resp is None: 65 | return 66 | try: 67 | search_results = json.loads(resp.text) 68 | except Exception: 69 | return 70 | if search_results is None: 71 | return 72 | 73 | results = search_results.get("items", []) 74 | search_results = [] 75 | 76 | # Normalizing results to match the format of the other search APIs 77 | for result in results: 78 | # skip youtube results 79 | if "youtube.com" in result["link"]: 80 | continue 81 | search_result = { 82 | "title": result["title"], 83 | "href": result["link"], 84 | "body": result["snippet"], 85 | } 86 | search_results.append(search_result) 87 | 88 | return search_results 89 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/searx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/searx/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/searx/searx.py: -------------------------------------------------------------------------------- 1 | # Tavily API Retriever 2 | 3 | # libraries 4 | import os 5 | from tavily import TavilyClient 6 | from langchain_community.utilities import SearxSearchWrapper 7 | 8 | 9 | class SearxSearch(): 10 | """ 11 | Tavily API Retriever 12 | """ 13 | def __init__(self, query): 14 | """ 15 | Initializes the TavilySearch object 16 | Args: 17 | query: 18 | """ 19 | self.query = query 20 | self.api_key = self.get_api_key() 21 | self.client = TavilyClient(self.api_key) 22 | 23 | def get_api_key(self): 24 | """ 25 | Gets the Tavily API key 26 | Returns: 27 | 28 | """ 29 | # Get the API key 30 | try: 31 | api_key = os.environ["SEARX_URL"] 32 | except: 33 | raise Exception("Searx URL key not found. Please set the SEARX_URL environment variable. 
" 34 | "You can get your key from https://searx.space/") 35 | return api_key 36 | 37 | def search(self, max_results=7): 38 | """ 39 | Searches the query 40 | Returns: 41 | 42 | """ 43 | searx = SearxSearchWrapper(searx_host=os.environ["SEARX_URL"]) 44 | results = searx.results(self.query, max_results) 45 | # Normalizing results to match the format of the other search APIs 46 | search_response = [{"href": obj["link"], "body": obj["snippet"]} for obj in results] 47 | return search_response 48 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/serpapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/serpapi/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/serpapi/serpapi.py: -------------------------------------------------------------------------------- 1 | # SerpApi Retriever 2 | 3 | # libraries 4 | import os 5 | import requests 6 | from duckduckgo_search import DDGS 7 | import urllib.parse 8 | 9 | 10 | class SerpApiSearch(): 11 | """ 12 | SerpApi Retriever 13 | """ 14 | def __init__(self, query): 15 | """ 16 | Initializes the SerpApiSearch object 17 | Args: 18 | query: 19 | """ 20 | self.query = query 21 | self.api_key = self.get_api_key() 22 | 23 | def get_api_key(self): 24 | """ 25 | Gets the SerpApi API key 26 | Returns: 27 | 28 | """ 29 | try: 30 | api_key = os.environ["SERPAPI_API_KEY"] 31 | except: 32 | raise Exception("SerpApi API key not found. Please set the SERPAPI_API_KEY environment variable. " 33 | "You can get a key at https://serpapi.com/") 34 | return api_key 35 | 36 | def search(self, max_results=7): 37 | """ 38 | Searches the query 39 | Returns: 40 | 41 | """ 42 | print("SerpApiSearch: Searching with query {0}...".format(self.query)) 43 | """Useful for general internet search queries using SerpApi.""" 44 | 45 | 46 | url = "https://serpapi.com/search.json" 47 | params = { 48 | "q": self.query, 49 | "api_key": self.api_key 50 | } 51 | encoded_url = url + "?" 
+ urllib.parse.urlencode(params) 52 | search_response = [] 53 | results_processed = 0 54 | try: 55 | response = requests.get(encoded_url, timeout=10) 56 | if response.status_code == 200: 57 | search_results = response.json() 58 | if search_results: 59 | results = search_results["organic_results"] 60 | for result in results: 61 | # skip youtube results 62 | if "youtube.com" in result["link"]: 63 | continue 64 | if results_processed >= max_results: 65 | break 66 | search_result = { 67 | "title": result["title"], 68 | "href": result["link"], 69 | "body": result["snippet"], 70 | } 71 | search_response.append(search_result) 72 | results_processed += 1 73 | except Exception as e: # Fallback to DuckDuckGo in case of an error with SerpApi 74 | print(f"Error: {e}") 75 | ddg = DDGS() 76 | search_response = ddg.text(self.query, region='wt-wt', max_results=max_results) 77 | 78 | return search_response 79 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/serper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/serper/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/serper/serper.py: -------------------------------------------------------------------------------- 1 | # Google Serper Retriever 2 | 3 | # libraries 4 | import os 5 | import requests 6 | import json 7 | 8 | 9 | class SerperSearch(): 10 | """ 11 | Google Serper Retriever 12 | """ 13 | def __init__(self, query): 14 | """ 15 | Initializes the SerperSearch object 16 | Args: 17 | query: 18 | """ 19 | self.query = query 20 | self.api_key = self.get_api_key() 21 | 22 | def get_api_key(self): 23 | """ 24 | Gets the Serper API key 25 | Returns: 26 | 27 | """ 28 | try: 29 | api_key = os.environ["SERPER_API_KEY"] 30 | except: 31 | raise Exception("Serper API key not found. Please set the SERPER_API_KEY environment variable.
" 32 | "You can get a key at https://serper.dev/") 33 | return api_key 34 | 35 | def search(self, max_results=7): 36 | """ 37 | Searches the query 38 | Returns: 39 | 40 | """ 41 | print("Searching with query {0}...".format(self.query)) 42 | """Useful for general internet search queries using the Serp API.""" 43 | 44 | 45 | # Search the query (see https://serper.dev/playground for the format) 46 | url = "https://google.serper.dev/search" 47 | 48 | headers = { 49 | 'X-API-KEY': self.api_key, 50 | 'Content-Type': 'application/json' 51 | } 52 | data = json.dumps({"q": self.query, "num": max_results}) 53 | 54 | resp = requests.request("POST", url, timeout=10, headers=headers, data=data) 55 | 56 | # Preprocess the results 57 | if resp is None: 58 | return 59 | try: 60 | search_results = json.loads(resp.text) 61 | except Exception: 62 | return 63 | if search_results is None: 64 | return 65 | 66 | results = search_results["organic"] 67 | search_results = [] 68 | 69 | # Normalize the results to match the format of the other search APIs 70 | for result in results: 71 | # skip youtube results 72 | if "youtube.com" in result["link"]: 73 | continue 74 | search_result = { 75 | "title": result["title"], 76 | "href": result["link"], 77 | "body": result["snippet"], 78 | } 79 | search_results.append(search_result) 80 | 81 | return search_results 82 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/tavily_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/tavily_search/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/tavily_search/tavily_search.py: -------------------------------------------------------------------------------- 1 | # Tavily API Retriever 2 | 3 | # libraries 4 | import os 5 | from tavily import TavilyClient 6 | from duckduckgo_search import DDGS 7 | from yahoo_search import search 8 | 9 | 10 | class TavilySearch(): 11 | """ 12 | Tavily API Retriever 13 | """ 14 | def __init__(self, query, topic="general"): 15 | """ 16 | Initializes the TavilySearch object 17 | Args: 18 | query: 19 | """ 20 | self.query = query 21 | self.api_key = self.get_api_key() 22 | self.client = TavilyClient(self.api_key) 23 | self.topic = topic 24 | 25 | def get_api_key(self): 26 | """ 27 | Gets the Tavily API key 28 | Returns: 29 | 30 | """ 31 | # Get the API key 32 | try: 33 | api_key = os.environ["TAVILY_API_KEY"] 34 | except: 35 | raise Exception("Tavily API key not found. Please set the TAVILY_API_KEY environment variable. " 36 | "You can get a key at https://app.tavily.com") 37 | return api_key 38 | 39 | def search(self, max_results=7): 40 | """ 41 | Searches the query 42 | Returns: 43 | 44 | """ 45 | try: 46 | # Search the query 47 | results = self.client.search(self.query, search_depth="basic", max_results=max_results, topic=self.topic) 48 | sources = results.get("results", []) 49 | if not sources: 50 | raise Exception("No results found with Tavily API search.") 51 | # Return the results 52 | search_response = [{"href": obj["url"], "body": obj["content"]} for obj in sources] 53 | except Exception as e: # Fallback in case overload on Tavily Search API 54 | print(f"Error: {e}. 
Fallback to DuckDuckGo Search API...") 55 | try: 56 | ddg = DDGS() 57 | search_response = ddg.text(self.query, region='wt-wt', max_results=max_results) 58 | except Exception as e: 59 | print(f"Error: {e}. Fallback to Yahoo Search API...") 60 | search_response = [{"href": obj.link, "body": obj.text, "title": obj.title} for obj in search(self.query).pages] 61 | return search_response 62 | -------------------------------------------------------------------------------- /gpt_researcher/retrievers/yahoo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/retrievers/yahoo/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/retrievers/yahoo/yahoo.py: -------------------------------------------------------------------------------- 1 | # Yahoo Search Retriever 2 | 3 | # libraries 4 | import os 5 | from yahoo_search import search 6 | 7 | 8 | class YahooSearch: 9 | """ 10 | Yahoo Search Retriever 11 | """ 12 | def __init__(self, query, topic="general"): 13 | """ 14 | Initializes the YahooSearch object 15 | Args: 16 | query: 17 | """ 18 | self.query = query 19 | 20 | def get_api_key(self): 21 | """ 22 | Gets the API key (none is required for this library) 23 | Returns: 24 | 25 | """ 26 | return "No API Key is required for this library" 27 | 28 | def search(self, max_results=7): 29 | """ 30 | Searches the query 31 | Returns: 32 | 33 | """ 34 | try: 35 | # Search the query 36 | results = search(self.query) 37 | sources = results.pages 38 | if not sources: 39 | raise Exception("No results found with Yahoo search.") 40 | # Return the results 41 | search_response = [{"href": obj.link, "body": obj.text, "title": obj.title} for obj in sources] 42 | except Exception as e: # Return an empty list if the Yahoo search fails 43 | print(f"Error: {e}") 44 | search_response = [] 45 | return search_response 46 | -------------------------------------------------------------------------------- /gpt_researcher/scraper/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .beautiful_soup.beautiful_soup import BeautifulSoupScraper 3 | from .newspaper.newspaper import NewspaperScraper 4 | from .web_base_loader.web_base_loader import WebBaseLoaderScraper 5 | from .arxiv.arxiv import ArxivScraper 6 | from .pymupdf.pymupdf import PyMuPDFScraper 7 | 8 | __all__ = [ 9 | "BeautifulSoupScraper", 10 | "NewspaperScraper", 11 | "WebBaseLoaderScraper", 12 | "ArxivScraper", 13 | "PyMuPDFScraper" 14 | ] -------------------------------------------------------------------------------- /gpt_researcher/scraper/arxiv/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/scraper/arxiv/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/scraper/arxiv/arxiv.py: -------------------------------------------------------------------------------- 1 | from langchain_community.retrievers import ArxivRetriever 2 | 3 | 4 | class ArxivScraper: 5 | 6 | def __init__(self, link, session=None): 7 | self.link = link 8 | self.session = session 9 | 10 | def scrape(self): 11 | """ 12 | The function scrapes relevant documents from Arxiv based on a given link and returns the content 13 | of the first document.
14 | 15 | Returns: 16 | The code is returning the page content of the first document retrieved by the ArxivRetriever 17 | for a given query extracted from the link. 18 | """ 19 | query = self.link.split("/")[-1] 20 | retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None) 21 | docs = retriever.get_relevant_documents(query=query) 22 | return docs[0].page_content 23 | -------------------------------------------------------------------------------- /gpt_researcher/scraper/beautiful_soup/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/scraper/beautiful_soup/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/scraper/beautiful_soup/beautiful_soup.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | 4 | class BeautifulSoupScraper: 5 | 6 | def __init__(self, link, session=None): 7 | self.link = link 8 | self.session = session 9 | 10 | def scrape(self): 11 | """ 12 | This function scrapes content from a webpage by making a GET request, parsing the HTML using 13 | BeautifulSoup, and extracting script and style elements before returning the cleaned content. 14 | 15 | Returns: 16 | The `scrape` method is returning the cleaned and extracted content from the webpage specified 17 | by the `self.link` attribute. The method fetches the webpage content, removes script and style 18 | tags, extracts the text content, and returns the cleaned content as a string. If any exception 19 | occurs during the process, an error message is printed and an empty string is returned. 20 | """ 21 | try: 22 | response = self.session.get(self.link, timeout=4) 23 | soup = BeautifulSoup( 24 | response.content, "lxml", from_encoding=response.encoding 25 | ) 26 | 27 | for script_or_style in soup(["script", "style"]): 28 | script_or_style.extract() 29 | 30 | raw_content = self.get_content_from_url(soup) 31 | lines = (line.strip() for line in raw_content.splitlines()) 32 | chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) 33 | content = "\n".join(chunk for chunk in chunks if chunk) 34 | return content 35 | 36 | except Exception as e: 37 | print("Error! : " + str(e)) 38 | return "" 39 | 40 | def get_content_from_url(self, soup): 41 | """Get the text from the soup 42 | 43 | Args: 44 | soup (BeautifulSoup): The soup to get the text from 45 | 46 | Returns: 47 | str: The text from the soup 48 | """ 49 | text = "" 50 | tags = ["p", "h1", "h2", "h3", "h4", "h5"] 51 | for element in soup.find_all(tags): # Find all the

elements 52 | text += element.text + "\n" 53 | return text 54 | -------------------------------------------------------------------------------- /gpt_researcher/scraper/newspaper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/scraper/newspaper/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/scraper/newspaper/newspaper.py: -------------------------------------------------------------------------------- 1 | from newspaper import Article 2 | 3 | 4 | class NewspaperScraper: 5 | 6 | def __init__(self, link, session=None): 7 | self.link = link 8 | self.session = session 9 | 10 | def scrape(self) -> str: 11 | """ 12 | This Python function scrapes an article from a given link, extracts the title and text content, 13 | and returns them concatenated with a colon. 14 | 15 | Returns: 16 | The `scrape` method returns a string that contains the title of the article followed by a 17 | colon and the text of the article. If the title or text is not present, an empty string is 18 | returned. If an exception occurs during the scraping process, an error message is printed and an 19 | empty string is returned. 20 | """ 21 | try: 22 | article = Article( 23 | self.link, 24 | language="en", 25 | memoize_articles=False, 26 | fetch_images=False, 27 | ) 28 | article.download() 29 | article.parse() 30 | 31 | title = article.title 32 | text = article.text 33 | 34 | # If title, summary are not present then return None 35 | if not (title and text): 36 | return "" 37 | 38 | return f"{title} : {text}" 39 | 40 | except Exception as e: 41 | print("Error! : " + str(e)) 42 | return "" 43 | -------------------------------------------------------------------------------- /gpt_researcher/scraper/pymupdf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/scraper/pymupdf/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/scraper/pymupdf/pymupdf.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import PyMuPDFLoader 2 | 3 | 4 | class PyMuPDFScraper: 5 | 6 | def __init__(self, link, session=None): 7 | self.link = link 8 | self.session = session 9 | 10 | def scrape(self) -> str: 11 | """ 12 | The `scrape` function uses PyMuPDFLoader to load a document from a given link and returns it as 13 | a string. 14 | 15 | Returns: 16 | The `scrape` method is returning a string representation of the `doc` object, which is loaded 17 | using PyMuPDFLoader from the provided link. 
18 | """ 19 | loader = PyMuPDFLoader(self.link) 20 | doc = loader.load() 21 | return str(doc) 22 | -------------------------------------------------------------------------------- /gpt_researcher/scraper/scraper.py: -------------------------------------------------------------------------------- 1 | from concurrent.futures.thread import ThreadPoolExecutor 2 | from functools import partial 3 | 4 | import requests 5 | 6 | from gpt_researcher.scraper import ( 7 | ArxivScraper, 8 | BeautifulSoupScraper, 9 | NewspaperScraper, 10 | PyMuPDFScraper, 11 | WebBaseLoaderScraper, 12 | ) 13 | 14 | 15 | class Scraper: 16 | """ 17 | Scraper class to extract the content from the links 18 | """ 19 | 20 | def __init__(self, urls, user_agent, scraper): 21 | """ 22 | Initialize the Scraper class. 23 | Args: 24 | urls: 25 | """ 26 | self.urls = urls 27 | self.session = requests.Session() 28 | self.session.headers.update({"User-Agent": user_agent}) 29 | self.scraper = scraper 30 | 31 | def run(self): 32 | """ 33 | Extracts the content from the links 34 | """ 35 | partial_extract = partial(self.extract_data_from_link, session=self.session) 36 | with ThreadPoolExecutor(max_workers=20) as executor: 37 | contents = executor.map(partial_extract, self.urls) 38 | res = [content for content in contents if content["raw_content"] is not None] 39 | return res 40 | 41 | def extract_data_from_link(self, link, session): 42 | """ 43 | Extracts the data from the link 44 | """ 45 | content = "" 46 | try: 47 | Scraper = self.get_scraper(link) 48 | scraper = Scraper(link, session) 49 | content = scraper.scrape() 50 | 51 | if len(content) < 100: 52 | return {"url": link, "raw_content": None} 53 | return {"url": link, "raw_content": content} 54 | except Exception as e: 55 | return {"url": link, "raw_content": None} 56 | 57 | def get_scraper(self, link): 58 | """ 59 | The function `get_scraper` determines the appropriate scraper class based on the provided link 60 | or a default scraper if none matches. 61 | 62 | Args: 63 | link: The `get_scraper` method takes a `link` parameter which is a URL link to a webpage or a 64 | PDF file. Based on the type of content the link points to, the method determines the appropriate 65 | scraper class to use for extracting data from that content. 66 | 67 | Returns: 68 | The `get_scraper` method returns the scraper class based on the provided link. The method 69 | checks the link to determine the appropriate scraper class to use based on predefined mappings 70 | in the `SCRAPER_CLASSES` dictionary. If the link ends with ".pdf", it selects the 71 | `PyMuPDFScraper` class. 
If the link contains "arxiv.org", it selects the `ArxivScraper 72 | """ 73 | 74 | SCRAPER_CLASSES = { 75 | "pdf": PyMuPDFScraper, 76 | "arxiv": ArxivScraper, 77 | "newspaper": NewspaperScraper, 78 | "bs": BeautifulSoupScraper, 79 | "web_base_loader": WebBaseLoaderScraper, 80 | } 81 | 82 | scraper_key = None 83 | 84 | if link.endswith(".pdf"): 85 | scraper_key = "pdf" 86 | elif "arxiv.org" in link: 87 | scraper_key = "arxiv" 88 | else: 89 | scraper_key = self.scraper 90 | 91 | scraper_class = SCRAPER_CLASSES.get(scraper_key) 92 | if scraper_class is None: 93 | raise Exception("Scraper not found.") 94 | 95 | return scraper_class 96 | -------------------------------------------------------------------------------- /gpt_researcher/scraper/web_base_loader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/scraper/web_base_loader/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/scraper/web_base_loader/web_base_loader.py: -------------------------------------------------------------------------------- 1 | from langchain_community.document_loaders import WebBaseLoader 2 | 3 | 4 | class WebBaseLoaderScraper: 5 | 6 | def __init__(self, link, session=None): 7 | self.link = link 8 | self.session = session 9 | 10 | def scrape(self) -> str: 11 | """ 12 | This Python function scrapes content from a webpage using a WebBaseLoader object and returns the 13 | concatenated page content. 14 | 15 | Returns: 16 | The `scrape` method is returning a string variable named `content` which contains the 17 | concatenated page content from the documents loaded by the `WebBaseLoader`. If an exception 18 | occurs during the process, an error message is printed and an empty string is returned. 19 | """ 20 | try: 21 | loader = WebBaseLoader(self.link) 22 | loader.requests_kwargs = {"verify": False} 23 | docs = loader.load() 24 | content = "" 25 | 26 | for doc in docs: 27 | content += doc.page_content 28 | 29 | return content 30 | 31 | except Exception as e: 32 | print("Error! 
: " + str(e)) 33 | return "" 34 | -------------------------------------------------------------------------------- /gpt_researcher/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/gpt_researcher/utils/__init__.py -------------------------------------------------------------------------------- /gpt_researcher/utils/enum.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | class ReportType(Enum): 3 | ResearchReport = 'research_report' 4 | ResourceReport = 'resource_report' 5 | OutlineReport = 'outline_report' 6 | CustomReport = 'custom_report' 7 | DetailedReport = 'detailed_report' 8 | SubtopicReport = 'subtopic_report' 9 | 10 | class ReportSource(Enum): 11 | Web = 'web' 12 | Local = 'local' 13 | -------------------------------------------------------------------------------- /gpt_researcher/utils/llm.py: -------------------------------------------------------------------------------- 1 | # libraries 2 | from __future__ import annotations 3 | 4 | import json 5 | import logging 6 | from typing import Optional 7 | 8 | from colorama import Fore, Style 9 | from fastapi import WebSocket 10 | from langchain.output_parsers import PydanticOutputParser 11 | from langchain.prompts import PromptTemplate 12 | from langchain_openai import ChatOpenAI 13 | 14 | from gpt_researcher.master.prompts import auto_agent_instructions, generate_subtopics_prompt 15 | 16 | from .validators import Subtopics 17 | 18 | 19 | def get_provider(llm_provider): 20 | match llm_provider: 21 | case "openai": 22 | from ..llm_provider import OpenAIProvider 23 | llm_provider = OpenAIProvider 24 | case "azureopenai": 25 | from ..llm_provider import AzureOpenAIProvider 26 | llm_provider = AzureOpenAIProvider 27 | case "google": 28 | from ..llm_provider import GoogleProvider 29 | llm_provider = GoogleProvider 30 | 31 | case _: 32 | raise Exception("LLM provider not found.") 33 | 34 | return llm_provider 35 | 36 | 37 | async def create_chat_completion( 38 | messages: list, # type: ignore 39 | model: Optional[str] = None, 40 | temperature: float = 1.0, 41 | max_tokens: Optional[int] = None, 42 | llm_provider: Optional[str] = None, 43 | stream: Optional[bool] = False, 44 | websocket: WebSocket | None = None, 45 | ) -> str: 46 | """Create a chat completion using the OpenAI API 47 | Args: 48 | messages (list[dict[str, str]]): The messages to send to the chat completion 49 | model (str, optional): The model to use. Defaults to None. 50 | temperature (float, optional): The temperature to use. Defaults to 0.9. 51 | max_tokens (int, optional): The max tokens to use. Defaults to None. 52 | stream (bool, optional): Whether to stream the response. Defaults to False. 53 | llm_provider (str, optional): The LLM Provider to use. 
54 | websocket (WebSocket): The websocket used in the current request 55 | Returns: 56 | str: The response from the chat completion 57 | """ 58 | 59 | # validate input 60 | if model is None: 61 | raise ValueError("Model cannot be None") 62 | if max_tokens is not None and max_tokens > 8001: 63 | raise ValueError( 64 | f"Max tokens cannot be more than 8001, but got {max_tokens}") 65 | 66 | # Get the provider from supported providers 67 | ProviderClass = get_provider(llm_provider) 68 | provider = ProviderClass( 69 | model, 70 | temperature, 71 | max_tokens 72 | ) 73 | 74 | # create response 75 | for _ in range(10): # maximum of 10 attempts 76 | response = await provider.get_chat_response( 77 | messages, stream, websocket 78 | ) 79 | return response 80 | 81 | logging.error("Failed to get response from OpenAI API") 82 | raise RuntimeError("Failed to get response from OpenAI API") 83 | 84 | 85 | def choose_agent(smart_llm_model: str, llm_provider: str, task: str) -> dict: 86 | """Determines what server should be used 87 | Args: 88 | task (str): The research question the user asked 89 | smart_llm_model (str): the llm model to be used 90 | llm_provider (str): the llm provider used 91 | Returns: 92 | server - The server that will be used 93 | agent_role_prompt (str): The prompt for the server 94 | """ 95 | try: 96 | response = create_chat_completion( 97 | model=smart_llm_model, 98 | messages=[ 99 | {"role": "system", "content": f"{auto_agent_instructions()}"}, 100 | {"role": "user", "content": f"task: {task}"}], 101 | temperature=0, 102 | llm_provider=llm_provider 103 | ) 104 | agent_dict = json.loads(response) 105 | print(f"Agent: {agent_dict.get('server')}") 106 | return agent_dict 107 | except Exception as e: 108 | print(f"{Fore.RED}Error in choose_agent: {e}{Style.RESET_ALL}") 109 | return {"server": "Default Agent", 110 | "agent_role_prompt": "You are an AI critical thinker research assistant.
Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."} 111 | 112 | 113 | async def construct_subtopics(task: str, data: str, config, subtopics: list = []) -> list: 114 | try: 115 | parser = PydanticOutputParser(pydantic_object=Subtopics) 116 | 117 | prompt = PromptTemplate( 118 | template=generate_subtopics_prompt(), 119 | input_variables=["task", "data", "subtopics", "max_subtopics"], 120 | partial_variables={ 121 | "format_instructions": parser.get_format_instructions()}, 122 | ) 123 | 124 | print(f"\n🤖 Calling {config.smart_llm_model}...\n") 125 | 126 | if config.llm_provider == "openai": 127 | model = ChatOpenAI(model=config.smart_llm_model) 128 | elif config.llm_provider == "azureopenai": 129 | from langchain_openai import AzureChatOpenAI 130 | model = AzureChatOpenAI(model=config.smart_llm_model) 131 | else: 132 | return [] 133 | 134 | chain = prompt | model | parser 135 | 136 | output = chain.invoke({ 137 | "task": task, 138 | "data": data, 139 | "subtopics": subtopics, 140 | "max_subtopics": config.max_subtopics 141 | }) 142 | 143 | return output 144 | 145 | except Exception as e: 146 | print("Exception in parsing subtopics : ", e) 147 | return subtopics -------------------------------------------------------------------------------- /gpt_researcher/utils/validators.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | class Subtopic(BaseModel): 6 | task: str = Field(description="Task name", min_length=1) 7 | 8 | class Subtopics(BaseModel): 9 | subtopics: List[Subtopic] = [] 10 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from backend.server import app 2 | from dotenv import load_dotenv 3 | load_dotenv() 4 | 5 | if __name__ == "__main__": 6 | import uvicorn 7 | 8 | uvicorn.run(app, host="0.0.0.0", port=8000) -------------------------------------------------------------------------------- /multi_agents/README.md: -------------------------------------------------------------------------------- 1 | # LangGraph x GPT Researcher 2 | [LangGraph](https://python.langchain.com/docs/langgraph) is a library for building stateful, multi-actor applications with LLMs. 3 | This example uses Langgraph to automate the process of an in depth research on any given topic. 4 | 5 | ## Use case 6 | By using Langgraph, the research process can be significantly improved in depth and quality by leveraging multiple agents with specialized skills. 7 | Inspired by the recent [STORM](https://arxiv.org/abs/2402.14207) paper, this example showcases how a team of AI agents can work together to conduct research on a given topic, from planning to publication. 8 | 9 | An average run generates a 5-6 page research report in multiple formats such as PDF, Docx and Markdown. 10 | 11 | ## The Multi Agent Team 12 | The research team is made up of 7 AI agents: 13 | - **Chief Editor** - Oversees the research process and manages the team. This is the "master" agent that coordinates the other agents using Langgraph. 14 | - **Researcher** (gpt-researcher) - A specialized autonomous agent that conducts in depth research on a given topic. 15 | - **Editor** - Responsible for planning the research outline and structure. 16 | - **Reviewer** - Validates the correctness of the research results given a set of criteria. 
17 | - **Revisor** - Revises the research results based on the feedback from the reviewer. 18 | - **Writer** - Responsible for compiling and writing the final report. 19 | - **Publisher** - Responsible for publishing the final report in various formats. 20 | 21 | ## How it works 22 | Generally, the process is based on the following stages: 23 | 1. Planning stage 24 | 2. Data collection and analysis 25 | 3. Review and revision 26 | 4. Writing and submission 27 | 5. Publication 28 | 29 | ### Architecture 30 |

31 | *(Architecture diagram: the multi-agent workflow runs from initial research and planning through parallel research, review and revision, to writing and publication.)* 32 | 33 |
34 | 35 | ### Steps 36 | More specifically (as seen in the architecture diagram) the process is as follows: 37 | - Browser (gpt-researcher) - Browses the internet for initial research based on the given research task. 38 | - Editor - Plans the report outline and structure based on the initial research. 39 | - For each outline topic (in parallel): 40 | - Researcher (gpt-researcher) - Runs in-depth research on the subtopics and writes a draft. 41 | - Reviewer - Validates the correctness of the draft given a set of criteria and provides feedback. 42 | - Revisor - Revises the draft until it is satisfactory based on the reviewer feedback. 43 | - Writer - Compiles and writes the final report including an introduction, conclusion and references section from the given research findings. 44 | - Publisher - Publishes the final report in multiple formats such as PDF, Docx, Markdown, etc. 45 | 46 | ## How to run 47 | 1. Install required packages: 48 | ```bash 49 | pip install -r requirements.txt 50 | ``` 51 | 2. Update env variables: 52 | ```bash 53 | export OPENAI_API_KEY={Your OpenAI API Key here} 54 | export TAVILY_API_KEY={Your Tavily API Key here} 55 | ``` 56 | 3. Run the application: 57 | ```bash 58 | python main.py 59 | ``` 60 | 61 | ## Usage 62 | To change the research query and customize the report, edit the `task.json` file in the main directory. 63 | #### Task.json contains the following fields: 64 | - `query` - The research query or task. 65 | - `model` - The OpenAI LLM to use for the agents. 66 | - `max_sections` - The maximum number of sections in the report. Each section is a subtopic of the research query. 67 | - `publish_formats` - The formats to publish the report in. The reports will be written in the `output` directory. 68 | - `follow_guidelines` - If true, the research report will follow the guidelines below. It will take longer to complete. If false, the report will be generated faster but may not follow the guidelines. 69 | - `guidelines` - A list of guidelines that the report must follow. 70 | - `verbose` - If true, the application will print detailed logs to the console. 71 | 72 | #### For example: 73 | ```json 74 | { 75 | "query": "Is AI in a hype cycle?", 76 | "model": "gpt-4o", 77 | "max_sections": 3, 78 | "publish_formats": { 79 | "markdown": true, 80 | "pdf": true, 81 | "docx": true 82 | }, 83 | "follow_guidelines": true, 84 | "guidelines": [ 85 | "The report MUST fully answer the original question", 86 | "The report MUST be written in apa format", 87 | "The report MUST be written in english" 88 | ], 89 | "verbose": true 90 | } 91 | ``` 92 | 93 | ## To Deploy 94 | 95 | ```shell 96 | pip install langgraph-cli 97 | langgraph up 98 | ``` 99 | 100 | From there, see documentation [here](https://github.com/langchain-ai/langgraph-example) on how to use the streaming and async endpoints, as well as the playground. -------------------------------------------------------------------------------- /multi_agents/agent.py: -------------------------------------------------------------------------------- 1 | from agents import ChiefEditorAgent 2 | 3 | chief_editor = ChiefEditorAgent({ 4 | "query": "Is AI in a hype cycle?", 5 | "max_sections": 3, 6 | "follow_guidelines": False, 7 | "model": "gpt-4o", 8 | "guidelines": [ 9 | "The report MUST be written in APA format", 10 | "Each sub section MUST include supporting sources using hyperlinks. 
If none exist, erase the sub section or rewrite it to be a part of the previous section", 11 | "The report MUST be written in spanish" 12 | ], 13 | "verbose": False 14 | }) 15 | graph = chief_editor.init_research_team() 16 | graph = graph.compile() 17 | -------------------------------------------------------------------------------- /multi_agents/agents/__init__.py: -------------------------------------------------------------------------------- 1 | from .researcher import ResearchAgent 2 | from .writer import WriterAgent 3 | from .publisher import PublisherAgent 4 | from .reviser import ReviserAgent 5 | from .reviewer import ReviewerAgent 6 | from .editor import EditorAgent 7 | from .master import ChiefEditorAgent 8 | 9 | __all__ = [ 10 | "ChiefEditorAgent", 11 | "ResearchAgent", 12 | "WriterAgent", 13 | "EditorAgent", 14 | "PublisherAgent", 15 | "ReviserAgent", 16 | "ReviewerAgent" 17 | ] -------------------------------------------------------------------------------- /multi_agents/agents/editor.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from .utils.views import print_agent_output 3 | from .utils.llms import call_model 4 | from langgraph.graph import StateGraph, END 5 | import asyncio 6 | import json 7 | 8 | from memory.draft import DraftState 9 | from . import \ 10 | ResearchAgent, \ 11 | ReviewerAgent, \ 12 | ReviserAgent 13 | 14 | 15 | class EditorAgent: 16 | def __init__(self): 17 | pass 18 | 19 | def plan_research(self, research_state: dict): 20 | """ 21 | Curate relevant sources for a query 22 | :param summary_report: 23 | :return: 24 | :param total_sub_headers: 25 | :return: 26 | """ 27 | 28 | initial_research = research_state.get("initial_research") 29 | task = research_state.get("task") 30 | max_sections = task.get("max_sections") 31 | prompt = [{ 32 | "role": "system", 33 | "content": "You are a research director. Your goal is to oversee the research project" 34 | " from inception to completion.\n " 35 | }, { 36 | "role": "user", 37 | "content": f"Today's date is {datetime.now().strftime('%d/%m/%Y')}\n." 
38 | f"Research summary report: '{initial_research}'\n\n" 39 | f"Your task is to generate an outline of sections headers for the research project" 40 | f" based on the research summary report above.\n" 41 | f"You must generate a maximum of {max_sections} section headers.\n" 42 | f"You must focus ONLY on related research topics for subheaders and do NOT include introduction, conclusion and references.\n" 43 | f"You must return nothing but a JSON with the fields 'title' (str) and " 44 | f"'sections' (maximum {max_sections} section headers) with the following structure: " 45 | f"'{{title: string research title, date: today's date, " 46 | f"sections: ['section header 1', 'section header 2', 'section header 3' ...]}}.\n " 47 | }] 48 | 49 | print_agent_output(f"Planning an outline layout based on initial research...", agent="EDITOR") 50 | response = call_model(prompt=prompt, model=task.get("model"), response_format="json") 51 | plan = json.loads(response) 52 | 53 | return { 54 | "title": plan.get("title"), 55 | "date": plan.get("date"), 56 | "sections": plan.get("sections") 57 | } 58 | 59 | async def run_parallel_research(self, research_state: dict): 60 | research_agent = ResearchAgent() 61 | reviewer_agent = ReviewerAgent() 62 | reviser_agent = ReviserAgent() 63 | queries = research_state.get("sections") 64 | title = research_state.get("title") 65 | workflow = StateGraph(DraftState) 66 | 67 | workflow.add_node("researcher", research_agent.run_depth_research) 68 | workflow.add_node("reviewer", reviewer_agent.run) 69 | workflow.add_node("reviser", reviser_agent.run) 70 | 71 | # set up edges researcher->reviewer->reviser->reviewer... 72 | workflow.set_entry_point("researcher") 73 | workflow.add_edge('researcher', 'reviewer') 74 | workflow.add_edge('reviser', 'reviewer') 75 | workflow.add_conditional_edges('reviewer', 76 | (lambda draft: "accept" if draft['review'] is None else "revise"), 77 | {"accept": END, "revise": "reviser"}) 78 | 79 | chain = workflow.compile() 80 | 81 | # Execute the graph for each query in parallel 82 | print_agent_output(f"Running the following research tasks in parallel: {queries}...", agent="EDITOR") 83 | final_drafts = [chain.ainvoke({"task": research_state.get("task"), "topic": query, "title": title}) 84 | for query in queries] 85 | research_results = [result['draft'] for result in await asyncio.gather(*final_drafts)] 86 | 87 | return {"research_data": research_results} 88 | -------------------------------------------------------------------------------- /multi_agents/agents/master.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from langgraph.graph import StateGraph, END 4 | from .utils.views import print_agent_output 5 | from memory.research import ResearchState 6 | 7 | # Import agent classes 8 | from . 
import \ 9 | WriterAgent, \ 10 | EditorAgent, \ 11 | PublisherAgent, \ 12 | ResearchAgent 13 | 14 | 15 | class ChiefEditorAgent: 16 | def __init__(self, task: dict): 17 | self.task_id = int(time.time()) # Currently time based, but can be any unique identifier 18 | self.output_dir = f"./outputs/run_{self.task_id}_{task.get('query')[0:40]}" 19 | self.task = task 20 | os.makedirs(self.output_dir, exist_ok=True) 21 | 22 | def init_research_team(self): 23 | # Initialize agents 24 | writer_agent = WriterAgent() 25 | editor_agent = EditorAgent() 26 | research_agent = ResearchAgent() 27 | publisher_agent = PublisherAgent(self.output_dir) 28 | 29 | # Define a Langchain StateGraph with the ResearchState 30 | workflow = StateGraph(ResearchState) 31 | 32 | # Add nodes for each agent 33 | workflow.add_node("browser", research_agent.run_initial_research) 34 | workflow.add_node("planner", editor_agent.plan_research) 35 | workflow.add_node("researcher", editor_agent.run_parallel_research) 36 | workflow.add_node("writer", writer_agent.run) 37 | workflow.add_node("publisher", publisher_agent.run) 38 | 39 | workflow.add_edge('browser', 'planner') 40 | workflow.add_edge('planner', 'researcher') 41 | workflow.add_edge('researcher', 'writer') 42 | workflow.add_edge('writer', 'publisher') 43 | 44 | # set up start and end nodes 45 | workflow.set_entry_point("browser") 46 | workflow.add_edge('publisher', END) 47 | 48 | return workflow 49 | 50 | async def run_research_task(self): 51 | research_team = self.init_research_team() 52 | 53 | # compile the graph 54 | chain = research_team.compile() 55 | 56 | print_agent_output(f"Starting the research process for query '{self.task.get('query')}'...", "MASTER") 57 | result = await chain.ainvoke({"task": self.task}) 58 | 59 | return result 60 | -------------------------------------------------------------------------------- /multi_agents/agents/publisher.py: -------------------------------------------------------------------------------- 1 | from .utils.file_formats import \ 2 | write_md_to_pdf, \ 3 | write_md_to_word, \ 4 | write_text_to_md 5 | 6 | from .utils.views import print_agent_output 7 | 8 | 9 | class PublisherAgent: 10 | def __init__(self, output_dir: str): 11 | self.output_dir = output_dir 12 | 13 | async def publish_research_report(self, research_state: dict, publish_formats: dict): 14 | layout = self.generate_layout(research_state) 15 | await self.write_report_by_formats(layout, publish_formats) 16 | 17 | return layout 18 | 19 | def generate_layout(self, research_state: dict): 20 | sections = '\n\n'.join(f"{value}" 21 | for subheader in research_state.get("research_data") 22 | for key, value in subheader.items()) 23 | references = '\n'.join(f"{reference}" for reference in research_state.get("sources")) 24 | headers = research_state.get("headers") 25 | layout = f"""# {headers.get('title')} 26 | #### {headers.get("date")}: {research_state.get('date')} 27 | 28 | ## {headers.get("introduction")} 29 | {research_state.get('introduction')} 30 | 31 | ## {headers.get("table_of_contents")} 32 | {research_state.get('table_of_contents')} 33 | 34 | {sections} 35 | 36 | ## {headers.get("conclusion")} 37 | {research_state.get('conclusion')} 38 | 39 | ## {headers.get("references")} 40 | {references} 41 | """ 42 | return layout 43 | 44 | async def write_report_by_formats(self, layout:str, publish_formats: dict): 45 | if publish_formats.get("pdf"): 46 | await write_md_to_pdf(layout, self.output_dir) 47 | if publish_formats.get("docx"): 48 | await write_md_to_word(layout, 
self.output_dir) 49 | if publish_formats.get("markdown"): 50 | await write_text_to_md(layout, self.output_dir) 51 | 52 | async def run(self, research_state: dict): 53 | task = research_state.get("task") 54 | publish_formats = task.get("publish_formats") 55 | print_agent_output(output="Publishing final research report based on retrieved data...", agent="PUBLISHER") 56 | final_research_report = await self.publish_research_report(research_state, publish_formats) 57 | return {"report": final_research_report} 58 | -------------------------------------------------------------------------------- /multi_agents/agents/researcher.py: -------------------------------------------------------------------------------- 1 | from gpt_researcher import GPTResearcher 2 | from colorama import Fore, Style 3 | from .utils.views import print_agent_output 4 | 5 | 6 | class ResearchAgent: 7 | def __init__(self): 8 | pass 9 | 10 | async def research(self, query: str, research_report: str = "research_report", parent_query: str = "", verbose=True): 11 | # Initialize the researcher 12 | researcher = GPTResearcher(query=query, report_type=research_report, parent_query=parent_query, verbose=verbose) 13 | # Conduct research on the given query 14 | await researcher.conduct_research() 15 | # Write the report 16 | report = await researcher.write_report() 17 | 18 | return report 19 | 20 | async def run_subtopic_research(self, parent_query: str, subtopic: str, verbose: bool = True): 21 | try: 22 | report = await self.research(parent_query=parent_query, query=subtopic, 23 | research_report="subtopic_report", verbose=verbose) 24 | except Exception as e: 25 | print(f"{Fore.RED}Error in researching topic {subtopic}: {e}{Style.RESET_ALL}") 26 | report = None 27 | return {subtopic: report} 28 | 29 | async def run_initial_research(self, research_state: dict): 30 | task = research_state.get("task") 31 | query = task.get("query") 32 | print_agent_output(f"Running initial research on the following query: {query}", agent="RESEARCHER") 33 | return {"task": task, "initial_research": await self.research(query=query, verbose=task.get("verbose"))} 34 | 35 | async def run_depth_research(self, draft_state: dict): 36 | task = draft_state.get("task") 37 | topic = draft_state.get("topic") 38 | parent_query = task.get("query") 39 | verbose = task.get("verbose") 40 | print_agent_output(f"Running in depth research on the following report topic: {topic}", agent="RESEARCHER") 41 | research_draft = await self.run_subtopic_research(parent_query, topic, verbose) 42 | return {"draft": research_draft} 43 | -------------------------------------------------------------------------------- /multi_agents/agents/reviewer.py: -------------------------------------------------------------------------------- 1 | from .utils.views import print_agent_output 2 | from .utils.llms import call_model 3 | 4 | TEMPLATE = """You are an expert research article reviewer. \ 5 | Your goal is to review research drafts and provide feedback to the reviser only based on specific guidelines. 
\ 6 | """ 7 | 8 | class ReviewerAgent: 9 | def __init__(self): 10 | pass 11 | 12 | def review_draft(self, draft_state: dict): 13 | """ 14 | Review a draft article 15 | :param draft_state: 16 | :return: 17 | """ 18 | task = draft_state.get("task") 19 | guidelines = '- '.join(guideline for guideline in task.get("guidelines")) 20 | revision_notes = draft_state.get("revision_notes") 21 | 22 | revise_prompt = f"""The reviser has already revised the draft based on your previous review notes with the following feedback: 23 | {revision_notes}\n 24 | Please provide additional feedback ONLY if critical since the reviser has already made changes based on your previous feedback. 25 | If you think the article is sufficient or that non critical revisions are required, please aim to return None. 26 | """ 27 | 28 | review_prompt = f"""You have been tasked with reviewing the draft which was written by a non-expert based on specific guidelines. 29 | Please accept the draft if it is good enough to publish, or send it for revision, along with your notes to guide the revision. 30 | If not all of the guideline criteria are met, you should send appropriate revision notes. 31 | If the draft meets all the guidelines, please return None. 32 | {revise_prompt if revision_notes else ""} 33 | 34 | Guidelines: {guidelines}\nDraft: {draft_state.get("draft")}\n 35 | """ 36 | prompt = [{ 37 | "role": "system", 38 | "content": TEMPLATE 39 | }, { 40 | "role": "user", 41 | "content": review_prompt 42 | }] 43 | 44 | response = call_model(prompt, model=task.get("model")) 45 | 46 | if task.get("verbose"): 47 | print_agent_output(f"Review feedback is: {response}...", agent="REVIEWER") 48 | 49 | if 'None' in response: 50 | return None 51 | return response 52 | 53 | def run(self, draft_state: dict): 54 | task = draft_state.get("task") 55 | guidelines = task.get("guidelines") 56 | to_follow_guidelines = task.get("follow_guidelines") 57 | review = None 58 | if to_follow_guidelines: 59 | print_agent_output(f"Reviewing draft...", agent="REVIEWER") 60 | 61 | if task.get("verbose"): 62 | print_agent_output(f"Following guidelines {guidelines}...", agent="REVIEWER") 63 | 64 | review = self.review_draft(draft_state) 65 | else: 66 | print_agent_output(f"Ignoring guidelines...", agent="REVIEWER") 67 | return {"review": review} 68 | -------------------------------------------------------------------------------- /multi_agents/agents/reviser.py: -------------------------------------------------------------------------------- 1 | from .utils.views import print_agent_output 2 | from .utils.llms import call_model 3 | import json 4 | 5 | sample_revision_notes = """ 6 | { 7 | "draft": { 8 | draft title: The revised draft that you are submitting for review 9 | }, 10 | "revision_notes": Your message to the reviewer about the changes you made to the draft based on their feedback 11 | } 12 | """ 13 | 14 | class ReviserAgent: 15 | def __init__(self): 16 | pass 17 | 18 | def revise_draft(self, draft_state: dict): 19 | """ 20 | Review a draft article 21 | :param draft_state: 22 | :return: 23 | """ 24 | review = draft_state.get("review") 25 | task = draft_state.get("task") 26 | draft_report = draft_state.get("draft") 27 | prompt = [{ 28 | "role": "system", 29 | "content": "You are an expert writer. Your goal is to revise drafts based on reviewer notes." 
30 | }, { 31 | "role": "user", 32 | "content": f"""Draft:\n{draft_report}\nReviewer's notes:\n{review}\n\n 33 | You have been tasked by your reviewer with revising the following draft, which was written by a non-expert. 34 | If you decide to follow the reviewer's notes, please write a new draft and make sure to address all of the points they raised. 35 | Please keep all other aspects of the draft the same. 36 | You MUST return nothing but a JSON in the following format: 37 | {sample_revision_notes} 38 | """ 39 | }] 40 | 41 | response = call_model(prompt, model=task.get("model"), response_format='json') 42 | return json.loads(response) 43 | 44 | def run(self, draft_state: dict): 45 | print_agent_output(f"Rewriting draft based on feedback...", agent="REVISOR") 46 | revision = self.revise_draft(draft_state) 47 | 48 | if draft_state.get("task").get("verbose"): 49 | print_agent_output(f"Revision notes: {revision.get('revision_notes')}", agent="REVISOR") 50 | 51 | return {"draft": revision.get("draft"), 52 | "revision_notes": revision.get("revision_notes")} 53 | -------------------------------------------------------------------------------- /multi_agents/agents/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/multi_agents/agents/utils/__init__.py -------------------------------------------------------------------------------- /multi_agents/agents/utils/file_formats.py: -------------------------------------------------------------------------------- 1 | import aiofiles 2 | import urllib.parse 3 | import uuid 4 | import mistune 5 | 6 | 7 | async def write_to_file(filename: str, text: str) -> None: 8 | """Asynchronously write text to a file in UTF-8 encoding. 9 | 10 | Args: 11 | filename (str): The filename to write to. 12 | text (str): The text to write. 13 | """ 14 | # Convert text to UTF-8, replacing any problematic characters 15 | text_utf8 = text.encode('utf-8', errors='replace').decode('utf-8') 16 | 17 | async with aiofiles.open(filename, "w", encoding='utf-8') as file: 18 | await file.write(text_utf8) 19 | 20 | 21 | async def write_text_to_md(text: str, path: str) -> str: 22 | """Writes text to a Markdown file and returns the file path. 23 | 24 | Args: 25 | text (str): Text to write to the Markdown file. 26 | 27 | Returns: 28 | str: The file path of the generated Markdown file. 29 | """ 30 | task = uuid.uuid4().hex 31 | file_path = f"{path}/{task}.md" 32 | await write_to_file(file_path, text) 33 | print(f"Report written to {file_path}") 34 | return file_path 35 | 36 | 37 | async def write_md_to_pdf(text: str, path: str) -> str: 38 | """Converts Markdown text to a PDF file and returns the file path. 39 | 40 | Args: 41 | text (str): Markdown text to convert. 42 | 43 | Returns: 44 | str: The encoded file path of the generated PDF. 
45 | """ 46 | task = uuid.uuid4().hex 47 | file_path = f"{path}/{task}.pdf" 48 | 49 | try: 50 | # Moved imports to inner function to avoid known import errors with gobject-2.0 51 | from md2pdf.core import md2pdf 52 | md2pdf(file_path, 53 | md_content=text, 54 | # md_file_path=f"{file_path}.md", 55 | css_file_path="./agents/utils/pdf_styles.css", 56 | base_url=None) 57 | print(f"Report written to {file_path}") 58 | except Exception as e: 59 | print(f"Error in converting Markdown to PDF: {e}") 60 | return "" 61 | 62 | encoded_file_path = urllib.parse.quote(file_path) 63 | return encoded_file_path 64 | 65 | 66 | async def write_md_to_word(text: str, path: str) -> str: 67 | """Converts Markdown text to a DOCX file and returns the file path. 68 | 69 | Args: 70 | text (str): Markdown text to convert. 71 | 72 | Returns: 73 | str: The encoded file path of the generated DOCX. 74 | """ 75 | task = uuid.uuid4().hex 76 | file_path = f"{path}/{task}.docx" 77 | 78 | try: 79 | from htmldocx import HtmlToDocx 80 | from docx import Document 81 | # Convert report markdown to HTML 82 | html = mistune.html(text) 83 | # Create a document object 84 | doc = Document() 85 | # Convert the html generated from the report to document format 86 | HtmlToDocx().add_html_to_document(html, doc) 87 | 88 | # Saving the docx document to file_path 89 | doc.save(file_path) 90 | 91 | print(f"Report written to {file_path}") 92 | 93 | encoded_file_path = urllib.parse.quote(f"{file_path}.docx") 94 | return encoded_file_path 95 | 96 | except Exception as e: 97 | print(f"Error in converting Markdown to DOCX: {e}") 98 | return "" 99 | -------------------------------------------------------------------------------- /multi_agents/agents/utils/llms.py: -------------------------------------------------------------------------------- 1 | from langchain.adapters.openai import convert_openai_messages 2 | from langchain_openai import ChatOpenAI 3 | 4 | 5 | def call_model(prompt: list, model: str, max_retries: int = 2, response_format: str = None) -> str: 6 | 7 | optional_params = {} 8 | if response_format == 'json': 9 | optional_params = { 10 | "response_format": {"type": "json_object"} 11 | } 12 | 13 | lc_messages = convert_openai_messages(prompt) 14 | response = ChatOpenAI(model=model, max_retries=max_retries, model_kwargs=optional_params).invoke(lc_messages).content 15 | return response -------------------------------------------------------------------------------- /multi_agents/agents/utils/pdf_styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Libre Baskerville', serif; 3 | font-size: 12pt; /* standard size for academic papers */ 4 | line-height: 1.6; /* for readability */ 5 | color: #333; /* softer on the eyes than black */ 6 | background-color: #fff; /* white background */ 7 | margin: 0; 8 | padding: 0; 9 | } 10 | 11 | h1, h2, h3, h4, h5, h6 { 12 | font-family: 'Libre Baskerville', serif; 13 | color: #000; /* darker than the body text */ 14 | margin-top: 1em; /* space above headers */ 15 | } 16 | 17 | h1 { 18 | font-size: 2em; /* make h1 twice the size of the body text */ 19 | } 20 | 21 | h2 { 22 | font-size: 1.5em; 23 | } 24 | 25 | /* Add some space between paragraphs */ 26 | p { 27 | margin-bottom: 1em; 28 | } 29 | 30 | /* Style for blockquotes, often used in academic papers */ 31 | blockquote { 32 | font-style: italic; 33 | margin: 1em 0; 34 | padding: 1em; 35 | background-color: #f9f9f9; /* a light grey background */ 36 | } 37 | 38 | /* You might want to style 
tables, figures, etc. too */ 39 | table { 40 | border-collapse: collapse; 41 | width: 100%; 42 | } 43 | 44 | table, th, td { 45 | border: 1px solid #ddd; 46 | text-align: left; 47 | padding: 8px; 48 | } 49 | 50 | th { 51 | background-color: #f2f2f2; 52 | color: black; 53 | } -------------------------------------------------------------------------------- /multi_agents/agents/utils/views.py: -------------------------------------------------------------------------------- 1 | from colorama import Fore, Style 2 | from enum import Enum 3 | 4 | 5 | class AgentColor(Enum): 6 | RESEARCHER = Fore.LIGHTBLUE_EX 7 | EDITOR = Fore.YELLOW 8 | WRITER = Fore.LIGHTGREEN_EX 9 | PUBLISHER = Fore.MAGENTA 10 | REVIEWER = Fore.CYAN 11 | REVISOR = Fore.LIGHTWHITE_EX 12 | MASTER = Fore.LIGHTYELLOW_EX 13 | 14 | 15 | def print_agent_output(output:str, agent: str="RESEARCHER"): 16 | print(f"{AgentColor[agent].value}{agent}: {output}{Style.RESET_ALL}") -------------------------------------------------------------------------------- /multi_agents/agents/writer.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import json5 as json 3 | from .utils.views import print_agent_output 4 | from .utils.llms import call_model 5 | 6 | sample_json = """ 7 | { 8 | "table_of_contents": A table of contents in markdown syntax (using '-') based on the research headers and subheaders, 9 | "introduction": An indepth introduction to the topic in markdown syntax and hyperlink references to relevant sources, 10 | "conclusion": A conclusion to the entire research based on all research data in markdown syntax and hyperlink references to relevant sources, 11 | "sources": A list with strings of all used source links in the entire research data in markdown syntax and apa citation format. For example: ['- Title, year, Author [source url](source)', ...] 12 | } 13 | """ 14 | 15 | 16 | class WriterAgent: 17 | def __init__(self): 18 | pass 19 | 20 | def get_headers(self, research_state: dict): 21 | return { 22 | "title": research_state.get("title"), 23 | "date": "Date", 24 | "introduction": "Introduction", 25 | "table_of_contents": "Table of Contents", 26 | "conclusion": "Conclusion", 27 | "references": "References" 28 | } 29 | 30 | def write_sections(self, research_state: dict): 31 | query = research_state.get("title") 32 | data = research_state.get("research_data") 33 | task = research_state.get("task") 34 | follow_guidelines = task.get("follow_guidelines") 35 | guidelines = task.get("guidelines") 36 | 37 | prompt = [{ 38 | "role": "system", 39 | "content": "You are a research writer. Your sole purpose is to write a well-written " 40 | "research reports about a " 41 | "topic based on research findings and information.\n " 42 | }, { 43 | "role": "user", 44 | "content": f"Today's date is {datetime.now().strftime('%d/%m/%Y')}\n." 45 | f"Query or Topic: {query}\n" 46 | f"Research data: {str(data)}\n" 47 | f"Your task is to write an in depth, well written and detailed " 48 | f"introduction and conclusion to the research report based on the provided research data. " 49 | f"Do not include headers in the results.\n" 50 | f"You MUST include any relevant sources to the introduction and conclusion as markdown hyperlinks -" 51 | f"For example: 'This is a sample text. 
([url website](url))'\n\n" 52 | f"{f'You must follow the guidelines provided: {guidelines}' if follow_guidelines else ''}\n" 53 | f"You MUST return nothing but a JSON in the following format (without json markdown):\n" 54 | f"{sample_json}\n\n" 55 | 56 | }] 57 | 58 | response = call_model(prompt, task.get("model"), max_retries=2, response_format='json') 59 | return json.loads(response) 60 | 61 | def revise_headers(self, task: dict, headers: dict): 62 | prompt = [{ 63 | "role": "system", 64 | "content": """You are a research writer. 65 | Your sole purpose is to revise the headers data based on the given guidelines.""" 66 | }, { 67 | "role": "user", 68 | "content": f"""Your task is to revise the given headers JSON based on the guidelines given. 69 | You are to follow the guidelines but the values should be in simple strings, ignoring all markdown syntax. 70 | You must return nothing but a JSON in the same format as given in headers data. 71 | Guidelines: {task.get("guidelines")}\n 72 | Headers Data: {headers}\n 73 | """ 74 | 75 | }] 76 | 77 | response = call_model(prompt, task.get("model"), response_format='json') 78 | return {"headers": json.loads(response)} 79 | 80 | def run(self, research_state: dict): 81 | print_agent_output(f"Writing final research report based on research data...", agent="WRITER") 82 | research_layout_content = self.write_sections(research_state) 83 | 84 | if research_state.get("task").get("verbose"): 85 | print_agent_output(research_layout_content, agent="WRITER") 86 | 87 | headers = self.get_headers(research_state) 88 | if research_state.get("task").get("follow_guidelines"): 89 | print_agent_output("Rewriting layout based on guidelines...", agent="WRITER") 90 | headers = self.revise_headers(task=research_state.get("task"), headers=headers).get("headers") 91 | 92 | return {**research_layout_content, "headers": headers} 93 | -------------------------------------------------------------------------------- /multi_agents/langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "python_version": "3.11", 3 | "dependencies": [ 4 | "." 5 | ], 6 | "graphs": { 7 | "agent": "./agent.py:graph" 8 | }, 9 | "env": ".env" 10 | } -------------------------------------------------------------------------------- /multi_agents/main.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | from agents import ChiefEditorAgent 3 | import asyncio 4 | import json 5 | import os 6 | 7 | # Run with LangSmith if API key is set 8 | if os.environ.get("LANGCHAIN_API_KEY"): 9 | os.environ["LANGCHAIN_TRACING_V2"] = "true" 10 | load_dotenv() 11 | 12 | 13 | def open_task(): 14 | with open('task.json', 'r') as f: 15 | task = json.load(f) 16 | 17 | if not task: 18 | raise Exception("No task provided. 
Please include a task.json file in the root directory.") 19 | 20 | return task 21 | 22 | 23 | async def main(): 24 | task = open_task() 25 | 26 | chief_editor = ChiefEditorAgent(task) 27 | research_report = await chief_editor.run_research_task() 28 | 29 | return research_report 30 | 31 | if __name__ == "__main__": 32 | asyncio.run(main()) 33 | -------------------------------------------------------------------------------- /multi_agents/memory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/multi_agents/memory/__init__.py -------------------------------------------------------------------------------- /multi_agents/memory/draft.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict, List, Annotated 2 | import operator 3 | 4 | 5 | class DraftState(TypedDict): 6 | task: dict 7 | topic: str 8 | draft: dict 9 | review: str 10 | revision_notes: str -------------------------------------------------------------------------------- /multi_agents/memory/research.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict, List, Annotated 2 | import operator 3 | 4 | 5 | class ResearchState(TypedDict): 6 | task: dict 7 | initial_research: str 8 | sections: List[str] 9 | research_data: List[dict] 10 | # Report layout 11 | title: str 12 | headers: dict 13 | date: str 14 | table_of_contents: str 15 | introduction: str 16 | conclusion: str 17 | sources: List[str] 18 | report: str 19 | 20 | 21 | -------------------------------------------------------------------------------- /multi_agents/requirements.txt: -------------------------------------------------------------------------------- 1 | langgraph 2 | gpt_researcher 3 | langchain-community 4 | python-dotenv 5 | weasyprint 6 | json5 7 | -------------------------------------------------------------------------------- /multi_agents/task.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": "Is AI in a hype cycle?", 3 | "max_sections": 3, 4 | "publish_formats": { 5 | "markdown": true, 6 | "pdf": true, 7 | "docx": true 8 | }, 9 | "follow_guidelines": false, 10 | "model": "gpt-4o", 11 | "guidelines": [ 12 | "The report MUST be written in APA format", 13 | "Each sub section MUST include supporting sources using hyperlinks. If none exist, erase the sub section or rewrite it to be a part of the previous section", 14 | "The report MUST be written in spanish" 15 | ], 16 | "verbose": true 17 | } -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "gpt-researcher" 3 | version = "0.0.5" 4 | description = "GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks." 
5 | authors = ["Tavily "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.10,<3.12" 11 | beautifulsoup4 = ">=4.12.2" 12 | colorama = ">=0.4.6" 13 | duckduckgo_search = ">=4.1.1" 14 | md2pdf = ">=1.0.1" 15 | openai = ">=1.3.3" 16 | playwright = ">=1.39.0" 17 | python-dotenv = ">=1.0.0" 18 | pyyaml = ">=6.0.1" 19 | uvicorn = ">=0.24.0.post1" 20 | pydantic = ">=2.5.1" 21 | fastapi = ">=0.104.1" 22 | python-multipart = ">=0.0.6" 23 | markdown = ">=3.5.1" 24 | langchain = ">=0.0.350" 25 | langgraph = ">=0.0.29" 26 | tavily-python = ">=0.2.8" 27 | permchain = ">=0.0.6" 28 | arxiv = ">=2.0.0" 29 | PyMuPDF = ">=1.23.6" 30 | requests = ">=2.31.0" 31 | jinja2 = ">=3.1.2" 32 | aiofiles = ">=23.2.1" 33 | newspaper3k = ">=0.2.8" 34 | langchain_community = ">=0.0.28" 35 | SQLAlchemy = ">=2.0.28" 36 | mistune = "^3.0.2" 37 | htmldocx = "^0.0.6" 38 | python-docx = "^1.1.0" 39 | langchain-openai = "^0.1.1" 40 | langchain-google-genai = "^0.0.11" 41 | lxml = { version = ">=4.9.2", extras = ["html_clean"] } 42 | unstructured = "^0.13.0" 43 | 44 | [build-system] 45 | requires = ["poetry-core"] 46 | build-backend = "poetry.core.masonry.api" 47 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # dependencies 2 | beautifulsoup4 3 | colorama 4 | duckduckgo_search 5 | yahoo-search-py 6 | md2pdf 7 | playwright 8 | openai 9 | python-dotenv 10 | pyyaml 11 | uvicorn 12 | pydantic 13 | fastapi 14 | python-multipart 15 | markdown 16 | langchain 17 | langchain-openai 18 | langchain-google-genai 19 | langchain_community 20 | tavily-python 21 | arxiv 22 | PyMuPDF 23 | requests 24 | jinja2 25 | aiofiles 26 | newspaper3k 27 | SQLAlchemy 28 | mistune 29 | python-docx 30 | htmldocx 31 | lxml[html_clean] 32 | websockets 33 | unstructured 34 | pytest 35 | pytest-asyncio -------------------------------------------------------------------------------- /scraping/js/overlay.js: -------------------------------------------------------------------------------- 1 | const overlay = document.createElement('div'); 2 | Object.assign(overlay.style, { 3 | position: 'fixed', 4 | zIndex: 999999, 5 | top: 0, 6 | left: 0, 7 | width: '100%', 8 | height: '100%', 9 | background: 'rgba(0, 0, 0, 0.7)', 10 | color: '#fff', 11 | fontSize: '24px', 12 | fontWeight: 'bold', 13 | display: 'flex', 14 | justifyContent: 'center', 15 | alignItems: 'center', 16 | }); 17 | const textContent = document.createElement('div'); 18 | Object.assign(textContent.style, { 19 | textAlign: 'center', 20 | }); 21 | textContent.textContent = 'Tavily AI: Analyzing Page'; 22 | overlay.appendChild(textContent); 23 | document.body.append(overlay); 24 | document.body.style.overflow = 'hidden'; 25 | let dotCount = 0; 26 | setInterval(() => { 27 | textContent.textContent = 'Tavily AI: Analyzing Page' + '.'.repeat(dotCount); 28 | dotCount = (dotCount + 1) % 4; 29 | }, 1000); 30 | -------------------------------------------------------------------------------- /scraping/processing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hwchase17/gpt-researcher/df4350939dbe42a059d7cd00aec0ae61388c9e35/scraping/processing/__init__.py -------------------------------------------------------------------------------- /scraping/processing/html.py: -------------------------------------------------------------------------------- 1 | """HTML 
processing functions""" 2 | from __future__ import annotations 3 | 4 | from bs4 import BeautifulSoup 5 | from requests.compat import urljoin 6 | 7 | 8 | def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> list[tuple[str, str]]: 9 | """Extract hyperlinks from a BeautifulSoup object 10 | 11 | Args: 12 | soup (BeautifulSoup): The BeautifulSoup object 13 | base_url (str): The base URL 14 | 15 | Returns: 16 | List[Tuple[str, str]]: The extracted hyperlinks 17 | """ 18 | return [ 19 | (link.text, urljoin(base_url, link["href"])) 20 | for link in soup.find_all("a", href=True) 21 | ] 22 | 23 | 24 | def format_hyperlinks(hyperlinks: list[tuple[str, str]]) -> list[str]: 25 | """Format hyperlinks to be displayed to the user 26 | 27 | Args: 28 | hyperlinks (List[Tuple[str, str]]): The hyperlinks to format 29 | 30 | Returns: 31 | List[str]: The formatted hyperlinks 32 | """ 33 | return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks] 34 | -------------------------------------------------------------------------------- /scraping/scrape_skills.py: -------------------------------------------------------------------------------- 1 | from langchain.document_loaders import PyMuPDFLoader 2 | from langchain.retrievers import ArxivRetriever 3 | 4 | 5 | def scrape_pdf_with_pymupdf(url) -> str: 6 | """Scrape a pdf with pymupdf 7 | 8 | Args: 9 | url (str): The url of the pdf to scrape 10 | 11 | Returns: 12 | str: The text scraped from the pdf 13 | """ 14 | loader = PyMuPDFLoader(url) 15 | doc = loader.load() 16 | return str(doc) 17 | 18 | 19 | def scrape_pdf_with_arxiv(query) -> str: 20 | """Scrape a pdf with arxiv 21 | default document length of 70000 about ~15 pages or None for no limit 22 | 23 | Args: 24 | query (str): The query to search for 25 | 26 | Returns: 27 | str: The text scraped from the pdf 28 | """ 29 | retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None) 30 | docs = retriever.get_relevant_documents(query=query) 31 | return docs[0].page_content -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open(r"README.md", "r", encoding="utf-8") as f: 4 | long_description = f.read() 5 | 6 | with open("requirements.txt", "r") as f: 7 | reqs = [line.strip() for line in f if ('selenium' not in line and 'webdriver' not in line)] 8 | 9 | setup( 10 | name="gpt-researcher", 11 | version="0.5.3", 12 | description="GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks.", 13 | package_dir={'gpt_researcher': 'gpt_researcher'}, 14 | packages=find_packages(), 15 | long_description=long_description, 16 | long_description_content_type="text/markdown", 17 | url="https://github.com/assafelovic/gpt-researcher", 18 | author="Assaf Elovic", 19 | author_email="assaf.elovic@gmail.com", 20 | license="MIT", 21 | classifiers=[ 22 | "License :: OSI Approved :: MIT License", 23 | "Intended Audience :: Developers", 24 | "Intended Audience :: Education", 25 | "Intended Audience :: Science/Research", 26 | "Programming Language :: Python :: 3.11", 27 | "Programming Language :: Python :: 3.12", 28 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 29 | ], 30 | install_requires=reqs, 31 | 32 | 33 | ) -------------------------------------------------------------------------------- /tests/all-6-report-types.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import asyncio 3 | import pytest 4 | from gpt_researcher import GPTResearcher 5 | 6 | # Define the report types to test 7 | report_types = [ 8 | "research_report", 9 | "custom_report", 10 | "subtopic_report", 11 | "summary_report", 12 | "detailed_report", 13 | "quick_report" 14 | ] 15 | 16 | # Define a common query and sources for testing 17 | query = "What are the latest advancements in AI?" 18 | sources = ["https://en.wikipedia.org/wiki/Artificial_intelligence", "https://www.ibm.com/watson/ai"] 19 | 20 | # Define the output directory 21 | output_dir = "./outputs" 22 | 23 | @pytest.mark.asyncio 24 | @pytest.mark.parametrize("report_type", report_types) 25 | async def test_gpt_researcher(report_type): 26 | # Ensure the output directory exists 27 | if not os.path.exists(output_dir): 28 | os.makedirs(output_dir) 29 | 30 | # Create an instance of GPTResearcher 31 | researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources) 32 | 33 | # Conduct research and write the report 34 | await researcher.conduct_research() 35 | report = await researcher.write_report() 36 | 37 | # Define the expected output filenames 38 | pdf_filename = os.path.join(output_dir, f"{report_type}.pdf") 39 | docx_filename = os.path.join(output_dir, f"{report_type}.docx") 40 | 41 | # Check if the PDF and DOCX files are created 42 | # assert os.path.exists(pdf_filename), f"PDF file not found for report type: {report_type}" 43 | # assert os.path.exists(docx_filename), f"DOCX file not found for report type: {report_type}" 44 | 45 | # Clean up the generated files (optional) 46 | # os.remove(pdf_filename) 47 | # os.remove(docx_filename) 48 | 49 | if __name__ == "__main__": 50 | pytest.main() -------------------------------------------------------------------------------- /tests/documents-report-source.py: -------------------------------------------------------------------------------- 1 | import os 2 | import asyncio 3 | import pytest 4 | from gpt_researcher.master.agent import GPTResearcher # Ensure this path is correct 5 | from dotenv import load_dotenv 6 | load_dotenv() 7 | 8 | # Define the report types to test 9 | report_types = [ 10 | "research_report", 11 | "custom_report", 12 | "subtopic_report", 13 | "summary_report", 14 | "detailed_report", 15 | "quick_report" 16 | ] 17 | 18 | # Define a common query and sources for testing 19 | query = "What can you tell me about myself based on my documents?" 
20 | 21 | # Define the output directory 22 | output_dir = "./outputs" 23 | 24 | @pytest.mark.asyncio 25 | @pytest.mark.parametrize("report_type", report_types) 26 | async def test_gpt_researcher(report_type): 27 | # Ensure the output directory exists 28 | if not os.path.exists(output_dir): 29 | os.makedirs(output_dir) 30 | 31 | # Create an instance of GPTResearcher with report_source set to "documents" 32 | researcher = GPTResearcher(query=query, report_type=report_type, report_source="documents") 33 | 34 | # Conduct research and write the report 35 | await researcher.conduct_research() 36 | report = await researcher.write_report() 37 | 38 | # Define the expected output filenames 39 | pdf_filename = os.path.join(output_dir, f"{report_type}.pdf") 40 | docx_filename = os.path.join(output_dir, f"{report_type}.docx") 41 | 42 | # Check if the PDF and DOCX files are created 43 | # assert os.path.exists(pdf_filename), f"PDF file not found for report type: {report_type}" 44 | # assert os.path.exists(docx_filename), f"DOCX file not found for report type: {report_type}" 45 | 46 | # Clean up the generated files (optional) 47 | # os.remove(pdf_filename) 48 | # os.remove(docx_filename) 49 | 50 | if __name__ == "__main__": 51 | pytest.main() --------------------------------------------------------------------------------