├── .cursorrules
├── .dockerignore
├── .env.example
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── dependabot.yml
└── workflows
│ ├── docker-build.yml
│ └── docker-push.yml
├── .gitignore
├── .python-version
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── CURSOR_RULES.md
├── Dockerfile
├── LICENSE
├── Procfile
├── README-ja_JP.md
├── README-ko_KR.md
├── README-zh_CN.md
├── README.md
├── backend
├── __init__.py
├── chat
│ ├── __init__.py
│ └── chat.py
├── memory
│ ├── __init__.py
│ ├── draft.py
│ └── research.py
├── report_type
│ ├── __init__.py
│ ├── basic_report
│ │ ├── __init__.py
│ │ └── basic_report.py
│ ├── deep_research
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── example.py
│ │ └── main.py
│ └── detailed_report
│ │ ├── README.md
│ │ ├── __init__.py
│ │ └── detailed_report.py
├── server
│ ├── __init__.py
│ ├── app.py
│ ├── logging_config.py
│ ├── server.py
│ ├── server_utils.py
│ └── websocket_manager.py
└── utils.py
├── citation.cff
├── cli.py
├── docker-compose.yml
├── docs
├── CNAME
├── README.md
├── babel.config.js
├── blog
│ ├── 2023-09-22-gpt-researcher
│ │ ├── architecture.png
│ │ ├── index.md
│ │ └── planner.jpeg
│ ├── 2023-11-12-openai-assistant
│ │ ├── diagram-1.png
│ │ ├── diagram-assistant.jpeg
│ │ └── index.md
│ ├── 2024-05-19-gptr-langgraph
│ │ ├── architecture.jpeg
│ │ ├── blog-langgraph.jpeg
│ │ └── index.md
│ ├── 2024-09-7-hybrid-research
│ │ ├── gptr-hybrid.png
│ │ └── index.md
│ ├── 2025-02-26-deep-research
│ │ └── index.md
│ ├── 2025-03-10-stepping-into-the-story
│ │ └── index.md
│ └── authors.yml
├── discord-bot
│ ├── Dockerfile
│ ├── Dockerfile.dev
│ ├── commands
│ │ └── ask.js
│ ├── deploy-commands.js
│ ├── gptr-webhook.js
│ ├── index.js
│ ├── package.json
│ └── server.js
├── docs
│ ├── contribute.md
│ ├── examples
│ │ ├── custom_prompt.py
│ │ ├── detailed_report.md
│ │ ├── examples.ipynb
│ │ ├── examples.md
│ │ ├── hybrid_research.md
│ │ ├── pip-run.ipynb
│ │ ├── sample_report.py
│ │ └── sample_sources_only.py
│ ├── faq.md
│ ├── gpt-researcher
│ │ ├── context
│ │ │ ├── azure-storage.md
│ │ │ ├── data-ingestion.md
│ │ │ ├── filtering-by-domain.md
│ │ │ ├── img
│ │ │ │ ├── gptr-hybrid.png
│ │ │ │ ├── nextjs-filter-by-domain.JPG
│ │ │ │ └── vanilla-filter-by-domains.png
│ │ │ ├── local-docs.md
│ │ │ ├── tailored-research.md
│ │ │ └── vector-stores.md
│ │ ├── frontend
│ │ │ ├── discord-bot.md
│ │ │ ├── embed-script.md
│ │ │ ├── img
│ │ │ │ ├── bot-permissions.png
│ │ │ │ └── oath2-url-generator.png
│ │ │ ├── introduction.md
│ │ │ ├── nextjs-frontend.md
│ │ │ ├── react-package.md
│ │ │ ├── vanilla-js-frontend.md
│ │ │ └── visualizing-websockets.md
│ │ ├── getting-started
│ │ │ ├── cli.md
│ │ │ ├── getting-started-with-docker.md
│ │ │ ├── getting-started.md
│ │ │ ├── how-to-choose.md
│ │ │ ├── introduction.md
│ │ │ └── linux-deployment.md
│ │ ├── gptr
│ │ │ ├── automated-tests.md
│ │ │ ├── config.md
│ │ │ ├── deep_research.md
│ │ │ ├── example.md
│ │ │ ├── npm-package.md
│ │ │ ├── pip-package.md
│ │ │ ├── querying-the-backend.md
│ │ │ ├── scraping.md
│ │ │ └── troubleshooting.md
│ │ ├── handling-logs
│ │ │ ├── all-about-logs.md
│ │ │ ├── langsmith-logs.md
│ │ │ ├── langsmith.png
│ │ │ └── simple-logs-example.md
│ │ ├── llms
│ │ │ ├── llms.md
│ │ │ ├── running-with-azure.md
│ │ │ ├── running-with-ollama.md
│ │ │ ├── supported-llms.md
│ │ │ └── testing-your-llm.md
│ │ ├── mcp-server
│ │ │ ├── advanced-usage.md
│ │ │ ├── claude-integration.md
│ │ │ └── getting-started.md
│ │ ├── multi_agents
│ │ │ └── langgraph.md
│ │ └── search-engines
│ │ │ ├── retrievers.md
│ │ │ └── test-your-retriever.md
│ ├── reference
│ │ ├── config
│ │ │ ├── config.md
│ │ │ └── singleton.md
│ │ ├── processing
│ │ │ ├── html.md
│ │ │ └── text.md
│ │ └── sidebar.json
│ ├── roadmap.md
│ └── welcome.md
├── docusaurus.config.js
├── npm
│ ├── Readme.md
│ ├── index.js
│ └── package.json
├── package.json
├── pydoc-markdown.yml
├── sidebars.js
├── src
│ ├── components
│ │ ├── HomepageFeatures.js
│ │ └── HomepageFeatures.module.css
│ ├── css
│ │ └── custom.css
│ └── pages
│ │ ├── index.js
│ │ └── index.module.css
└── static
│ ├── .nojekyll
│ ├── CNAME
│ └── img
│ ├── architecture.png
│ ├── banner1.jpg
│ ├── examples.png
│ ├── gptr-logo.png
│ ├── leaderboard.png
│ └── multi-agent.png
├── evals
├── README.md
├── __init__.py
└── simple_evals
│ ├── .gitignore
│ ├── __init__.py
│ ├── logs
│ ├── .gitkeep
│ ├── README.md
│ └── SimpleQA Eval 100 Problems 2-22-25.txt
│ ├── problems
│ └── Simple QA Test Set.csv
│ ├── requirements.txt
│ ├── run_eval.py
│ └── simpleqa_eval.py
├── frontend
├── README.md
├── index.html
├── nextjs
│ ├── .babelrc.build.json
│ ├── .dockerignore
│ ├── .eslintrc.json
│ ├── .example.env
│ ├── .gitignore
│ ├── .prettierrc
│ ├── Dockerfile
│ ├── Dockerfile.dev
│ ├── README.md
│ ├── actions
│ │ └── apiActions.ts
│ ├── app
│ │ ├── globals.css
│ │ ├── layout.tsx
│ │ └── page.tsx
│ ├── components
│ │ ├── Footer.tsx
│ │ ├── Header.tsx
│ │ ├── Hero.tsx
│ │ ├── HumanFeedback.tsx
│ │ ├── Images
│ │ │ ├── ImageModal.tsx
│ │ │ └── ImagesAlbum.tsx
│ │ ├── Langgraph
│ │ │ └── Langgraph.js
│ │ ├── LoadingDots.tsx
│ │ ├── ResearchBlocks
│ │ │ ├── AccessReport.tsx
│ │ │ ├── Answer.tsx
│ │ │ ├── ImageSection.tsx
│ │ │ ├── LogsSection.tsx
│ │ │ ├── Question.tsx
│ │ │ ├── Sources.tsx
│ │ │ └── elements
│ │ │ │ ├── InputArea.tsx
│ │ │ │ ├── LogMessage.tsx
│ │ │ │ ├── SourceCard.tsx
│ │ │ │ └── SubQuestions.tsx
│ │ ├── ResearchResults.tsx
│ │ ├── ResearchSidebar.tsx
│ │ ├── Settings
│ │ │ ├── ChatBox.tsx
│ │ │ ├── FileUpload.tsx
│ │ │ ├── Modal.tsx
│ │ │ ├── Settings.css
│ │ │ └── ToneSelector.tsx
│ │ ├── SimilarTopics.tsx
│ │ ├── Task
│ │ │ ├── Accordion.tsx
│ │ │ ├── AgentLogs.tsx
│ │ │ ├── Report.tsx
│ │ │ └── ResearchForm.tsx
│ │ └── TypeAnimation.tsx
│ ├── config
│ │ └── task.ts
│ ├── helpers
│ │ ├── findDifferences.ts
│ │ ├── getHost.ts
│ │ └── markdownHelper.ts
│ ├── hooks
│ │ ├── useAnalytics.ts
│ │ ├── useResearchHistory.ts
│ │ └── useWebSocket.ts
│ ├── next.config.mjs
│ ├── nginx
│ │ └── default.conf
│ ├── package.json
│ ├── package.lib.json
│ ├── postcss.config.mjs
│ ├── public
│ │ ├── embed.js
│ │ ├── favicon.ico
│ │ ├── img
│ │ │ ├── F.svg
│ │ │ ├── Info.svg
│ │ │ ├── W.svg
│ │ │ ├── agents
│ │ │ │ ├── academicResearchAgentAvatar.png
│ │ │ │ ├── businessAnalystAgentAvatar.png
│ │ │ │ ├── computerSecurityanalystAvatar.png
│ │ │ │ ├── defaultAgentAvatar.JPG
│ │ │ │ ├── financeAgentAvatar.png
│ │ │ │ ├── mathAgentAvatar.png
│ │ │ │ └── travelAgentAvatar.png
│ │ │ ├── arrow-circle-up-right.svg
│ │ │ ├── arrow-narrow-right.svg
│ │ │ ├── browser.svg
│ │ │ ├── chat-check.svg
│ │ │ ├── chat.svg
│ │ │ ├── copy-white.svg
│ │ │ ├── copy.svg
│ │ │ ├── dinosaur.svg
│ │ │ ├── discord.svg
│ │ │ ├── docker-blue.svg
│ │ │ ├── docker.svg
│ │ │ ├── dunk.svg
│ │ │ ├── github-blue.svg
│ │ │ ├── github-footer.svg
│ │ │ ├── github.svg
│ │ │ ├── globe.svg
│ │ │ ├── gptr-logo.png
│ │ │ ├── hiker.svg
│ │ │ ├── icon _atom_.svg
│ │ │ ├── icon _dumbell_.svg
│ │ │ ├── icon _leaf_.svg
│ │ │ ├── image.svg
│ │ │ ├── indeed.svg
│ │ │ ├── link.svg
│ │ │ ├── message-question-circle.svg
│ │ │ ├── news.svg
│ │ │ ├── search.svg
│ │ │ ├── share.svg
│ │ │ ├── similarTopics.svg
│ │ │ ├── sources.svg
│ │ │ ├── stock.svg
│ │ │ ├── stock2.svg
│ │ │ ├── thinking.svg
│ │ │ ├── white-books.svg
│ │ │ └── x.svg
│ │ ├── next.svg
│ │ └── vercel.svg
│ ├── rollup.config.js
│ ├── src
│ │ ├── GPTResearcher.tsx
│ │ ├── index.css
│ │ ├── index.d.ts
│ │ ├── index.ts
│ │ └── utils
│ │ │ └── imageTransformPlugin.js
│ ├── styles
│ │ └── markdown.css
│ ├── tailwind.config.ts
│ ├── tsconfig.json
│ ├── tsconfig.lib.json
│ ├── types
│ │ ├── data.ts
│ │ └── react-ga4.d.ts
│ └── utils
│ │ ├── consolidateBlocks.ts
│ │ └── dataProcessing.ts
├── pdf_styles.css
├── scripts.js
├── static
│ ├── academicResearchAgentAvatar.png
│ ├── businessAnalystAgentAvatar.png
│ ├── computerSecurityanalystAvatar.png
│ ├── defaultAgentAvatar.JPG
│ ├── favicon.ico
│ ├── financeAgentAvatar.png
│ ├── gptr-logo.png
│ ├── mathAgentAvatar.png
│ └── travelAgentAvatar.png
└── styles.css
├── gpt_researcher
├── README.md
├── __init__.py
├── actions
│ ├── __init__.py
│ ├── agent_creator.py
│ ├── markdown_processing.py
│ ├── query_processing.py
│ ├── report_generation.py
│ ├── retriever.py
│ ├── utils.py
│ └── web_scraping.py
├── agent.py
├── config
│ ├── __init__.py
│ ├── config.py
│ └── variables
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── default.py
│ │ └── test_local.json
├── context
│ ├── __init__.py
│ ├── compression.py
│ └── retriever.py
├── document
│ ├── __init__.py
│ ├── azure_document_loader.py
│ ├── document.py
│ ├── langchain_document.py
│ └── online_document.py
├── llm_provider
│ ├── __init__.py
│ └── generic
│ │ ├── __init__.py
│ │ └── base.py
├── memory
│ ├── __init__.py
│ └── embeddings.py
├── prompts.py
├── retrievers
│ ├── __init__.py
│ ├── arxiv
│ │ ├── __init__.py
│ │ └── arxiv.py
│ ├── bing
│ │ ├── __init__.py
│ │ └── bing.py
│ ├── custom
│ │ ├── __init__.py
│ │ └── custom.py
│ ├── duckduckgo
│ │ ├── __init__.py
│ │ └── duckduckgo.py
│ ├── exa
│ │ ├── __init__.py
│ │ └── exa.py
│ ├── google
│ │ ├── __init__.py
│ │ └── google.py
│ ├── pubmed_central
│ │ ├── __init__.py
│ │ └── pubmed_central.py
│ ├── searchapi
│ │ ├── __init__.py
│ │ └── searchapi.py
│ ├── searx
│ │ ├── __init__.py
│ │ └── searx.py
│ ├── semantic_scholar
│ │ ├── __init__.py
│ │ └── semantic_scholar.py
│ ├── serpapi
│ │ ├── __init__.py
│ │ └── serpapi.py
│ ├── serper
│ │ ├── __init__.py
│ │ └── serper.py
│ ├── tavily
│ │ ├── __init__.py
│ │ └── tavily_search.py
│ └── utils.py
├── scraper
│ ├── __init__.py
│ ├── arxiv
│ │ ├── __init__.py
│ │ └── arxiv.py
│ ├── beautiful_soup
│ │ ├── __init__.py
│ │ └── beautiful_soup.py
│ ├── browser
│ │ ├── __init__.py
│ │ ├── browser.py
│ │ ├── js
│ │ │ └── overlay.js
│ │ ├── nodriver_scraper.py
│ │ └── processing
│ │ │ ├── __init__.py
│ │ │ ├── html.py
│ │ │ └── scrape_skills.py
│ ├── firecrawl
│ │ ├── __init__.py
│ │ └── firecrawl.py
│ ├── pymupdf
│ │ ├── __init__.py
│ │ └── pymupdf.py
│ ├── scraper.py
│ ├── tavily_extract
│ │ ├── __init__.py
│ │ └── tavily_extract.py
│ ├── utils.py
│ └── web_base_loader
│ │ ├── __init__.py
│ │ └── web_base_loader.py
├── skills
│ ├── __init__.py
│ ├── browser.py
│ ├── context_manager.py
│ ├── curator.py
│ ├── deep_research.py
│ ├── researcher.py
│ └── writer.py
├── utils
│ ├── __init__.py
│ ├── costs.py
│ ├── enum.py
│ ├── llm.py
│ ├── logger.py
│ ├── logging_config.py
│ ├── validators.py
│ └── workers.py
└── vector_store
│ ├── __init__.py
│ └── vector_store.py
├── langgraph.json
├── main.py
├── mcp-server
└── README.md
├── multi_agents
├── README.md
├── __init__.py
├── agent.py
├── agents
│ ├── __init__.py
│ ├── editor.py
│ ├── human.py
│ ├── orchestrator.py
│ ├── publisher.py
│ ├── researcher.py
│ ├── reviewer.py
│ ├── reviser.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── file_formats.py
│ │ ├── llms.py
│ │ ├── pdf_styles.css
│ │ ├── utils.py
│ │ └── views.py
│ └── writer.py
├── langgraph.json
├── main.py
├── memory
│ ├── __init__.py
│ ├── draft.py
│ └── research.py
├── package.json
├── requirements.txt
└── task.json
├── poetry.toml
├── pyproject.toml
├── requirements.txt
├── requirements_minimal.txt
├── setup.py
└── tests
├── __init__.py
├── docs
└── doc.pdf
├── documents-report-source.py
├── gptr-logs-handler.py
├── report-types.py
├── research_test.py
├── test-loaders.py
├── test-openai-llm.py
├── test-your-embeddings.py
├── test-your-llm.py
├── test-your-retriever.py
├── test_logging.py
├── test_logging_output.py
├── test_logs.py
├── test_researcher_logging.py
└── vector-store.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | output/
3 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=
2 | TAVILY_API_KEY=
3 | DOC_PATH=./my-docs
4 |
5 | # NEXT_PUBLIC_GPTR_API_URL=http://0.0.0.0:8000 # Defaults to localhost:8000 if not set
--------------------------------------------------------------------------------
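These variables are read from the process environment at runtime. A minimal sanity check, assuming the file is loaded with python-dotenv (the variable names are exactly those listed above):

```python
# Minimal sketch: verify the keys from .env.example are present before running GPT Researcher.
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads the .env file you created from .env.example

for key in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    if not os.getenv(key):
        raise EnvironmentError(f"{key} is not set; copy .env.example to .env and fill it in")

print("Environment looks good; DOC_PATH =", os.getenv("DOC_PATH", "./my-docs"))
```
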
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "pip" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "weekly"
12 | - package-ecosystem: "docker"
13 | directory: "/"
14 | schedule:
15 | interval: "weekly"
16 |
--------------------------------------------------------------------------------
/.github/workflows/docker-build.yml:
--------------------------------------------------------------------------------
1 | name: GPTR tests
2 | run-name: ${{ github.actor }} ran the GPTR tests flow
3 | permissions:
4 | contents: read
5 | pull-requests: write
6 | on:
7 | workflow_dispatch: # Add this line to enable manual triggering
8 | # pull_request:
9 | # types: [opened, synchronize]
10 |
11 | jobs:
12 | docker:
13 | runs-on: ubuntu-latest
14 | environment: tests # Specify the environment to use for this job
15 | env:
16 | # Ensure these environment variables are set for the entire job
17 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
18 | TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }}
19 | LANGCHAIN_API_KEY: ${{ secrets.LANGCHAIN_API_KEY }}
20 | steps:
21 | - name: Git checkout
22 | uses: actions/checkout@v3
23 |
24 | - name: Set up QEMU
25 | uses: docker/setup-qemu-action@v2
26 |
27 | - name: Set up Docker Buildx
28 | uses: docker/setup-buildx-action@v2
29 | with:
30 | driver: docker
31 |
32 | # - name: Build Docker images
33 | # uses: docker/build-push-action@v4
34 | # with:
35 | # push: false
36 | # tags: gptresearcher/gpt-researcher:latest
37 | # file: Dockerfile
38 |
39 | - name: Set up Docker Compose
40 | run: |
41 | sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
42 | sudo chmod +x /usr/local/bin/docker-compose
43 | - name: Run tests with Docker Compose
44 | run: |
45 | docker-compose --profile test run --rm gpt-researcher-tests
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #Ignore env containing secrets
2 | .env
3 | .venv
4 | .envrc
5 |
6 | #Ignore Virtual Env
7 | env/
8 | venv/
9 | .venv/
10 |
11 | # Other Environments
12 | ENV/
13 | env.bak/
14 | venv.bak/
15 |
16 | #Ignore generated outputs
17 | outputs/
18 | *.lock
19 | dist/
20 | gpt_researcher.egg-info/
21 |
22 | #Ignore my local docs
23 | my-docs/
24 |
25 | #Ignore pycache
26 | **/__pycache__/
27 |
28 | #Ignore mypy cache
29 | .mypy_cache/
30 | node_modules
31 | .idea
32 | .DS_Store
33 | .docusaurus
34 | build
35 | docs/build
36 |
37 | .vscode/launch.json
38 | .langgraph-data/
39 | .next/
40 | package-lock.json
41 |
42 | #Vim swp files
43 | *.swp
44 |
45 | # Log files
46 | logs/
47 | *.orig
48 | *.log
49 | server_log.txt
50 |
51 | #Cursor Rules
52 | .cursorrules
53 | CURSOR_RULES.md
54 | /.history
55 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.11
2 |
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | web: python -m uvicorn backend.server.server:app --host=0.0.0.0 --port=${PORT}
--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
1 | from multi_agents import agents
--------------------------------------------------------------------------------
/backend/chat/__init__.py:
--------------------------------------------------------------------------------
1 | from .chat import ChatAgentWithMemory
--------------------------------------------------------------------------------
/backend/memory/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/backend/memory/__init__.py
--------------------------------------------------------------------------------
/backend/memory/draft.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict, List, Annotated
2 | import operator
3 |
4 |
5 | class DraftState(TypedDict):
6 | task: dict
7 | topic: str
8 | draft: dict
9 | review: str
10 | revision_notes: str
--------------------------------------------------------------------------------
/backend/memory/research.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict, List, Annotated
2 | import operator
3 |
4 |
5 | class ResearchState(TypedDict):
6 | task: dict
7 | initial_research: str
8 | sections: List[str]
9 | research_data: List[dict]
10 | # Report layout
11 | title: str
12 | headers: dict
13 | date: str
14 | table_of_contents: str
15 | introduction: str
16 | conclusion: str
17 | sources: List[str]
18 | report: str
19 |
20 |
21 |
--------------------------------------------------------------------------------
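`ResearchState` is a plain `TypedDict`, so each node in the multi-agent flow can read and update it like a dict. A small illustration with hypothetical values, just to show the shape of each field:

```python
from backend.memory.research import ResearchState

# Hypothetical values; only the field names and types come from the definition above.
state: ResearchState = {
    "task": {"query": "Impact of open-source LLMs", "max_sections": 3},
    "initial_research": "Summary of preliminary findings...",
    "sections": ["Ecosystem", "Benchmarks", "Licensing"],
    "research_data": [{"Ecosystem": "Details gathered for this section..."}],
    "title": "Open-Source LLMs in 2024",
    "headers": {"introduction": "Introduction", "conclusion": "Conclusion"},
    "date": "2024-01-01",
    "table_of_contents": "- Introduction\n- Ecosystem\n- Benchmarks",
    "introduction": "This report surveys...",
    "conclusion": "Open-source LLMs continue to...",
    "sources": ["https://example.com/article"],
    "report": "",  # filled in once the final report is assembled
}
```
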
/backend/report_type/__init__.py:
--------------------------------------------------------------------------------
1 | from .basic_report.basic_report import BasicReport
2 | from .detailed_report.detailed_report import DetailedReport
3 |
4 | __all__ = [
5 | "BasicReport",
6 | "DetailedReport"
7 | ]
--------------------------------------------------------------------------------
/backend/report_type/basic_report/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/backend/report_type/basic_report/__init__.py
--------------------------------------------------------------------------------
/backend/report_type/basic_report/basic_report.py:
--------------------------------------------------------------------------------
1 | from fastapi import WebSocket
2 | from typing import Any
3 |
4 | from gpt_researcher import GPTResearcher
5 |
6 |
7 | class BasicReport:
8 | def __init__(
9 | self,
10 | query: str,
11 | query_domains: list,
12 | report_type: str,
13 | report_source: str,
14 | source_urls,
15 | document_urls,
16 | tone: Any,
17 | config_path: str,
18 | websocket: WebSocket,
19 | headers=None
20 | ):
21 | self.query = query
22 | self.query_domains = query_domains
23 | self.report_type = report_type
24 | self.report_source = report_source
25 | self.source_urls = source_urls
26 | self.document_urls = document_urls
27 | self.tone = tone
28 | self.config_path = config_path
29 | self.websocket = websocket
30 | self.headers = headers or {}
31 |
32 | # Initialize researcher
33 | self.gpt_researcher = GPTResearcher(
34 | query=self.query,
35 | query_domains=self.query_domains,
36 | report_type=self.report_type,
37 | report_source=self.report_source,
38 | source_urls=self.source_urls,
39 | document_urls=self.document_urls,
40 | tone=self.tone,
41 | config_path=self.config_path,
42 | websocket=self.websocket,
43 | headers=self.headers
44 | )
45 |
46 | async def run(self):
47 | await self.gpt_researcher.conduct_research()
48 | report = await self.gpt_researcher.write_report()
49 | return report
50 |
--------------------------------------------------------------------------------
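Since `BasicReport`'s constructor is shown in full above, a minimal driver sketch follows directly from it. The query and argument values are illustrative; `websocket=None` and `config_path=None` assume the underlying `GPTResearcher` accepts them for programmatic (non-streaming) use, and the `Tone` enum is assumed to live in `gpt_researcher/utils/enum.py`, which the tree lists:

```python
import asyncio

from backend.report_type import BasicReport
from gpt_researcher.utils.enum import Tone  # assumed location per the tree


async def main():
    # Illustrative arguments; websocket=None assumes programmatic (non-streaming) use.
    basic = BasicReport(
        query="What are the main drivers of global chip demand?",
        query_domains=[],
        report_type="research_report",
        report_source="web",
        source_urls=[],
        document_urls=[],
        tone=Tone.Objective,
        config_path=None,
        websocket=None,
    )
    print(await basic.run())


if __name__ == "__main__":
    asyncio.run(main())
```
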
/backend/report_type/deep_research/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/backend/report_type/deep_research/__init__.py
--------------------------------------------------------------------------------
/backend/report_type/deep_research/main.py:
--------------------------------------------------------------------------------
1 | from gpt_researcher import GPTResearcher
2 | from backend.utils import write_md_to_pdf
3 | import asyncio
4 |
5 |
6 | async def main(task: str):
7 | # Progress callback
8 | def on_progress(progress):
9 | print(f"Depth: {progress.current_depth}/{progress.total_depth}")
10 | print(f"Breadth: {progress.current_breadth}/{progress.total_breadth}")
11 | print(f"Queries: {progress.completed_queries}/{progress.total_queries}")
12 | if progress.current_query:
13 | print(f"Current query: {progress.current_query}")
14 |
15 | # Initialize researcher with deep research type
16 | researcher = GPTResearcher(
17 | query=task,
18 | report_type="deep", # This will trigger deep research
19 | )
20 |
21 | # Run research with progress tracking
22 | print("Starting deep research...")
23 | context = await researcher.conduct_research(on_progress=on_progress)
24 | print("\nResearch completed. Generating report...")
25 |
26 | # Generate the final report
27 | report = await researcher.write_report()
28 | await write_md_to_pdf(report, "deep_research_report")
29 | print(f"\nFinal Report: {report}")
30 |
31 | if __name__ == "__main__":
32 | query = "What are the most effective ways for beginners to start investing?"
33 | asyncio.run(main(query))
--------------------------------------------------------------------------------
/backend/report_type/detailed_report/README.md:
--------------------------------------------------------------------------------
1 | ## Detailed Reports
2 |
3 | Introducing long and detailed reports, with a completely new architecture inspired by the latest [STORM](https://arxiv.org/abs/2402.14207) paper.
4 |
5 | In this method we do the following:
6 |
 7 | 1. Trigger an initial GPT Researcher report based on the task
 8 | 2. Generate subtopics from the research summary
 9 | 3. For each subtopic, the headers of its subtopic report are extracted and accumulated
10 | 4. For each subtopic, a report is generated, making sure that information already covered by the accumulated headers is not re-generated
11 | 5. An additional introduction section is written, along with a table of contents constructed from the entire report
12 | 6. The final report is constructed by appending: introduction + table of contents + subsection reports
--------------------------------------------------------------------------------
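The `DetailedReport` class exported from `backend.report_type` implements the pipeline described above. A sketch of invoking it, assuming its constructor mirrors `BasicReport`'s (the class body is not included in this dump, so treat the argument list as an assumption):

```python
import asyncio

from backend.report_type import DetailedReport
from gpt_researcher.utils.enum import Tone  # assumed location per the tree


async def main():
    # Assumed to mirror BasicReport's constructor; adjust to the actual signature in
    # backend/report_type/detailed_report/detailed_report.py.
    detailed = DetailedReport(
        query="State of quantum error correction in 2024",
        query_domains=[],
        report_type="detailed_report",
        report_source="web",
        source_urls=[],
        document_urls=[],
        tone=Tone.Objective,
        config_path=None,
        websocket=None,
    )
    print(await detailed.run())


if __name__ == "__main__":
    asyncio.run(main())
```
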
/backend/report_type/detailed_report/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/backend/report_type/detailed_report/__init__.py
--------------------------------------------------------------------------------
/backend/server/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/backend/server/__init__.py
--------------------------------------------------------------------------------
/backend/server/app.py:
--------------------------------------------------------------------------------
 1 | from fastapi import FastAPI, WebSocket, WebSocketDisconnect
2 | from fastapi.middleware.cors import CORSMiddleware
3 | import logging
4 | from backend.chat.chat import ChatAgentWithMemory
5 |
6 | logger = logging.getLogger(__name__)
7 |
8 | app = FastAPI()
9 |
10 | # Add CORS middleware
11 | app.add_middleware(
12 | CORSMiddleware,
13 | allow_origins=["*"], # In production, replace with your frontend domain
14 | allow_credentials=True,
15 | allow_methods=["*"],
16 | allow_headers=["*"],
17 | )
18 |
19 | @app.get("/")
20 | async def read_root():
21 | return {"message": "Welcome to GPT Researcher"}
22 |
23 | @app.websocket("/ws")
24 | async def websocket_endpoint(websocket: WebSocket):
25 | await websocket.accept()
26 | chat_agent = ChatAgentWithMemory(report="Sample report", config_path="path/to/config", headers={})
27 | try:
28 | while True:
29 | data = await websocket.receive_text()
30 | await chat_agent.chat(data, websocket)
31 | except WebSocketDisconnect:
32 | await websocket.close()
33 |
--------------------------------------------------------------------------------
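The `/ws` endpoint above accepts a connection and relays each text frame to `ChatAgentWithMemory.chat`. A minimal client sketch for exercising it, assuming the `websockets` package and that this app is served locally on port 8000 (e.g. `uvicorn backend.server.app:app`):

```python
import asyncio

import websockets  # pip install websockets


async def main():
    # Assumes the FastAPI app above is running locally on port 8000.
    async with websockets.connect("ws://localhost:8000/ws") as ws:
        await ws.send("What does the sample report say about revenue?")
        reply = await ws.recv()  # first message streamed back by chat_agent.chat
        print(reply)


if __name__ == "__main__":
    asyncio.run(main())
```
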
/citation.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.0.0
2 | message: "If you use this software, please cite it as below."
3 | authors:
4 | - family-names: Elovic
5 | given-names: Assaf
6 | title: gpt-researcher
7 | version: 0.5.4
8 | date-released: 2023-07-23
9 | repository-code: https://github.com/assafelovic/gpt-researcher
10 | url: https://gptr.dev
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | gpt-researcher:
3 | pull_policy: build
4 | image: gptresearcher/gpt-researcher
5 | build: ./
6 | environment:
7 | OPENAI_API_KEY: ${OPENAI_API_KEY}
8 | TAVILY_API_KEY: ${TAVILY_API_KEY}
9 | LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
10 | LOGGING_LEVEL: INFO
11 | volumes:
12 | - ${PWD}/my-docs:/usr/src/app/my-docs:rw
13 | - ${PWD}/outputs:/usr/src/app/outputs:rw
14 | - ${PWD}/logs:/usr/src/app/logs:rw
15 | user: root
16 | restart: always
17 | ports:
18 | - 8000:8000
19 |
20 | gptr-nextjs:
21 | pull_policy: build
22 | image: gptresearcher/gptr-nextjs
23 | stdin_open: true
24 | environment:
25 | CHOKIDAR_USEPOLLING: "true"
26 | LOGGING_LEVEL: INFO
27 | NEXT_PUBLIC_GA_MEASUREMENT_ID: ${NEXT_PUBLIC_GA_MEASUREMENT_ID}
28 | NEXT_PUBLIC_GPTR_API_URL: ${NEXT_PUBLIC_GPTR_API_URL}
29 | build:
30 | dockerfile: Dockerfile.dev
31 | context: frontend/nextjs
32 | volumes:
33 | - /app/node_modules
34 | - ./frontend/nextjs:/app
35 | - ./frontend/nextjs/.next:/app/.next
36 | - ./outputs:/app/outputs
37 | restart: always
38 | ports:
39 | - 3000:3000
40 |
41 | gpt-researcher-tests:
42 | image: gptresearcher/gpt-researcher-tests
43 | build: ./
44 | environment:
45 | OPENAI_API_KEY: ${OPENAI_API_KEY}
46 | TAVILY_API_KEY: ${TAVILY_API_KEY}
47 | LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
48 | LOGGING_LEVEL: INFO
49 | profiles: ["test"]
50 | command: >
51 | /bin/sh -c "
52 | pip install pytest pytest-asyncio faiss-cpu &&
53 | python -m pytest tests/report-types.py &&
54 | python -m pytest tests/vector-store.py
55 | "
56 |
57 | discord-bot:
58 | build:
59 | context: ./docs/discord-bot
60 | dockerfile: Dockerfile.dev
61 | environment:
62 | - DISCORD_BOT_TOKEN=${DISCORD_BOT_TOKEN}
63 | - DISCORD_CLIENT_ID=${DISCORD_CLIENT_ID}
64 | volumes:
65 | - ./docs/discord-bot:/app
66 | - /app/node_modules
67 | ports:
68 | - 3001:3000
69 | profiles: ["discord"]
70 | restart: always
71 |
--------------------------------------------------------------------------------
/docs/CNAME:
--------------------------------------------------------------------------------
1 | docs.gptr.dev
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # Website
2 |
3 | This website is built using [Docusaurus 2](https://docusaurus.io/), a modern static website generator.
4 |
5 | ## Prerequisites
6 |
7 | To build and test documentation locally, begin by downloading and installing [Node.js](https://nodejs.org/en/download/), and then installing [Yarn](https://classic.yarnpkg.com/en/).
 8 | On Windows, you can install Yarn via the npm package manager (npm), which comes bundled with Node.js:
9 |
10 | ```console
11 | npm install --global yarn
12 | ```
13 |
14 | ## Installation
15 |
16 | ```console
17 | pip install pydoc-markdown
18 | cd docs
19 | yarn install
20 | ```
21 |
22 | ## Local Development
23 |
24 | Navigate to the docs folder and run:
25 |
26 | ```console
27 | pydoc-markdown
28 | yarn start
29 | ```
30 |
31 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
32 |
--------------------------------------------------------------------------------
/docs/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
3 | };
4 |
--------------------------------------------------------------------------------
/docs/blog/2023-09-22-gpt-researcher/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/blog/2023-09-22-gpt-researcher/architecture.png
--------------------------------------------------------------------------------
/docs/blog/2023-09-22-gpt-researcher/planner.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/blog/2023-09-22-gpt-researcher/planner.jpeg
--------------------------------------------------------------------------------
/docs/blog/2023-11-12-openai-assistant/diagram-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/blog/2023-11-12-openai-assistant/diagram-1.png
--------------------------------------------------------------------------------
/docs/blog/2023-11-12-openai-assistant/diagram-assistant.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/blog/2023-11-12-openai-assistant/diagram-assistant.jpeg
--------------------------------------------------------------------------------
/docs/blog/2024-05-19-gptr-langgraph/architecture.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/blog/2024-05-19-gptr-langgraph/architecture.jpeg
--------------------------------------------------------------------------------
/docs/blog/2024-05-19-gptr-langgraph/blog-langgraph.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/blog/2024-05-19-gptr-langgraph/blog-langgraph.jpeg
--------------------------------------------------------------------------------
/docs/blog/2024-09-7-hybrid-research/gptr-hybrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/blog/2024-09-7-hybrid-research/gptr-hybrid.png
--------------------------------------------------------------------------------
/docs/blog/authors.yml:
--------------------------------------------------------------------------------
1 | assafe:
2 | name: Assaf Elovic
3 | title: Creator @ GPT Researcher and Tavily
4 | url: https://github.com/assafelovic
5 | image_url: https://lh3.googleusercontent.com/a/ACg8ocJtrLku69VG_2Y0sJa5mt66gIGNaEBX5r_mgE6CRPEb7A=s96-c
6 |
7 | elishakay:
8 | name: Elisha Kramer
9 | title: Core Contributor @ GPT Researcher
10 | url: https://github.com/ElishaKay
11 | image_url: https://avatars.githubusercontent.com/u/16700452
12 |
--------------------------------------------------------------------------------
/docs/discord-bot/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:18.17.0-alpine
2 | WORKDIR /app
3 | COPY ./package.json ./
4 | RUN npm install --legacy-peer-deps
5 | COPY . .
6 | CMD ["node", "index.js"]
--------------------------------------------------------------------------------
/docs/discord-bot/Dockerfile.dev:
--------------------------------------------------------------------------------
1 | FROM node:18.17.0-alpine
2 | WORKDIR /app
3 | COPY ./package.json ./
4 | RUN npm install --legacy-peer-deps
5 | RUN npm install -g nodemon
6 | COPY . .
7 | CMD ["nodemon", "index.js"]
--------------------------------------------------------------------------------
/docs/discord-bot/commands/ask.js:
--------------------------------------------------------------------------------
1 | const { SlashCommandBuilder } = require('discord.js');
2 |
3 | module.exports = {
4 | data: new SlashCommandBuilder()
5 | .setName('ask')
6 | .setDescription('Ask a question to the bot'),
7 | async execute(interaction) {
8 | await interaction.reply('Please provide your question.');
9 | }
10 | };
11 |
--------------------------------------------------------------------------------
/docs/discord-bot/deploy-commands.js:
--------------------------------------------------------------------------------
1 | const { Client, GatewayIntentBits, REST, Routes } = require('discord.js');
2 | require('dotenv').config();
3 |
4 | // Create a new REST client and set your bot token
5 | const rest = new REST({ version: '10' }).setToken(process.env.DISCORD_BOT_TOKEN);
6 |
7 | // Define commands
8 | const commands = [
9 | {
10 | name: 'ping',
11 | description: 'Replies with Pong!',
12 | },
13 | {
14 | name: 'ask',
15 | description: 'Ask a question to the bot',
16 | },
17 | ];
18 |
19 | // Deploy commands to Discord
20 | (async () => {
21 | try {
22 | console.log('Started refreshing application (/) commands.');
23 |
24 | await rest.put(Routes.applicationCommands(process.env.DISCORD_CLIENT_ID), {
25 | body: commands,
26 | });
27 |
28 | console.log('Successfully reloaded application (/) commands.');
29 | } catch (error) {
30 | console.error(error);
31 | }
32 | })();
33 |
--------------------------------------------------------------------------------
/docs/discord-bot/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Discord-Bot-JS",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "dependencies": {
7 | "discord.js": "^14.16.1",
8 | "dotenv": "^16.4.5",
9 | "express": "^4.17.1",
10 | "jsonrepair": "^3.8.0",
11 | "nodemon": "^3.1.4",
12 | "ws": "^8.18.0"
13 | },
14 | "scripts": {
15 | "test": "echo \"Error: no test specified\" && exit 1",
16 | "dev": "nodemon --legacy-watch index.js"
17 | },
18 | "keywords": [],
19 | "author": "",
20 | "license": "ISC"
21 | }
22 |
--------------------------------------------------------------------------------
/docs/discord-bot/server.js:
--------------------------------------------------------------------------------
1 | const express = require("express")
2 |
3 | const server = express()
4 |
5 | server.all("/", (req, res) => {
6 | res.send("Bot is running!")
7 | })
8 |
9 | function keepAlive() {
10 | server.listen(5000, () => {
11 | console.log("Server is ready.")
12 | })
13 |
14 | // Handle uncaught exceptions
15 | process.on("uncaughtException", (err) => {
16 | console.error("Uncaught Exception:", err);
17 | // Graceful shutdown logic
18 | // process.exit(1); // Exit process to trigger Docker's restart policy
19 | });
20 |
21 | // Handle unhandled promise rejections
22 | process.on("unhandledRejection", (reason, promise) => {
23 | console.error("Unhandled Rejection at:", promise, "reason:", reason);
24 | // Graceful shutdown logic
25 | // process.exit(1); // Exit process to trigger Docker's restart policy
26 | });
27 | }
28 |
29 | module.exports = keepAlive
--------------------------------------------------------------------------------
/docs/docs/contribute.md:
--------------------------------------------------------------------------------
1 | # Contribute
2 |
3 | We highly welcome contributions! Please check out [contributing](https://github.com/assafelovic/gpt-researcher/blob/master/CONTRIBUTING.md) if you're interested.
4 |
5 | Please check out our [roadmap](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) page and reach out to us via our [Discord community](https://discord.gg/QgZXvJAccX) if you're interested in joining our mission.
--------------------------------------------------------------------------------
/docs/docs/examples/sample_report.py:
--------------------------------------------------------------------------------
1 | import nest_asyncio # required for notebooks
2 |
3 | nest_asyncio.apply()
4 |
5 | from gpt_researcher import GPTResearcher
6 | import asyncio
7 |
8 |
9 | async def get_report(query: str, report_type: str, custom_prompt: str = None):
10 | researcher = GPTResearcher(query, report_type)
11 | research_result = await researcher.conduct_research()
12 |
13 | # Generate report with optional custom prompt
14 | report = await researcher.write_report(custom_prompt=custom_prompt)
15 |
16 | # Get additional information
17 | research_context = researcher.get_research_context()
18 | research_costs = researcher.get_costs()
19 | research_images = researcher.get_research_images()
20 | research_sources = researcher.get_research_sources()
21 |
22 | return report, research_context, research_costs, research_images, research_sources
23 |
24 |
25 | if __name__ == "__main__":
26 | query = "Should I invest in Nvidia?"
27 | report_type = "research_report"
28 |
29 | # Standard report
30 | report, context, costs, images, sources = asyncio.run(get_report(query, report_type))
31 |
32 | print("Standard Report:")
33 | print(report)
34 |
35 | # Custom report with specific formatting requirements
36 | custom_prompt = "Answer in short, 2 paragraphs max without citations. Focus on the most important facts for investors."
37 | custom_report, _, _, _, _ = asyncio.run(get_report(query, report_type, custom_prompt))
38 |
39 | print("\nCustomized Short Report:")
40 | print(custom_report)
41 |
42 | print("\nResearch Costs:")
43 | print(costs)
44 | print("\nNumber of Research Images:")
45 | print(len(images))
46 | print("\nNumber of Research Sources:")
47 | print(len(sources))
--------------------------------------------------------------------------------
/docs/docs/examples/sample_sources_only.py:
--------------------------------------------------------------------------------
1 | from gpt_researcher import GPTResearcher
2 | import asyncio
3 |
4 |
5 | async def get_report(query: str, report_source: str, sources: list) -> str:
6 | researcher = GPTResearcher(query=query, report_source=report_source, source_urls=sources)
7 | research_context = await researcher.conduct_research()
8 | return await researcher.write_report()
9 |
10 | if __name__ == "__main__":
11 | query = "What are the biggest trends in AI lately?"
12 | report_source = "static"
13 | sources = [
14 | "https://en.wikipedia.org/wiki/Artificial_intelligence",
15 | "https://www.ibm.com/think/insights/artificial-intelligence-trends",
16 | "https://www.forbes.com/advisor/business/ai-statistics"
17 | ]
18 |
19 | report = asyncio.run(get_report(query=query, report_source=report_source, sources=sources))
20 | print(report)
21 |
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/context/azure-storage.md:
--------------------------------------------------------------------------------
1 | # Azure Storage
2 |
3 | If you want to use Azure Blob Storage as the source for your GPT Researcher report context, follow these steps:
4 |
5 | > **Step 1** - Set these environment variables with a .env file in the root folder
6 |
7 | ```bash
8 | AZURE_CONNECTION_STRING=
9 | AZURE_CONTAINER_NAME=
10 | ```
11 |
12 | > **Step 2** - Add the `azure-storage-blob` dependency to your requirements.txt file
13 |
14 | ```bash
15 | azure-storage-blob
16 | ```
17 |
18 | > **Step 3** - When running the GPTResearcher class, pass the `report_source` as `azure`
19 |
20 | ```python
21 | report = GPTResearcher(
22 | query="What happened in the latest burning man floods?",
23 | report_type="research_report",
24 | report_source="azure",
25 | )
26 | ```
--------------------------------------------------------------------------------
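The snippet above only constructs the researcher. A hedged end-to-end run with the same `report_source="azure"` setting (the query is illustrative, and the environment variables from Step 1 must already be set):

```python
import asyncio

from gpt_researcher import GPTResearcher


async def main():
    researcher = GPTResearcher(
        query="What happened in the latest burning man floods?",
        report_type="research_report",
        report_source="azure",  # pulls context from the configured Azure Blob container
    )
    await researcher.conduct_research()
    report = await researcher.write_report()
    print(report)


if __name__ == "__main__":
    asyncio.run(main())
```
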
/docs/docs/gpt-researcher/context/img/gptr-hybrid.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/docs/gpt-researcher/context/img/gptr-hybrid.png
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/context/img/nextjs-filter-by-domain.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/docs/gpt-researcher/context/img/nextjs-filter-by-domain.JPG
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/context/img/vanilla-filter-by-domains.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/docs/gpt-researcher/context/img/vanilla-filter-by-domains.png
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/context/local-docs.md:
--------------------------------------------------------------------------------
1 | # Local Documents
2 |
3 | ## Just Local Docs
4 |
5 | You can instruct the GPT Researcher to run research tasks based on your local documents. Currently supported file formats are: PDF, plain text, CSV, Excel, Markdown, PowerPoint, and Word documents.
6 |
7 | Step 1: Add the env variable `DOC_PATH` pointing to the folder where your documents are located.
8 |
9 | ```bash
10 | export DOC_PATH="./my-docs"
11 | ```
12 |
13 | Step 2:
14 | - If you're running the frontend app on localhost:8000, simply select "My Documents" from the "Report Source" Dropdown Options.
15 | - If you're running GPT Researcher with the [PIP package](https://docs.tavily.com/docs/gpt-researcher/gptr/pip-package), pass the `report_source` argument as "local" when you instantiate the `GPTResearcher` class [code sample here](https://docs.gptr.dev/docs/gpt-researcher/context/tailored-research).
16 |
17 | ## Local Docs + Web (Hybrid)
18 |
19 | 
20 |
21 | Check out the blog post on [Hybrid Research](https://docs.gptr.dev/blog/gptr-hybrid) to learn more about how to combine local documents with web research.
23 |
--------------------------------------------------------------------------------
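For the PIP-package path mentioned in Step 2, here is a sketch of a local-documents run (the query is illustrative and assumes `DOC_PATH` points at your documents folder as in Step 1):

```python
import asyncio
import os

from gpt_researcher import GPTResearcher

os.environ.setdefault("DOC_PATH", "./my-docs")  # folder containing your local documents


async def main():
    researcher = GPTResearcher(
        query="Summarize the key findings across my local documents",
        report_type="research_report",
        report_source="local",  # research only the files under DOC_PATH
    )
    await researcher.conduct_research()
    print(await researcher.write_report())


if __name__ == "__main__":
    asyncio.run(main())
```
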
/docs/docs/gpt-researcher/frontend/discord-bot.md:
--------------------------------------------------------------------------------
1 | # Discord Bot
2 |
3 | ## Intro
4 |
5 | You can either leverage the official GPTR Discord bot or create your own custom bot.
6 |
7 | To add the official GPTR Discord bot, simply [click here to invite GPTR to your Discord server](https://discord.com/oauth2/authorize?client_id=1281438963034361856&permissions=1689934339898432&integration_type=0&scope=bot).
8 |
9 |
10 | ## To create your own discord bot with GPTR functionality
11 |
12 | Add a .env file in the root of the project and add the following:
13 |
14 | ```
15 | DISCORD_BOT_TOKEN=
16 | DISCORD_CLIENT_ID=
17 | ```
18 | You can fetch the token from the Discord Developer Portal by following these steps:
19 |
20 | 1. Go to https://discord.com/developers/applications/
21 | 2. Click the "New Application" button and give your bot a name
22 | 3. Navigate to the OAuth2 tab to generate an invite URL for your bot
23 | 4. Under "Scopes", select "bot"
24 |
25 | 
26 |
27 | 5. Select the appropriate bot permissions
28 |
29 | 
30 |
31 | 6. Copy your bot's token and paste it into the `.env` file you created earlier
32 |
33 |
34 | ### Deploying the bot commands
35 |
36 | ```bash
37 | node deploy-commands.js
38 | ```
39 |
40 | In our case, this will make the "ask" and "ping" commands available to users of the bot.
41 |
42 |
43 | ### Running the bot via Docker
44 |
45 | ```bash
46 | docker compose --profile discord run --rm discord-bot
47 | ```
48 |
49 | ### Running the bot via CLI
50 |
51 | ```bash
52 | # install dependencies
53 | npm install
54 |
55 | # run the bot
56 | npm run dev
57 | ```
58 |
59 | ### Installing NodeJS and NPM on Ubuntu
60 |
61 | ```bash
62 | #install nvm
63 | wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.4/install.sh | bash
64 |
65 | export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
66 | [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" # This loads nvm
67 |
68 | # install nodejs
69 | nvm install 18.17.0
70 |
71 | # install npm
72 | sudo apt-get install npm
73 | ```
74 |
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/frontend/embed-script.md:
--------------------------------------------------------------------------------
1 | # Embed Script
2 |
3 | The embed script enables you to embed the latest GPTR NextJS app into your web app.
4 |
5 | To achieve this, simply add these 2 script tags into your HTML:
6 |
7 | ```javascript
8 |
9 |
10 | ```
11 |
12 | Here's a minimalistic HTML example (P.S. you can also save this as an index.html file and open it in your web browser)
13 |
14 | ```html
15 |
16 |
17 |
18 |
19 |
20 | GPT Researcher Embed Demo
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 | ```
29 |
30 | This example relies on setting a custom localStorage value for `GPTR_API_URL`. To point your embedded frontend at a custom GPTR API server, change `http://localhost:8000` to your own server address.
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/frontend/img/bot-permissions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/docs/gpt-researcher/frontend/img/bot-permissions.png
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/frontend/img/oath2-url-generator.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/docs/gpt-researcher/frontend/img/oath2-url-generator.png
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/frontend/introduction.md:
--------------------------------------------------------------------------------
1 | # Intro to the Frontends
2 |
3 | The frontends enhance GPT-Researcher by providing:
4 |
5 | 1. Intuitive Research Interface: Streamlined input for research queries.
6 | 2. Real-time Progress Tracking: Visual feedback on ongoing research tasks.
7 | 3. Interactive Results Display: Easy-to-navigate presentation of findings.
8 | 4. Customizable Settings: Adjust research parameters to suit specific needs.
9 | 5. Responsive Design: Optimal experience across various devices.
10 |
11 | These features aim to make the research process more efficient and user-friendly, complementing GPT-Researcher's powerful agent capabilities.
12 |
13 | ## Choosing an Option
14 |
15 | - Static Frontend: Quick setup, lightweight deployment.
16 | - NextJS Frontend: Feature-rich, scalable, better performance and SEO (For production, NextJS is recommended)
17 | - Discord Bot: Integrate GPT-Researcher into your Discord server.
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/frontend/react-package.md:
--------------------------------------------------------------------------------
1 | # React Package
2 |
3 | The GPTR React package is an abstraction on top of the NextJS app meant to empower users to easily import the GPTR frontend into any React App. The package is [available on npm](https://www.npmjs.com/package/gpt-researcher-ui).
4 |
5 |
6 | ## Installation
7 |
8 | ```bash
9 | npm install gpt-researcher-ui
10 | ```
11 |
12 | ## Usage
13 |
14 | ```javascript
15 | import React from 'react';
16 | import { GPTResearcher } from 'gpt-researcher-ui';
17 |
18 | function App() {
19 | return (
20 |     <div>{/* prop name below is illustrative; check the package docs for the full API */}
21 |       <GPTResearcher onResultsChange={(results) => console.log('Research results:', results)}
25 |       />
26 |     </div>
27 | );
28 | }
29 |
30 | export default App;
31 | ```
32 |
33 |
34 | ## Publishing to a private npm registry
35 |
36 | If you'd like to build and publish the package into your own private npm registry, you can do so by running the following commands:
37 |
38 | ```bash
39 | cd frontend/nextjs/
40 | npm run build:lib
41 | npm run build:types
42 | npm publish
43 | ```
44 |
45 |
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/frontend/vanilla-js-frontend.md:
--------------------------------------------------------------------------------
1 | # Vanilla JS Frontend
2 |
3 | The VanillaJS frontend is a lightweight solution leveraging FastAPI to serve static files.
4 |
5 | ### Demo
6 |
7 |
8 | #### Prerequisites
9 | - Python 3.11+
10 | - pip
11 |
12 | #### Setup and Running
13 |
14 | 1. Install required packages:
15 | ```
16 | pip install -r requirements.txt
17 | ```
18 |
19 | 2. Start the server:
20 | ```
21 | python -m uvicorn main:app
22 | ```
23 |
24 | 3. Access at `http://localhost:8000`
25 |
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/frontend/visualizing-websockets.md:
--------------------------------------------------------------------------------
1 | # Visualizing Websockets
2 |
3 | The GPTR Frontend is powered by Websockets streaming back from the Backend. This allows for real-time updates on the status of your research tasks, as well as the ability to interact with the Backend directly from the Frontend.
4 |
5 |
6 | ## Inspecting Websockets
7 |
8 | When running reports via the frontend, you can inspect the websocket messages in the Network Tab.
9 |
10 | Here's how:
11 |
12 | 
13 |
14 |
15 | ## Am I polling the right URL?
16 |
17 | If you're concerned that your frontend isn't hitting the right API Endpoint, you can check the URL in the Network Tab.
18 |
19 | Click into the WS request & go to the "Headers" tab
20 |
21 | 
22 |
23 | For debugging, have a look at the `getHost` function.
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/getting-started/getting-started-with-docker.md:
--------------------------------------------------------------------------------
1 | # Docker: Quickstart
2 |
3 | > **Step 1** - Install & Open Docker Desktop
4 |
5 | Follow instructions at https://www.docker.com/products/docker-desktop/
6 |
7 |
8 | > **Step 2** - [Follow this flow](https://www.youtube.com/watch?v=x1gKFt_6Us4)
9 |
10 | This mainly involves copying the '.env.example' file, adding your API keys to the copy, and saving it as '.env'.
11 |
12 | In `requirements.txt`, add the relevant langchain package for the LLM you choose (for example langchain-google-genai, langchain-deepseek, or langchain-mistralai).
13 |
14 | > **Step 3** - Within root, run with Docker.
15 |
16 | ```bash
17 | docker-compose up --build
18 | ```
19 |
20 | If that doesn't work, try running it without the dash:
21 | ```bash
22 | docker compose up --build
23 | ```
24 |
25 | > **Step 4** - By default, if you haven't uncommented anything in your docker-compose file, this flow will start 2 processes:
26 | - the Python server running on localhost:8000
27 | - the React app running on localhost:3000
28 |
29 | Visit localhost:3000 on any browser and enjoy researching!
30 |
31 |
32 | ## Running with the Docker CLI
33 |
34 | If you want to run the Docker container without using docker-compose, you can use the following command:
35 |
36 | ```bash
37 | docker run -it --name gpt-researcher -p 8000:8000 --env-file .env -v /absolute/path/to/gptr_docs:/my-docs gpt-researcher
38 | ```
39 |
40 | This will run the Docker container and mount your local `gptr_docs` directory to the container's `/my-docs` directory for analysis by the GPTR API Server.
41 |
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/gptr/automated-tests.md:
--------------------------------------------------------------------------------
1 | # Automated Tests
2 |
3 | ## Automated Testing with Github Actions
4 |
5 | This repository contains the code for the automated testing of the GPT-Researcher Repo using Github Actions.
6 |
7 | The tests are triggered in a docker container which runs the tests via the `pytest` module.
8 |
9 | ## Running the Tests
10 |
11 | You can run the tests:
12 |
13 | ### Via a docker command
14 |
15 | ```bash
16 | docker-compose --profile test run --rm gpt-researcher-tests
17 | ```
18 |
19 | ### Via a Github Action
20 |
21 | 
22 |
23 | Attaching here the required settings & screenshots on the github repo level:
24 |
25 | Step 1: Within the repo, press the "Settings" tab
26 |
27 | Step 2: Create a new environment named "tests" (all lowercase)
28 |
29 | Step 3: Click into the "tests" environment & add environment secrets for `OPENAI_API_KEY` and `TAVILY_API_KEY`
30 |
31 | Get the keys from here:
32 |
33 | https://app.tavily.com/sign-in
34 |
35 | https://platform.openai.com/api-keys
36 |
37 |
38 | 
39 | 
40 |
41 | If configured correctly, here's what the Github action should look like when opening a new PR or committing to an open PR:
42 |
43 | 
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/gptr/example.md:
--------------------------------------------------------------------------------
1 | # Agent Example
2 |
 3 | If you're interested in using GPT Researcher as a standalone agent, you can easily import it into any existing Python project. Below is an example of calling the agent to generate a research report:
4 |
5 | ```python
6 | from gpt_researcher import GPTResearcher
7 | import asyncio
8 |
9 | async def fetch_report(query):
10 |     """
11 |     Fetch a research report based on the provided query and report type.
12 |     """
13 |     researcher = GPTResearcher(query=query)
14 |     await researcher.conduct_research()
15 |     report = await researcher.write_report()
16 |     return report
17 |
18 | async def generate_research_report(query):
19 |     """
20 |     This is a sample script that executes an async main function to run a research report.
21 |     """
22 |     report = await fetch_report(query)
23 |     print(report)
24 |
25 | if __name__ == "__main__":
26 |     QUERY = "What happened in the latest burning man floods?"
27 |     asyncio.run(generate_research_report(query=QUERY))
28 | ```
29 |
30 | You can further enhance this example to use the returned report as context for generating valuable content such as news articles, marketing content, email templates, newsletters, and more.
31 |
32 | You can also use GPT Researcher to gather information about code documentation, business analysis, financial information, and more, all of which can be used to complete far more complex tasks that require factual, high-quality, real-time information.
33 |
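34 | For instance, a thin wrapper around `fetch_report` can turn the report into grounded context for whatever you generate next. The newsletter framing below is only an illustration; plug in the LLM call of your choice:
35 |
36 | ```python
37 | async def draft_newsletter(query: str) -> str:
38 |     """Illustrative follow-up: reuse the research report as grounding context."""
39 |     report = await fetch_report(query)
40 |     prompt = (
41 |         "You are a newsletter writer. Using only the research report below, "
42 |         "draft a concise newsletter section with citations.\n\n"
43 |         f"--- RESEARCH REPORT ---\n{report}"
44 |     )
45 |     # Send `prompt` to the LLM of your choice here and return its response.
46 |     return prompt
47 | ```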
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/gptr/npm-package.md:
--------------------------------------------------------------------------------
1 | # npm package
2 |
3 | The [gpt-researcher npm package](https://www.npmjs.com/package/gpt-researcher) is a WebSocket client for interacting with GPT Researcher.
4 |
5 | ## Installation
6 |
7 | ```bash
8 | npm install gpt-researcher
9 | ```
10 |
11 | ## Usage
12 |
13 | ```javascript
14 | const GPTResearcher = require('gpt-researcher');
15 |
16 | const researcher = new GPTResearcher({
17 |   host: 'localhost:8000',
18 |   logListener: (data) => console.log('logListener logging data: ', data)
19 | });
20 |
21 | researcher.sendMessage({
22 |   query: 'Does providing better context reduce LLM hallucinations?',
23 |   moreContext: 'Provide a detailed answer'
24 | });
25 | ```
26 |
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/handling-logs/langsmith-logs.md:
--------------------------------------------------------------------------------
1 | # Langsmith Logs
2 |
3 | With the help of Langsmith, you can easily visualize cost and error logs in your Langsmith dashboard (calculated per LLM call or grouped by project).
4 |
5 | Here are the steps to set up Langsmith:
6 |
7 | Step 1: Setup a Langsmith account at: [smith.langchain.com](https://smith.langchain.com)
8 |
9 | Step 2: Create a new API key at: [smith.langchain.com/settings](https://smith.langchain.com/settings)
10 |
11 | Step 3: Add these 2 environment variables:
12 |
13 | ```bash
14 | LANGCHAIN_TRACING_V2=true
15 | LANGCHAIN_API_KEY=[Your API key]
16 | ```
17 |
18 | Here's what this looks like in the Langsmith Dashboard:
19 |
20 | 
21 |
22 | This can be helpful for:
23 |
24 | - Enabling users to visualize and inspect the backend data flow
25 | - Quality assurance debugging - identifying where the inputs or outputs of our AI flows could be improved
26 | - Cost analysis - where are we spending the most on LLM calls
27 | - Error analysis - where are we getting the most errors
28 | - Optimizing speed - which parts of the flow are taking the most time
29 |
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/handling-logs/langsmith.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/docs/gpt-researcher/handling-logs/langsmith.png
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/llms/running-with-azure.md:
--------------------------------------------------------------------------------
1 | # Running with Azure
2 |
3 | ## Example: Azure OpenAI Configuration
4 |
5 | If you are using a model provider other than OpenAI, additional environment variables are required on top of the general configuration above.
6 |
7 | Here is an example for [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models) configuration:
8 |
9 | ```bash
10 | OPENAI_API_VERSION="2024-05-01-preview" # or whatever you are using
11 | AZURE_OPENAI_ENDPOINT="https://CHANGEME.openai.azure.com/" # change to your Azure OpenAI resource endpoint
12 | AZURE_OPENAI_API_KEY="[Your Key]" # change to your API key
13 |
14 | EMBEDDING="azure_openai:text-embedding-ada-002" # change to the deployment of your embedding model
15 |
16 | FAST_LLM="azure_openai:gpt-4o-mini" # change to the name of your deployment (not model-name)
17 | FAST_TOKEN_LIMIT=4000
18 |
19 | SMART_LLM="azure_openai:gpt-4o" # change to the name of your deployment (not model-name)
20 | SMART_TOKEN_LIMIT=4000
21 |
22 | RETRIEVER="bing" # if you are using Bing as your search engine (which is likely if you use Azure)
23 | BING_API_KEY="[Your Key]"
24 | ```
25 |
26 | For more details on what each variable does, you can check out the [GPTR Config Docs](https://docs.gptr.dev/docs/gpt-researcher/gptr/config)
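27 |
28 | To sanity-check that these variables are being picked up, you can load the GPTR config and print the resolved provider and model. This is a minimal sketch that reuses the `Config` attributes shown in the Testing your LLM doc:
29 |
30 | ```python
31 | from dotenv import load_dotenv
32 | from gpt_researcher.config.config import Config
33 |
34 | load_dotenv()  # read the .env file containing the variables above
35 |
36 | cfg = Config()
37 | # With the Azure settings above, this should print "azure_openai"
38 | # followed by your smart deployment name (e.g. gpt-4o).
39 | print(cfg.smart_llm_provider, cfg.smart_llm_model)
40 | ```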
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/llms/supported-llms.md:
--------------------------------------------------------------------------------
1 | # Supported LLMs
2 |
3 | The following LLMs are supported by GPTR (though you'll need to install the relevant langchain package separately if you're not using OpenAI).
4 |
5 | - openai
6 | - anthropic
7 | - azure_openai
8 | - cohere
9 | - google_vertexai
10 | - google_genai
11 | - fireworks
12 | - gigachat
13 | - ollama
14 | - together
15 | - mistralai
16 | - huggingface
17 | - groq
18 | - bedrock
19 | - dashscope
20 | - xai
21 | - deepseek
22 | - litellm
23 | - openrouter
24 | - vllm
25 |
26 | If you'd like to know the name of the langchain package for each LLM, you can check the [Langchain documentation](https://python.langchain.com/v0.2/docs/integrations/platforms/), or run GPTR as is and inspect the error message.
27 |
28 | The GPTR LLM Module is built on top of the [Langchain LLM Module](https://python.langchain.com/v0.2/docs/integrations/llms/).
29 |
30 | If you'd like to add a new LLM into GPTR, you can start with the [langchain documentation](https://python.langchain.com/v0.2/docs/integrations/platforms/) and then look into integrating it into the [GPTR LLM Module](https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/llm_provider/generic/base.py).
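31 |
32 | Switching providers is mostly a matter of installing the matching langchain package and pointing `FAST_LLM` / `SMART_LLM` at it, using the same `provider:model` format shown in the Azure example. Below is a hypothetical Google GenAI setup; the model names are placeholders, and `GOOGLE_API_KEY` is whatever key the langchain-google-genai package expects:
33 |
34 | ```python
35 | import os
36 |
37 | # Hypothetical example: pointing GPTR at Google GenAI models.
38 | # Requires `pip install langchain-google-genai` and an API key in your environment.
39 | os.environ.setdefault("GOOGLE_API_KEY", "<your key>")
40 | os.environ["FAST_LLM"] = "google_genai:gemini-1.5-flash"   # placeholder model name
41 | os.environ["SMART_LLM"] = "google_genai:gemini-1.5-pro"    # placeholder model name
42 | ```
43 |
44 | The same values can live in your `.env` file instead; they just need to be set before the GPTR config is created.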
--------------------------------------------------------------------------------
/docs/docs/gpt-researcher/llms/testing-your-llm.md:
--------------------------------------------------------------------------------
1 | # Testing your LLM
2 |
3 | Here is a snippet of code to help you verify that your LLM-related environment variables are set up correctly.
4 |
5 | ```python
6 | from gpt_researcher.config.config import Config
7 | from gpt_researcher.utils.llm import create_chat_completion
8 | import asyncio
9 | from dotenv import load_dotenv
10 | load_dotenv()
11 |
12 | async def main():
13 |     cfg = Config()
14 |
15 |     try:
16 |         report = await create_chat_completion(
17 |             model=cfg.smart_llm_model,
18 |             messages=[{"role": "user", "content": "sup?"}],
19 |             temperature=0.35,
20 |             llm_provider=cfg.smart_llm_provider,
21 |             stream=True,
22 |             max_tokens=cfg.smart_token_limit,
23 |             llm_kwargs=cfg.llm_kwargs
24 |         )
25 |         print(report)  # print the LLM response to confirm the call succeeded
26 |     except Exception as e:
27 |         print(f"Error in calling LLM: {e}")
28 |
29 | # Run the async function
30 | asyncio.run(main())
31 | ```
--------------------------------------------------------------------------------
/docs/docs/reference/config/singleton.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_label: singleton
3 | title: config.singleton
4 | ---
5 |
6 | The singleton metaclass for ensuring only one instance of a class.
7 |
8 | ## Singleton Objects
9 |
10 | ```python
11 | class Singleton(abc.ABCMeta, type)
12 | ```
13 |
14 | Singleton metaclass for ensuring only one instance of a class.
15 |
16 | #### \_\_call\_\_
17 |
18 | ```python
19 | def __call__(cls, *args, **kwargs)
20 | ```
21 |
22 | Call method for the singleton metaclass.
23 |
24 | ## AbstractSingleton Objects
25 |
26 | ```python
27 | class AbstractSingleton(abc.ABC, metaclass=Singleton)
28 | ```
29 |
30 | Abstract singleton class for ensuring only one instance of a class.
31 |
32 |
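33 | As a usage sketch, any class that declares `Singleton` as its metaclass returns the same instance on every instantiation. The import path below is assumed from the `config.singleton` module title above:
34 |
35 | ```python
36 | from config.singleton import Singleton  # assumed import path
37 |
38 | class AppConfig(metaclass=Singleton):
39 |     def __init__(self):
40 |         self.debug = False
41 |
42 | a = AppConfig()
43 | b = AppConfig()
44 | assert a is b  # both names refer to the single shared instance
45 | ```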
--------------------------------------------------------------------------------
/docs/docs/reference/processing/html.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_label: html
3 | title: processing.html
4 | ---
5 |
6 | HTML processing functions
7 |
8 | #### extract\_hyperlinks
9 |
10 | ```python
11 | def extract_hyperlinks(soup: BeautifulSoup,
12 |                        base_url: str) -> list[tuple[str, str]]
13 | ```
14 |
15 | Extract hyperlinks from a BeautifulSoup object
16 |
17 | **Arguments**:
18 |
19 | - `soup` _BeautifulSoup_ - The BeautifulSoup object
20 | - `base_url` _str_ - The base URL
21 |
22 |
23 | **Returns**:
24 |
25 | List[Tuple[str, str]]: The extracted hyperlinks
26 |
27 | #### format\_hyperlinks
28 |
29 | ```python
30 | def format_hyperlinks(hyperlinks: list[tuple[str, str]]) -> list[str]
31 | ```
32 |
33 | Format hyperlinks to be displayed to the user
34 |
35 | **Arguments**:
36 |
37 | - `hyperlinks` _List[Tuple[str, str]]_ - The hyperlinks to format
38 |
39 |
40 | **Returns**:
41 |
42 | - `List[str]` - The formatted hyperlinks
43 |
44 |
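45 | A short usage sketch, assuming the two functions are importable from the `processing.html` module named above:
46 |
47 | ```python
48 | from bs4 import BeautifulSoup
49 | from processing.html import extract_hyperlinks, format_hyperlinks  # assumed import path
50 |
51 | html = '<a href="/docs">Docs</a> <a href="https://gptr.dev">GPT Researcher</a>'
52 | soup = BeautifulSoup(html, "html.parser")
53 |
54 | hyperlinks = extract_hyperlinks(soup, base_url="https://docs.gptr.dev")
55 | for line in format_hyperlinks(hyperlinks):
56 |     print(line)  # one human-readable "text (url)"-style string per link
57 | ```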
--------------------------------------------------------------------------------
/docs/docs/reference/sidebar.json:
--------------------------------------------------------------------------------
1 | {
2 | "items": [],
3 | "label": "Reference",
4 | "type": "category"
5 | }
--------------------------------------------------------------------------------
/docs/docs/roadmap.md:
--------------------------------------------------------------------------------
1 | # Roadmap
2 |
3 | We're constantly working on additional features and improvements to our products and services. We're also working on new products and services to help you build better AI applications using [GPT Researcher](https://gptr.dev).
4 |
5 | Our vision is to build the #1 autonomous research agent for AI developers and researchers, and we're excited to have you join us on this journey!
6 |
7 | The roadmap is prioritized based on the following goals: Performance, Quality, Modularity and Conversational flexibility. The roadmap is public and can be found [here](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap).
8 |
9 | Interested in collaborating or contributing? Check out our [contributing page](/docs/contribute) for more information.
--------------------------------------------------------------------------------
/docs/docs/welcome.md:
--------------------------------------------------------------------------------
1 | # Welcome
2 |
3 | Hey there! 👋
4 |
5 | We're a team of AI researchers and developers who are passionate about building the next generation of AI assistants.
6 | Our mission is to empower individuals and organizations with accurate, unbiased, and factual information.
7 |
8 | ### GPT Researcher
9 | Quickly accessing relevant and trustworthy information is more crucial than ever. However, we've learned that none of today's search engines offer a tool that provides factual, explicit, and objective answers without the need to continuously click through and explore multiple sites for a given research task.
10 |
11 | This is why we've built the trending open source **[GPT Researcher](https://github.com/assafelovic/gpt-researcher)**. GPT Researcher is an autonomous agent that takes care of the tedious task of research for you by scraping, filtering, and aggregating 20+ web sources per research task.
12 |
13 | To learn more about GPT Researcher, check out the [documentation page](/docs/gpt-researcher/getting-started/introduction).
14 |
--------------------------------------------------------------------------------
/docs/npm/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "gpt-researcher",
3 | "version": "1.0.27",
4 | "description": "WebSocket client for GPT Researcher",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "keywords": [
10 | "gpt-researcher",
11 | "websocket",
12 | "ai",
13 | "research"
14 | ],
15 | "dependencies": {
16 | "ws": "^8.18.0"
17 | },
18 | "repository": {
19 | "type": "git",
20 | "url": "git+https://github.com/assafelovic/gpt-researcher.git"
21 | },
22 | "author": "GPT Researcher Team",
23 | "license": "MIT",
24 | "bugs": {
25 | "url": "https://github.com/assafelovic/gpt-researcher/issues"
26 | },
27 | "homepage": "https://github.com/assafelovic/gpt-researcher#readme"
28 | }
--------------------------------------------------------------------------------
/docs/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "website",
3 | "version": "0.0.0",
4 | "private": true,
5 | "resolutions": {
6 | "nth-check": "2.0.1",
7 | "trim": "0.0.3",
8 | "got": "11.8.5",
9 | "node-forge": "1.3.0",
10 | "minimatch": "3.0.5",
11 | "loader-utils": "2.0.4",
12 | "eta": "2.0.0",
13 | "@sideway/formula": "3.0.1",
14 | "http-cache-semantics": "4.1.1"
15 | },
16 | "scripts": {
17 | "docusaurus": "docusaurus",
18 | "start": "docusaurus start",
19 | "build": "docusaurus build",
20 | "swizzle": "docusaurus swizzle",
21 | "deploy": "docusaurus deploy",
22 | "clear": "docusaurus clear",
23 | "serve": "docusaurus serve",
24 | "write-translations": "docusaurus write-translations",
25 | "write-heading-ids": "docusaurus write-heading-ids"
26 | },
27 | "dependencies": {
28 | "@docusaurus/core": "3.7.0",
29 | "@docusaurus/preset-classic": "3.7.0",
30 | "@easyops-cn/docusaurus-search-local": "^0.49.2",
31 | "@mdx-js/react": "^3.1.0",
32 | "@svgr/webpack": "^8.1.0",
33 | "clsx": "^1.1.1",
34 | "file-loader": "^6.2.0",
35 | "hast-util-is-element": "1.1.0",
36 | "minimatch": "3.0.5",
37 | "react": "^18.0.1",
38 | "react-dom": "^18.0.1",
39 | "rehype-katex": "^7.0.1",
40 | "remark-math": "3",
41 | "trim": "^0.0.3",
42 | "url-loader": "^4.1.1"
43 | },
44 | "browserslist": {
45 | "production": [
46 | ">0.5%",
47 | "not dead",
48 | "not op_mini all"
49 | ],
50 | "development": [
51 | "last 1 chrome version",
52 | "last 1 firefox version",
53 | "last 1 safari version"
54 | ]
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/docs/pydoc-markdown.yml:
--------------------------------------------------------------------------------
1 | loaders:
2 | - type: python
3 | search_path: [../docs]
4 | processors:
5 | - type: filter
6 | skip_empty_modules: true
7 | - type: smart
8 | - type: crossref
9 | renderer:
10 | type: docusaurus
11 | docs_base_path: docs
12 | relative_output_path: reference
13 | relative_sidebar_path: sidebar.json
14 | sidebar_top_level_label: Reference
15 | markdown:
16 | escape_html_in_docstring: false
17 |
--------------------------------------------------------------------------------
/docs/src/components/HomepageFeatures.module.css:
--------------------------------------------------------------------------------
1 | /* stylelint-disable docusaurus/copyright-header */
2 |
3 | .features {
4 | display: flex;
5 | align-items: center;
6 | padding: 2rem 0;
7 | width: 100%;
8 | }
9 |
10 | .featureSvg {
11 | height: 120px;
12 | width: 200px;
13 | }
14 |
--------------------------------------------------------------------------------
/docs/src/pages/index.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import clsx from 'clsx';
3 | import Layout from '@theme/Layout';
4 | import Link from '@docusaurus/Link';
5 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
6 | import styles from './index.module.css';
7 | import HomepageFeatures from '../components/HomepageFeatures';
8 |
9 | function HomepageHeader() {
10 | const {siteConfig} = useDocusaurusContext();
11 | return (
12 |
25 | );
26 | }
27 |
28 | export default function Home() {
29 | const {siteConfig} = useDocusaurusContext();
30 | return (
31 |
34 |
35 |
36 |
37 |
38 |
39 | );
40 | }
41 |
--------------------------------------------------------------------------------
/docs/src/pages/index.module.css:
--------------------------------------------------------------------------------
1 | /* stylelint-disable docusaurus/copyright-header */
2 |
3 | /**
4 | * CSS files with the .module.css suffix will be treated as CSS modules
5 | * and scoped locally.
6 | */
7 |
8 | .heroBanner {
9 | padding: 5rem 0;
10 | text-align: center;
11 | position: relative;
12 | overflow: hidden;
13 | }
14 |
15 | @media screen and (max-width: 966px) {
16 | .heroBanner {
17 | padding: 2rem;
18 | }
19 | }
20 |
21 | .buttons {
22 | display: flex;
23 | align-items: center;
24 | justify-content: center;
25 | }
26 |
--------------------------------------------------------------------------------
/docs/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/static/.nojekyll
--------------------------------------------------------------------------------
/docs/static/CNAME:
--------------------------------------------------------------------------------
1 | docs.gptr.dev
--------------------------------------------------------------------------------
/docs/static/img/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/static/img/architecture.png
--------------------------------------------------------------------------------
/docs/static/img/banner1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/static/img/banner1.jpg
--------------------------------------------------------------------------------
/docs/static/img/examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/static/img/examples.png
--------------------------------------------------------------------------------
/docs/static/img/gptr-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/static/img/gptr-logo.png
--------------------------------------------------------------------------------
/docs/static/img/leaderboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/static/img/leaderboard.png
--------------------------------------------------------------------------------
/docs/static/img/multi-agent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/docs/static/img/multi-agent.png
--------------------------------------------------------------------------------
/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/evals/__init__.py
--------------------------------------------------------------------------------
/evals/simple_evals/.gitignore:
--------------------------------------------------------------------------------
1 | # Override global gitignore to track our evaluation logs
2 | !logs/
3 | !logs/*
4 | !logs/**/*
--------------------------------------------------------------------------------
/evals/simple_evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/evals/simple_evals/__init__.py
--------------------------------------------------------------------------------
/evals/simple_evals/logs/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/evals/simple_evals/logs/.gitkeep
--------------------------------------------------------------------------------
/evals/simple_evals/logs/README.md:
--------------------------------------------------------------------------------
1 | # Evaluation Results
2 |
3 | This directory contains historical evaluation results for GPT-Researcher using the SimpleQA methodology.
4 |
5 | ## Latest Results
6 |
7 | ### [SimpleQA Eval 100 Problems 2-22-25](./SimpleQA%20Eval%20100%20Problems%202-22-25.txt)
8 |
9 | Evaluation run by [Kelly Abbott (kga245)](https://github.com/kga245)
10 |
11 | **Summary:**
12 | - Date: February 22, 2025
13 | - Sample Size: 100 queries
14 | - Success Rate: 100% (100/100 queries completed)
15 |
16 | **Performance Metrics:**
17 | - Accuracy: 92.9%
18 | - F1 Score: 92.5%
19 | - Answer Rate: 99%
20 |
21 | **Response Distribution:**
22 | - Correct: 92%
23 | - Incorrect: 7%
24 | - Not Attempted: 1%
25 |
26 | **Cost Efficiency:**
27 | - Total Cost: $9.60
28 | - Average Cost per Query: $0.096
29 |
30 | This evaluation demonstrates strong performance in factual accuracy while maintaining reasonable cost efficiency. The high answer rate (99%) and accuracy (92.9%) suggest that GPT-Researcher is effective at finding and reporting accurate information.
31 |
32 | ## Historical Context
33 |
34 | These logs are maintained in version control to:
35 | 1. Track performance improvements over time
36 | 2. Provide benchmarks for future enhancements
37 | 3. Enable analysis of different configurations
38 | 4. Ensure transparency in our evaluation process
39 |
40 | Each log file contains detailed information about:
41 | - Individual query results
42 | - Source citations
43 | - Cost breakdowns
44 | - Error analysis
45 | - Aggregate metrics
46 |
47 | ## Running New Evaluations
48 |
49 | To generate new evaluation logs, see the [main evaluation documentation](../README.md) for instructions on running evaluations with different configurations or sample sizes.
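50 |
51 | As a sanity check, the headline metrics above are internally consistent if accuracy is computed over attempted questions and F1 is taken as the harmonic mean of the overall correct rate and that accuracy (how SimpleQA-style scoring is usually defined; treat the exact formula as an assumption here):
52 |
53 | ```python
54 | # Quick arithmetic check on the 2-22-25 run reported above.
55 | correct, incorrect, not_attempted = 0.92, 0.07, 0.01
56 |
57 | answer_rate = 1 - not_attempted                    # 0.99   -> "Answer Rate: 99%"
58 | accuracy_attempted = correct / answer_rate         # ~0.929 -> "Accuracy: 92.9%"
59 | f1 = 2 * correct * accuracy_attempted / (correct + accuracy_attempted)  # ~0.925 -> "F1 Score: 92.5%"
60 |
61 | print(round(answer_rate, 2), round(accuracy_attempted, 3), round(f1, 3))
62 | ```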
--------------------------------------------------------------------------------
/evals/simple_evals/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=1.5.0
2 | tqdm>=4.65.0
--------------------------------------------------------------------------------
/frontend/nextjs/.babelrc.build.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": {
3 | "production": {
4 | "presets": [
5 | "@babel/preset-env",
6 | "@babel/preset-react",
7 | ["@babel/preset-typescript", { "allowNamespaces": true, "onlyRemoveTypeImports": true }]
8 | ],
9 | "plugins": [
10 | ["@babel/plugin-transform-typescript", { "allowNamespaces": true }]
11 | ]
12 | }
13 | }
14 | }
--------------------------------------------------------------------------------
/frontend/nextjs/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 |
3 | # Ignore env containing secrets
4 | .env
5 | .venv
6 | .envrc
7 |
8 | # Ignore Virtual Env
9 | env/
10 | venv/
11 | .venv/
12 |
13 | # Other Environments
14 | ENV/
15 | env.bak/
16 | venv.bak/
17 |
18 | # Ignore generated outputs
19 | outputs/
20 |
21 | # Ignore my local docs
22 | my-docs/
23 |
24 | # Ignore pycache
25 | **/__pycache__/
26 |
27 | # Ignore mypy cache
28 | .mypy_cache/
29 |
30 | # Node modules
31 | node_modules
32 |
33 | # Ignore IDE config
34 | .idea
35 |
36 | # macOS specific files
37 | .DS_Store
38 |
39 | # Docusaurus build artifacts
40 | .docusaurus
41 |
42 | # Build directories
43 | build
44 | docs/build
45 |
46 | # Language graph data
47 | .langgraph-data/
48 |
49 | # Next.js build artifacts
50 | .next/
51 |
52 | # Package lock file
53 | package-lock.json
54 |
55 | # Docker-specific exclusions (if any)
56 | Dockerfile
57 | docker-compose.yml
58 |
--------------------------------------------------------------------------------
/frontend/nextjs/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "next/core-web-vitals",
3 | "rules": {
4 | "no-unused-vars": "off",
5 | "no-undef": "off",
6 | "no-console": "off",
7 | "@next/next/no-img-element": "off",
8 | "@typescript-eslint/no-explicit-any": "off",
9 | "@typescript-eslint/no-unused-vars": "off"
10 | },
11 | "ignorePatterns": ["build/**/*"]
12 | }
13 |
--------------------------------------------------------------------------------
/frontend/nextjs/.example.env:
--------------------------------------------------------------------------------
1 | TOGETHER_API_KEY=
2 | BING_API_KEY=
3 | HELICONE_API_KEY=
4 |
--------------------------------------------------------------------------------
/frontend/nextjs/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 | .env
3 | package-lock.json
4 |
5 | # dependencies
6 | /node_modules
7 | /.pnp
8 | .pnp.js
9 | .yarn/install-state.gz
10 |
11 | # testing
12 | /coverage
13 |
14 | # next.js
15 | /.next/
16 | /out/
17 |
18 | # production
19 | /build
20 |
21 | # misc
22 | .DS_Store
23 | *.pem
24 |
25 | # debug
26 | npm-debug.log*
27 | yarn-debug.log*
28 | yarn-error.log*
29 |
30 | # local env files
31 | .env*.local
32 |
33 | # vercel
34 | .vercel
35 |
36 | # typescript
37 | *.tsbuildinfo
38 | next-env.d.ts
39 |
--------------------------------------------------------------------------------
/frontend/nextjs/.prettierrc:
--------------------------------------------------------------------------------
1 | { "plugins": ["prettier-plugin-tailwindcss"] }
2 |
--------------------------------------------------------------------------------
/frontend/nextjs/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:18.17.0-alpine as builder
2 | WORKDIR /app
3 | COPY ./package.json ./
4 | RUN npm install --legacy-peer-deps
5 | COPY . .
6 | RUN npm run build
7 |
8 | FROM nginx
9 | EXPOSE 3000
10 | COPY ./nginx/default.conf /etc/nginx/conf.d/default.conf
11 | COPY --from=builder /app/build /usr/share/nginx/html
12 |
--------------------------------------------------------------------------------
/frontend/nextjs/Dockerfile.dev:
--------------------------------------------------------------------------------
1 | FROM node:18.17.0-alpine
2 | WORKDIR /app
3 | COPY ./package.json ./
4 | RUN npm install --legacy-peer-deps
5 | COPY . .
6 | CMD ["npm", "run", "dev"]
--------------------------------------------------------------------------------
/frontend/nextjs/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import type { Metadata } from "next";
2 | import { Lexend } from "next/font/google";
3 | import PlausibleProvider from "next-plausible";
4 | import { GoogleAnalytics } from '@next/third-parties/google'
5 | import "./globals.css";
6 |
7 | const inter = Lexend({ subsets: ["latin"] });
8 |
9 | let title = "GPT Researcher";
10 | let description =
11 | "LLM based autonomous agent that conducts local and web research on any topic and generates a comprehensive report with citations.";
12 | let url = "https://github.com/assafelovic/gpt-researcher";
13 | let ogimage = "/favicon.ico";
14 | let sitename = "GPT Researcher";
15 |
16 | export const metadata: Metadata = {
17 | metadataBase: new URL(url),
18 | title,
19 | description,
20 | icons: {
21 | icon: "/favicon.ico",
22 | },
23 | openGraph: {
24 | images: [ogimage],
25 | title,
26 | description,
27 | url: url,
28 | siteName: sitename,
29 | locale: "en_US",
30 | type: "website",
31 | },
32 | twitter: {
33 | card: "summary_large_image",
34 | images: [ogimage],
35 | title,
36 | description,
37 | },
38 | };
39 |
40 | export default function RootLayout({
41 | children,
42 | }: Readonly<{
43 | children: React.ReactNode;
44 | }>) {
45 |
46 | return (
47 |
48 |
49 |
50 |
51 |
52 |
56 | {children}
57 |
58 |
59 | );
60 | }
61 |
--------------------------------------------------------------------------------
/frontend/nextjs/components/HumanFeedback.tsx:
--------------------------------------------------------------------------------
1 | // /multi_agents/frontend/components/HumanFeedback.tsx
2 |
3 | import React, { useState, useEffect } from 'react';
4 |
5 | interface HumanFeedbackProps {
6 | websocket: WebSocket | null;
7 | onFeedbackSubmit: (feedback: string | null) => void;
8 | questionForHuman: boolean;
9 | }
10 |
11 | const HumanFeedback: React.FC = ({ questionForHuman, websocket, onFeedbackSubmit }) => {
12 | const [feedbackRequest, setFeedbackRequest] = useState(null);
13 | const [userFeedback, setUserFeedback] = useState('');
14 |
15 | const handleSubmit = (e: React.FormEvent) => {
16 | e.preventDefault();
17 | onFeedbackSubmit(userFeedback === '' ? null : userFeedback);
18 | setFeedbackRequest(null);
19 | setUserFeedback('');
20 | };
21 |
22 | return (
23 |
24 |
Human Feedback Required
25 |
{questionForHuman}
26 |
40 |
41 | );
42 | };
43 |
44 | export default HumanFeedback;
--------------------------------------------------------------------------------
/frontend/nextjs/components/Langgraph/Langgraph.js:
--------------------------------------------------------------------------------
1 | import { Client } from "@langchain/langgraph-sdk";
2 | import { task } from '../../config/task';
3 |
4 | export async function startLanggraphResearch(newQuestion, report_source, langgraphHostUrl) {
5 | // Update the task query with the new question
6 | task.task.query = newQuestion;
7 | task.task.source = report_source;
8 | const host = langgraphHostUrl;
9 |
10 | // Add your Langgraph Cloud Authentication token here
11 | const authToken = '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';
12 |
13 | const client = new Client({
14 | apiUrl: host,
15 | defaultHeaders: {
16 | 'Content-Type': 'application/json',
17 | 'X-Api-Key': authToken
18 | }
19 | });
20 |
21 | // List all assistants
22 | const assistants = await client.assistants.search({
23 | metadata: null,
24 | offset: 0,
25 | limit: 10,
26 | });
27 |
28 | console.log('assistants: ', assistants);
29 |
30 | // We auto-create an assistant for each graph you register in config.
31 | const agent = assistants[0];
32 |
33 | // Start a new thread
34 | const thread = await client.threads.create();
35 |
36 | // Start a streaming run
37 | const input = task;
38 |
39 | const streamResponse = client.runs.stream(
40 | thread["thread_id"],
41 | agent["assistant_id"],
42 | {
43 | input,
44 | },
45 | );
46 |
47 | return {streamResponse, host, thread_id: thread["thread_id"]};
48 | }
--------------------------------------------------------------------------------
/frontend/nextjs/components/LoadingDots.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 |
3 | const LoadingDots = () => {
4 | return (
5 |
12 | );
13 | };
14 |
15 | export default LoadingDots;
--------------------------------------------------------------------------------
/frontend/nextjs/components/ResearchBlocks/ImageSection.tsx:
--------------------------------------------------------------------------------
1 | import Image from "next/image";
2 | import React, { memo } from 'react';
3 | import ImagesAlbum from '../Images/ImagesAlbum';
4 |
5 | interface ImageSectionProps {
6 | metadata: any;
7 | }
8 |
9 | const ImageSection = ({ metadata }: ImageSectionProps) => {
10 | return (
11 |
12 |
13 |
14 |
15 | Related Images
16 |
17 |
18 |
19 |
20 |
21 |
22 | );
23 | };
24 |
25 | // Simple memo implementation that compares arrays properly
26 | export default memo(ImageSection, (prevProps, nextProps) => {
27 | // If both are null/undefined or the same reference, they're equal
28 | if (prevProps.metadata === nextProps.metadata) return true;
29 |
30 | // If one is null/undefined but not the other, they're not equal
31 | if (!prevProps.metadata || !nextProps.metadata) return false;
32 |
33 | // Compare lengths
34 | if (prevProps.metadata.length !== nextProps.metadata.length) return false;
35 |
36 | // Compare each item
37 | for (let i = 0; i < prevProps.metadata.length; i++) {
38 | if (prevProps.metadata[i] !== nextProps.metadata[i]) return false;
39 | }
40 |
41 | return true;
42 | });
--------------------------------------------------------------------------------
/frontend/nextjs/components/ResearchBlocks/LogsSection.tsx:
--------------------------------------------------------------------------------
1 | import Image from "next/image";
2 | import LogMessage from './elements/LogMessage';
3 | import { useEffect, useRef } from 'react';
4 |
5 | interface Log {
6 | header: string;
7 | text: string;
8 | metadata: any;
9 | key: string;
10 | }
11 |
12 | interface OrderedLogsProps {
13 | logs: Log[];
14 | }
15 |
16 | const LogsSection = ({ logs }: OrderedLogsProps) => {
17 | const logsContainerRef = useRef(null);
18 |
19 | useEffect(() => {
20 | // Scroll to bottom whenever logs change
21 | if (logsContainerRef.current) {
22 | logsContainerRef.current.scrollTop = logsContainerRef.current.scrollHeight;
23 | }
24 | }, [logs]); // Dependency on logs array ensures this runs when new logs are added
25 |
26 | return (
27 |
28 |
29 |
30 |
31 | Agent Work
32 |
33 |
34 |
38 |
39 |
40 |
41 | );
42 | };
43 |
44 | export default LogsSection;
--------------------------------------------------------------------------------
/frontend/nextjs/components/ResearchBlocks/Question.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import Image from "next/image";
3 |
4 | interface QuestionProps {
5 | question: string;
6 | }
7 |
8 | const Question: React.FC = ({ question }) => {
9 | return (
10 |
11 |
12 |
19 |
20 | Research Task:
21 |
22 |
23 |
"{question}"
24 |
25 | );
26 | };
27 |
28 | export default Question;
29 |
--------------------------------------------------------------------------------
/frontend/nextjs/components/ResearchBlocks/Sources.tsx:
--------------------------------------------------------------------------------
1 | import Image from "next/image";
2 | import React from 'react';
3 | import SourceCard from "./elements/SourceCard";
4 |
5 | export default function Sources({
6 | sources,
7 | }: {
8 | sources: { name: string; url: string }[];
9 | }) {
10 | return (
11 |
12 |
13 |
14 |
15 | {sources.length} Sources{" "}
16 |
17 |
18 |
19 |
20 | {sources.length > 0 ? (
21 | sources.map((source) => (
22 |
23 | ))
24 | ) : (
25 | <>
26 |
27 |
28 |
29 |
30 |
31 |
32 | >
33 | )}
34 |
35 |
36 |
37 | );
38 | }
39 |
--------------------------------------------------------------------------------
/frontend/nextjs/components/ResearchBlocks/elements/SourceCard.tsx:
--------------------------------------------------------------------------------
1 | import Image from "next/image";
2 | import { useState } from "react";
3 |
4 | const SourceCard = ({ source }: { source: { name: string; url: string } }) => {
5 | const [imageSrc, setImageSrc] = useState(`https://www.google.com/s2/favicons?domain=${source.url}&sz=128`);
6 |
7 | const handleImageError = () => {
8 | setImageSrc("/img/globe.svg");
9 | };
10 |
11 | return (
12 |
13 |
14 |
22 |
23 |
36 |
37 | );
38 | };
39 |
40 | export default SourceCard;
41 |
--------------------------------------------------------------------------------
/frontend/nextjs/components/ResearchBlocks/elements/SubQuestions.tsx:
--------------------------------------------------------------------------------
1 | import Image from "next/image";
2 |
3 | interface SubQuestionsProps {
4 | metadata: string[];
5 | handleClickSuggestion: (value: string) => void;
6 | }
7 |
8 | const SubQuestions: React.FC = ({ metadata, handleClickSuggestion }) => {
9 | return (
10 |
11 |
12 |
19 |
20 |
21 |
22 | Pondering your question from several angles
23 |
24 |
25 | {metadata.map((item, subIndex) => (
26 |
handleClickSuggestion(item)}
29 | key={`${item}-${subIndex}`}
30 | >
31 |
32 | {item}
33 |
34 |
35 | ))}
36 |
37 |
38 |
39 | );
40 | };
41 |
42 | export default SubQuestions;
--------------------------------------------------------------------------------
/frontend/nextjs/components/Task/AgentLogs.tsx:
--------------------------------------------------------------------------------
1 | export default function AgentLogs({agentLogs}:any){
2 | const renderAgentLogs = (agentLogs:any)=>{
3 | return agentLogs && agentLogs.map((agentLog:any, index:number)=>{
4 | return ({agentLog.output}
)
5 | })
6 | }
7 |
8 | return (
9 |
10 |
Agent Output
11 |
12 | {renderAgentLogs(agentLogs)}
13 |
14 |
15 | );
16 | }
--------------------------------------------------------------------------------
/frontend/nextjs/components/Task/Report.tsx:
--------------------------------------------------------------------------------
1 | import React, { useEffect, useState } from 'react';
2 | import { markdownToHtml } from '../../helpers/markdownHelper';
3 | import '../../styles/markdown.css';
4 |
5 | export default function Report({report}:any) {
6 | const [htmlContent, setHtmlContent] = useState('');
7 |
8 | useEffect(() => {
9 | const convertMarkdownToHtml = async () => {
10 | try {
11 | const processedHtml = await markdownToHtml(report);
12 | setHtmlContent(processedHtml);
13 | } catch (error) {
14 | console.error('Error converting markdown to HTML:', error);
15 | setHtmlContent('Error rendering content
');
16 | }
17 | };
18 |
19 | if (report) {
20 | convertMarkdownToHtml();
21 | }
22 | }, [report]);
23 |
24 | return (
25 |
26 |
Research Report
27 |
30 |
31 | );
32 | };
--------------------------------------------------------------------------------
/frontend/nextjs/components/TypeAnimation.tsx:
--------------------------------------------------------------------------------
1 | const TypeAnimation = () => {
2 | return (
3 |
4 |
5 |
6 |
7 |
8 | );
9 | };
10 |
11 | export default TypeAnimation;
12 |
--------------------------------------------------------------------------------
/frontend/nextjs/config/task.ts:
--------------------------------------------------------------------------------
1 | export const task = {
2 | "task": {
3 | "query": "Is AI in a hype cycle?",
4 | "include_human_feedback": false,
5 | "model": "gpt-4o",
6 | "max_sections": 3,
7 | "publish_formats": {
8 | "markdown": true,
9 | "pdf": true,
10 | "docx": true
11 | },
12 | "source": "web",
13 | "follow_guidelines": true,
14 | "guidelines": [
15 | "The report MUST fully answer the original question",
16 | "The report MUST be written in apa format",
17 | "The report MUST be written in english"
18 | ],
19 | "verbose": true
20 | },
21 | "initial_research": "Initial research data here",
22 | "sections": ["Section 1", "Section 2"],
23 | "research_data": "Research data here",
24 | "title": "Research Title",
25 | "headers": {
26 | "introduction": "Introduction header",
27 | "table_of_contents": "Table of Contents header",
28 | "conclusion": "Conclusion header",
29 | "sources": "Sources header"
30 | },
31 | "date": "2023-10-01",
32 | "table_of_contents": "- Introduction\n- Section 1\n- Section 2\n- Conclusion",
33 | "introduction": "Introduction content here",
34 | "conclusion": "Conclusion content here",
35 | "sources": ["Source 1", "Source 2"],
36 | "report": "Full report content here"
37 | }
--------------------------------------------------------------------------------
/frontend/nextjs/helpers/findDifferences.ts:
--------------------------------------------------------------------------------
1 | type Value = string | number | boolean | null | undefined | object | Value[]; // Possible value types
2 | type Changes = { [key: string]: { before: Value; after: Value } | Changes }; // Recursive changes type
3 |
4 | function findDifferences>(obj1: T, obj2: T): Changes {
5 | // Helper function to check if a value is an object (excluding arrays)
6 | function isObject(obj: any): obj is Record {
7 | return obj && typeof obj === 'object' && !Array.isArray(obj);
8 | }
9 |
10 | // Recursive function to compare two objects and return the differences
11 | function compareObjects(o1: Record, o2: Record): Changes {
12 | const changes: Changes = {};
13 |
14 | // Iterate over keys in the first object (o1)
15 | for (const key in o1) {
16 | if (isObject(o1[key]) && isObject(o2[key])) {
17 | // Recursively compare nested objects
18 | const nestedChanges = compareObjects(o1[key], o2[key]);
19 | if (Object.keys(nestedChanges).length > 0) {
20 | changes[key] = nestedChanges; // Add nested changes if any
21 | }
22 | } else if (Array.isArray(o1[key]) && Array.isArray(o2[key])) {
23 | // Compare arrays
24 | if (o1[key].length !== o2[key].length || o1[key].some((val, index) => val !== o2[key][index])) {
25 | changes[key] = { before: o1[key], after: o2[key] };
26 | }
27 | } else {
28 | // Compare primitive values (or any non-object, non-array values)
29 | if (o1[key] !== o2[key]) {
30 | changes[key] = { before: o1[key], after: o2[key] };
31 | }
32 | }
33 | }
34 |
35 | // Iterate over keys in the second object (o2) to detect new keys
36 | for (const key in o2) {
37 | if (!(key in o1)) {
38 | changes[key] = { before: undefined, after: o2[key] };
39 | }
40 | }
41 |
42 | return changes; // Return the collected changes
43 | }
44 |
45 | return compareObjects(obj1, obj2); // Compare the two input objects
46 | }
47 |
48 | export default findDifferences;
--------------------------------------------------------------------------------
/frontend/nextjs/helpers/getHost.ts:
--------------------------------------------------------------------------------
1 | interface GetHostParams {
2 | purpose?: string;
3 | }
4 |
5 | export const getHost = ({ purpose }: GetHostParams = {}): string => {
6 | if (typeof window !== 'undefined') {
7 | let { host } = window.location;
8 | const apiUrlInLocalStorage = localStorage.getItem("GPTR_API_URL");
9 |
10 | const urlParams = new URLSearchParams(window.location.search);
11 | const apiUrlInUrlParams = urlParams.get("GPTR_API_URL");
12 |
13 | if (apiUrlInLocalStorage) {
14 | return apiUrlInLocalStorage;
15 | } else if (apiUrlInUrlParams) {
16 | return apiUrlInUrlParams;
17 | } else if (process.env.NEXT_PUBLIC_GPTR_API_URL) {
18 | return process.env.NEXT_PUBLIC_GPTR_API_URL;
19 | } else if (process.env.REACT_APP_GPTR_API_URL) {
20 | return process.env.REACT_APP_GPTR_API_URL;
21 | } else if (purpose === 'langgraph-gui') {
22 | return host.includes('localhost') ? 'http%3A%2F%2F127.0.0.1%3A8123' : `https://${host}`;
23 | } else {
24 | return host.includes('localhost') ? 'http://localhost:8000' : `https://${host}`;
25 | }
26 | }
27 | return '';
28 | };
--------------------------------------------------------------------------------
/frontend/nextjs/hooks/useAnalytics.ts:
--------------------------------------------------------------------------------
1 | import ReactGA from 'react-ga4';
2 |
3 | interface ResearchData {
4 | query: string;
5 | report_type: string;
6 | report_source: string;
7 | }
8 |
9 | interface TrackResearchData {
10 | query: string;
11 | report_type: string;
12 | report_source: string;
13 | }
14 |
15 | export const useAnalytics = () => {
16 | const initGA = () => {
17 | if (typeof window !== 'undefined' && process.env.NEXT_PUBLIC_GA_MEASUREMENT_ID) {
18 | ReactGA.initialize(process.env.NEXT_PUBLIC_GA_MEASUREMENT_ID);
19 | }
20 | };
21 |
22 | const trackResearchQuery = (data: TrackResearchData) => {
23 | ReactGA.event({
24 | category: 'Research',
25 | action: 'Submit Query',
26 | label: JSON.stringify({
27 | query: data.query,
28 | report_type: data.report_type,
29 | report_source: data.report_source
30 | })
31 | });
32 | };
33 |
34 | return {
35 | initGA,
36 | trackResearchQuery
37 | };
38 | };
--------------------------------------------------------------------------------
/frontend/nextjs/hooks/useResearchHistory.ts:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from 'react';
2 | import { ResearchHistoryItem, Data } from '../types/data';
3 |
4 | export const useResearchHistory = () => {
5 | const [history, setHistory] = useState([]);
6 |
7 | // Load history from localStorage on initial render
8 | useEffect(() => {
9 | const storedHistory = localStorage.getItem('researchHistory');
10 | if (storedHistory) {
11 | try {
12 | setHistory(JSON.parse(storedHistory));
13 | } catch (error) {
14 | console.error('Error parsing research history:', error);
15 | // If there's an error parsing, reset the history
16 | localStorage.removeItem('researchHistory');
17 | }
18 | }
19 | }, []);
20 |
21 | // Save research to history
22 | const saveResearch = (question: string, answer: string, orderedData: Data[]) => {
23 | const newItem: ResearchHistoryItem = {
24 | id: Date.now().toString(),
25 | question,
26 | answer,
27 | timestamp: Date.now(),
28 | orderedData,
29 | };
30 |
31 | const updatedHistory = [newItem, ...history];
32 | setHistory(updatedHistory);
33 | localStorage.setItem('researchHistory', JSON.stringify(updatedHistory));
34 | return newItem.id;
35 | };
36 |
37 | // Get a specific research item by ID
38 | const getResearchById = (id: string) => {
39 | return history.find(item => item.id === id);
40 | };
41 |
42 | // Delete a research item
43 | const deleteResearch = (id: string) => {
44 | const updatedHistory = history.filter(item => item.id !== id);
45 | setHistory(updatedHistory);
46 | localStorage.setItem('researchHistory', JSON.stringify(updatedHistory));
47 | };
48 |
49 | // Clear all history
50 | const clearHistory = () => {
51 | setHistory([]);
52 | localStorage.removeItem('researchHistory');
53 | };
54 |
55 | return {
56 | history,
57 | saveResearch,
58 | getResearchById,
59 | deleteResearch,
60 | clearHistory,
61 | };
62 | };
--------------------------------------------------------------------------------
/frontend/nextjs/next.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('next').NextConfig} */
2 | const nextConfig = {
3 | images: {
4 | remotePatterns: [
5 | {
6 | hostname: 'www.google.com',
7 | },
8 | {
9 | hostname: 'www.google-analytics.com',
10 | }
11 | ],
12 | },
13 | };
14 |
15 | export default nextConfig;
16 |
--------------------------------------------------------------------------------
/frontend/nextjs/nginx/default.conf:
--------------------------------------------------------------------------------
1 | server{
2 | listen 3000;
3 |
4 | location / {
5 | root /usr/share/nginx/html;
6 | index index.html index.htm;
7 | try_files $uri $uri/ /index.html;
8 | }
9 | }
--------------------------------------------------------------------------------
/frontend/nextjs/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('postcss-load-config').Config} */
2 | const config = {
3 | plugins: {
4 | tailwindcss: {},
5 | },
6 | };
7 |
8 | export default config;
9 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/favicon.ico
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/Info.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/agents/academicResearchAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/agents/academicResearchAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/agents/businessAnalystAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/agents/businessAnalystAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/agents/computerSecurityanalystAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/agents/computerSecurityanalystAvatar.png
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/agents/defaultAgentAvatar.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/agents/defaultAgentAvatar.JPG
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/agents/financeAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/agents/financeAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/agents/mathAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/agents/mathAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/agents/travelAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/agents/travelAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/arrow-circle-up-right.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/arrow-narrow-right.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/chat-check.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/chat.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/copy-white.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/copy.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/discord.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/github-blue.svg:
--------------------------------------------------------------------------------
1 | Github-color Created with Sketch.
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/github-footer.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/github.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
github [#142] Created with Sketch.
7 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/gptr-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/nextjs/public/img/gptr-logo.png
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/icon _atom_.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/icon _dumbell_.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/icon _leaf_.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/image.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/link.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/message-question-circle.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/search.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/share.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/similarTopics.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/stock.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/stock2.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 |
7 |
15 |
21 |
22 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/white-books.svg:
--------------------------------------------------------------------------------
1 | books
--------------------------------------------------------------------------------
/frontend/nextjs/public/img/x.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/next.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/nextjs/public/vercel.svg:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/frontend/nextjs/src/index.css:
--------------------------------------------------------------------------------
1 | /* frontend/nextjs/src/index.css */
2 | @import './styles/markdown.css';
3 | @import './app/globals.css';
4 | @import './components/Settings/Settings.css';
5 |
6 | /* Include your Tailwind directives */
7 | @tailwind base;
8 | @tailwind components;
9 | @tailwind utilities;
--------------------------------------------------------------------------------
/frontend/nextjs/src/index.d.ts:
--------------------------------------------------------------------------------
1 | declare module 'gpt-researcher-ui' {
2 | import React from 'react';
3 |
4 | export interface GPTResearcherProps {
5 | apiUrl?: string;
6 | apiKey?: string;
7 | defaultPrompt?: string;
8 | onResultsChange?: (results: any) => void;
9 | theme?: any;
10 | }
11 |
12 | export const GPTResearcher: React.FC<GPTResearcherProps>;
13 | }
--------------------------------------------------------------------------------
/frontend/nextjs/src/index.ts:
--------------------------------------------------------------------------------
1 | // src/index.ts
2 | import { GPTResearcher } from './GPTResearcher';
3 |
4 | export { GPTResearcher };
5 | export type { GPTResearcherProps } from './GPTResearcher';
--------------------------------------------------------------------------------
/frontend/nextjs/src/utils/imageTransformPlugin.js:
--------------------------------------------------------------------------------
1 | // imageTransformPlugin.js
2 | export default function imageTransformPlugin() {
3 | return {
4 | name: 'image-transform',
5 | transform(code) {
6 | // Add more patterns to catch different image path formats
7 | return code.replace(
8 | /['"]\/img\/([^'"]+)['"]/g, // Also catch paths starting with /
9 | "'https://app.gptr.dev/img/$1'"
10 | ).replace(
11 | /['"]img\/([^'"]+)['"]/g, // Catch relative paths
12 | "'https://app.gptr.dev/img/$1'"
13 | );
14 | }
15 | };
16 | }
--------------------------------------------------------------------------------
/frontend/nextjs/tailwind.config.ts:
--------------------------------------------------------------------------------
1 | import type { Config } from 'tailwindcss';
2 |
3 | const config: Config = {
4 | content: [
5 | './pages/**/*.{js,ts,jsx,tsx,mdx}',
6 | './components/**/*.{js,ts,jsx,tsx,mdx}',
7 | './app/**/*.{js,ts,jsx,tsx,mdx}',
8 | ],
9 | theme: {
10 | screens: {
11 | sm: '640px',
12 | md: '768px',
13 | lg: '898px',
14 | // xl:"1024px"
15 | },
16 | container: {
17 | center: true,
18 | },
19 | extend: {
20 | animation: {
21 | 'gradient-x': 'gradient-x 10s ease infinite',
22 | 'shimmer': 'shimmer 2s linear infinite',
23 | 'pulse-slow': 'pulse 8s cubic-bezier(0.4, 0, 0.6, 1) infinite',
24 | },
25 | keyframes: {
26 | 'gradient-x': {
27 | '0%, 100%': { backgroundPosition: '0% 50%' },
28 | '50%': { backgroundPosition: '100% 50%' },
29 | },
30 | 'shimmer': {
31 | '100%': { transform: 'translateX(100%)' },
32 | },
33 | },
34 | backgroundImage: {
35 | 'gradient-radial': 'radial-gradient(var(--tw-gradient-stops))',
36 | 'custom-gradient':
37 | 'linear-gradient(150deg, #1B1B16 1.28%, #565646 90.75%)',
38 | 'gradient-conic':
39 | 'conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))',
40 | 'hero-gradient': 'linear-gradient(135deg, #9867F0, #ED4E50)',
41 | 'teal-gradient': 'linear-gradient(135deg, #0d9488, #0891b2, #2563eb)',
42 | },
43 | boxShadow: {
44 | 'glow': '0 0 40px rgba(152, 103, 240, 0.5)',
45 | 'teal-glow': '0 0 40px rgba(13, 148, 136, 0.5)',
46 | },
47 | colors: {
48 | 'primary': {
49 | '50': '#f0fdfa',
50 | '100': '#ccfbf1',
51 | '200': '#99f6e4',
52 | '300': '#5eead4',
53 | '400': '#2dd4bf',
54 | '500': '#14b8a6',
55 | '600': '#0d9488',
56 | '700': '#0f766e',
57 | '800': '#115e59',
58 | '900': '#134e4a',
59 | '950': '#042f2e',
60 | },
61 | },
62 | },
63 | },
64 | plugins: [],
65 | };
66 | export default config;
67 |
--------------------------------------------------------------------------------
/frontend/nextjs/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "lib": ["dom", "dom.iterable", "esnext"],
4 | "allowJs": true,
5 | "skipLibCheck": true,
6 | "strict": true,
7 | "noEmit": true,
8 | "esModuleInterop": true,
9 | "module": "esnext",
10 | "moduleResolution": "bundler",
11 | "resolveJsonModule": true,
12 | "isolatedModules": true,
13 | "jsx": "preserve",
14 | "incremental": true,
15 | "plugins": [
16 | {
17 | "name": "next"
18 | }
19 | ],
20 | "paths": {
21 | "@/*": ["./*"]
22 | }
23 | },
24 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts", "components/Task/ImagesCarousel.jsx"],
25 | "exclude": ["node_modules"]
26 | }
--------------------------------------------------------------------------------
/frontend/nextjs/tsconfig.lib.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "./tsconfig.json",
3 | "compilerOptions": {
4 | "outDir": "./dist",
5 | "declaration": true,
6 | "declarationDir": "./dist",
7 | "emitDeclarationOnly": false,
8 | "sourceMap": true,
9 | "noEmit": false,
10 | "jsx": "react-jsx",
11 | "moduleResolution": "node",
12 | "allowSyntheticDefaultImports": true,
13 | "esModuleInterop": true
14 | },
15 | "include": [
16 | "src/**/*.ts",
17 | "src/**/*.tsx",
18 | "components/**/*.jsx",
19 | "components/**/*.tsx"
20 | ],
21 | "exclude": ["node_modules", "**/*.test.ts", "**/*.test.tsx"]
22 | }
--------------------------------------------------------------------------------
/frontend/nextjs/types/data.ts:
--------------------------------------------------------------------------------
1 | export interface BaseData {
2 | type: string;
3 | }
4 |
5 | export interface BasicData extends BaseData {
6 | type: 'basic';
7 | content: string;
8 | }
9 |
10 | export interface LanggraphButtonData extends BaseData {
11 | type: 'langgraphButton';
12 | link: string;
13 | }
14 |
15 | export interface DifferencesData extends BaseData {
16 | type: 'differences';
17 | content: string;
18 | output: string;
19 | }
20 |
21 | export interface QuestionData extends BaseData {
22 | type: 'question';
23 | content: string;
24 | }
25 |
26 | export interface ChatData extends BaseData {
27 | type: 'chat';
28 | content: string;
29 | }
30 |
31 | export interface ErrorData extends BaseData {
32 | type: 'error';
33 | content: string;
34 | output: string;
35 | }
36 |
37 | export type Data = BasicData | LanggraphButtonData | DifferencesData | QuestionData | ChatData | ErrorData;
38 |
39 | export interface ChatBoxSettings {
40 | report_type: string;
41 | report_source: string;
42 | tone: string;
43 | domains: string[];
44 | defaultReportType: string;
45 | }
46 |
47 | export interface Domain {
48 | value: string;
49 | }
50 |
51 | export interface ResearchHistoryItem {
52 | id: string;
53 | question: string;
54 | answer: string;
55 | timestamp: number;
56 | orderedData: Data[];
57 | }
--------------------------------------------------------------------------------
/frontend/nextjs/types/react-ga4.d.ts:
--------------------------------------------------------------------------------
1 | declare module 'react-ga4' {
2 | export interface InitOptions {
3 | gaOptions?: any;
4 | gtagOptions?: any;
5 | testMode?: boolean;
6 | }
7 |
8 | export function initialize(
9 | measurementId: string | string[],
10 | options?: InitOptions
11 | ): void;
12 |
13 | export function event(options: {
14 | category: string;
15 | action: string;
16 | label?: string;
17 | value?: number;
18 | nonInteraction?: boolean;
19 | transport?: 'beacon' | 'xhr' | 'image';
20 | [key: string]: any;
21 | }): void;
22 |
23 | // Add other methods as needed
24 | export default {
25 | initialize,
26 | event
27 | };
28 | }
--------------------------------------------------------------------------------
/frontend/nextjs/utils/consolidateBlocks.ts:
--------------------------------------------------------------------------------
1 | export const consolidateSourceAndImageBlocks = (groupedData: any[]) => {
2 | // Consolidate sourceBlocks
3 | const consolidatedSourceBlock = {
4 | type: 'sourceBlock',
5 | items: groupedData
6 | .filter(item => item.type === 'sourceBlock')
7 | .flatMap(block => block.items || [])
8 | .filter((item, index, self) =>
9 | index === self.findIndex(t => t.url === item.url)
10 | )
11 | };
12 |
13 | // Consolidate imageBlocks
14 | const consolidatedImageBlock = {
15 | type: 'imagesBlock',
16 | metadata: groupedData
17 | .filter(item => item.type === 'imagesBlock')
18 | .flatMap(block => block.metadata || [])
19 | };
20 |
21 | // Remove all existing sourceBlocks and imageBlocks
22 | groupedData = groupedData.filter(item =>
23 | item.type !== 'sourceBlock' && item.type !== 'imagesBlock'
24 | );
25 |
26 | // Add consolidated blocks if they have items
27 | if (consolidatedSourceBlock.items.length > 0) {
28 | groupedData.push(consolidatedSourceBlock);
29 | }
30 | if (consolidatedImageBlock.metadata.length > 0) {
31 | groupedData.push(consolidatedImageBlock);
32 | }
33 |
34 | return groupedData;
35 | };
--------------------------------------------------------------------------------
/frontend/pdf_styles.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: 'Libre Baskerville', serif;
3 | font-size: 12pt; /* standard size for academic papers */
4 | line-height: 1.6; /* for readability */
5 | color: #333; /* softer on the eyes than black */
6 | background-color: #fff; /* white background */
7 | margin: 0;
8 | padding: 0;
9 | }
10 |
11 | h1, h2, h3, h4, h5, h6 {
12 | font-family: 'Libre Baskerville', serif;
13 | color: #000; /* darker than the body text */
14 | margin-top: 1em; /* space above headers */
15 | }
16 |
17 | h1 {
18 | font-size: 2em; /* make h1 twice the size of the body text */
19 | }
20 |
21 | h2 {
22 | font-size: 1.5em;
23 | }
24 |
25 | /* Add some space between paragraphs */
26 | p {
27 | margin-bottom: 1em;
28 | }
29 |
30 | /* Style for blockquotes, often used in academic papers */
31 | blockquote {
32 | font-style: italic;
33 | margin: 1em 0;
34 | padding: 1em;
35 | background-color: #f9f9f9; /* a light grey background */
36 | }
37 |
38 | /* You might want to style tables, figures, etc. too */
39 | table {
40 | border-collapse: collapse;
41 | width: 100%;
42 | }
43 |
44 | table, th, td {
45 | border: 1px solid #ddd;
46 | text-align: left;
47 | padding: 8px;
48 | }
49 |
50 | th {
51 | background-color: #f2f2f2;
52 | color: black;
53 | }
--------------------------------------------------------------------------------
/frontend/static/academicResearchAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/academicResearchAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/static/businessAnalystAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/businessAnalystAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/static/computerSecurityanalystAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/computerSecurityanalystAvatar.png
--------------------------------------------------------------------------------
/frontend/static/defaultAgentAvatar.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/defaultAgentAvatar.JPG
--------------------------------------------------------------------------------
/frontend/static/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/favicon.ico
--------------------------------------------------------------------------------
/frontend/static/financeAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/financeAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/static/gptr-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/gptr-logo.png
--------------------------------------------------------------------------------
/frontend/static/mathAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/mathAgentAvatar.png
--------------------------------------------------------------------------------
/frontend/static/travelAgentAvatar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/frontend/static/travelAgentAvatar.png
--------------------------------------------------------------------------------
/gpt_researcher/__init__.py:
--------------------------------------------------------------------------------
1 | from .agent import GPTResearcher
2 |
3 | __all__ = ['GPTResearcher']
--------------------------------------------------------------------------------
/gpt_researcher/actions/__init__.py:
--------------------------------------------------------------------------------
1 | from .retriever import get_retriever, get_retrievers
2 | from .query_processing import plan_research_outline, get_search_results
3 | from .agent_creator import extract_json_with_regex, choose_agent
4 | from .web_scraping import scrape_urls
5 | from .report_generation import write_conclusion, summarize_url, generate_draft_section_titles, generate_report, write_report_introduction
6 | from .markdown_processing import extract_headers, extract_sections, table_of_contents, add_references
7 | from .utils import stream_output
8 |
9 | __all__ = [
10 | "get_retriever",
11 | "get_retrievers",
12 | "get_search_results",
13 | "plan_research_outline",
14 | "extract_json_with_regex",
15 | "scrape_urls",
16 | "write_conclusion",
17 | "summarize_url",
18 | "generate_draft_section_titles",
19 | "generate_report",
20 | "write_report_introduction",
21 | "extract_headers",
22 | "extract_sections",
23 | "table_of_contents",
24 | "add_references",
25 | "stream_output",
26 | "choose_agent"
27 | ]
--------------------------------------------------------------------------------
/gpt_researcher/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import Config
2 | from .variables.base import BaseConfig
3 | from .variables.default import DEFAULT_CONFIG as DefaultConfig
4 |
5 | __all__ = ["Config", "BaseConfig", "DefaultConfig"]
6 |
--------------------------------------------------------------------------------
/gpt_researcher/config/variables/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/config/variables/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/config/variables/base.py:
--------------------------------------------------------------------------------
1 | from typing import Union
2 | from typing_extensions import TypedDict
3 |
4 |
5 | class BaseConfig(TypedDict):
6 | RETRIEVER: str
7 | EMBEDDING: str
8 | SIMILARITY_THRESHOLD: float
9 | FAST_LLM: str
10 | SMART_LLM: str
11 | STRATEGIC_LLM: str
12 | FAST_TOKEN_LIMIT: int
13 | SMART_TOKEN_LIMIT: int
14 | STRATEGIC_TOKEN_LIMIT: int
15 | BROWSE_CHUNK_MAX_LENGTH: int
16 | SUMMARY_TOKEN_LIMIT: int
17 | TEMPERATURE: float
18 | USER_AGENT: str
19 | MAX_SEARCH_RESULTS_PER_QUERY: int
20 | MEMORY_BACKEND: str
21 | TOTAL_WORDS: int
22 | REPORT_FORMAT: str
23 | CURATE_SOURCES: bool
24 | MAX_ITERATIONS: int
25 | LANGUAGE: str
26 | AGENT_ROLE: Union[str, None]
27 | SCRAPER: str
28 | MAX_SCRAPER_WORKERS: int
29 | MAX_SUBTOPICS: int
30 | REPORT_SOURCE: Union[str, None]
31 | DOC_PATH: str
32 | PROMPT_FAMILY: str
33 | LLM_KWARGS: dict
34 | EMBEDDING_KWARGS: dict
35 | DEEP_RESEARCH_CONCURRENCY: int
36 | DEEP_RESEARCH_DEPTH: int
37 | DEEP_RESEARCH_BREADTH: int
38 | REASONING_EFFORT: str
39 |
--------------------------------------------------------------------------------
/gpt_researcher/config/variables/default.py:
--------------------------------------------------------------------------------
1 | from .base import BaseConfig
2 |
3 | DEFAULT_CONFIG: BaseConfig = {
4 | "RETRIEVER": "tavily",
5 | "EMBEDDING": "openai:text-embedding-3-small",
6 | "SIMILARITY_THRESHOLD": 0.42,
7 | "FAST_LLM": "openai:gpt-4o-mini",
8 | "SMART_LLM": "openai:gpt-4.1", # Has support for long responses (2k+ words).
9 | "STRATEGIC_LLM": "openai:o4-mini", # Can also be used with o1 or o3; note that reasoning models make tasks slower.
10 | "FAST_TOKEN_LIMIT": 3000,
11 | "SMART_TOKEN_LIMIT": 6000,
12 | "STRATEGIC_TOKEN_LIMIT": 4000,
13 | "BROWSE_CHUNK_MAX_LENGTH": 8192,
14 | "CURATE_SOURCES": False,
15 | "SUMMARY_TOKEN_LIMIT": 700,
16 | "TEMPERATURE": 0.4,
17 | "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0",
18 | "MAX_SEARCH_RESULTS_PER_QUERY": 5,
19 | "MEMORY_BACKEND": "local",
20 | "TOTAL_WORDS": 1200,
21 | "REPORT_FORMAT": "APA",
22 | "MAX_ITERATIONS": 3,
23 | "AGENT_ROLE": None,
24 | "SCRAPER": "bs",
25 | "MAX_SCRAPER_WORKERS": 15,
26 | "MAX_SUBTOPICS": 3,
27 | "LANGUAGE": "english",
28 | "REPORT_SOURCE": "web",
29 | "DOC_PATH": "./my-docs",
30 | "PROMPT_FAMILY": "default",
31 | "LLM_KWARGS": {},
32 | "EMBEDDING_KWARGS": {},
33 | # Deep research specific settings
34 | "DEEP_RESEARCH_BREADTH": 3,
35 | "DEEP_RESEARCH_DEPTH": 2,
36 | "DEEP_RESEARCH_CONCURRENCY": 4,
37 | "REASONING_EFFORT": "medium",
38 | }
39 |
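
For illustration only, the snippet below shows one way these defaults could be read and selectively overridden from user code; the dict-merge override is a sketch, not the package's own configuration loader.

from gpt_researcher.config.variables.default import DEFAULT_CONFIG

# Copy the defaults and override a few fields for a hypothetical run.
my_config = {**DEFAULT_CONFIG, "RETRIEVER": "duckduckgo", "TOTAL_WORDS": 2000}
print(my_config["FAST_LLM"])   # -> "openai:gpt-4o-mini"
print(my_config["RETRIEVER"])  # -> "duckduckgo"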
--------------------------------------------------------------------------------
/gpt_researcher/config/variables/test_local.json:
--------------------------------------------------------------------------------
1 | {
2 | "DOC_PATH": "tests/docs"
3 | }
4 |
--------------------------------------------------------------------------------
/gpt_researcher/context/__init__.py:
--------------------------------------------------------------------------------
1 | from .compression import ContextCompressor
2 | from .retriever import SearchAPIRetriever
3 |
4 | __all__ = ['ContextCompressor', 'SearchAPIRetriever']
5 |
--------------------------------------------------------------------------------
/gpt_researcher/context/retriever.py:
--------------------------------------------------------------------------------
1 | import os
2 | from enum import Enum
3 | from typing import Any, Dict, List, Optional
4 |
5 | from langchain.callbacks.manager import CallbackManagerForRetrieverRun
6 | from langchain.schema import Document
7 | from langchain.schema.retriever import BaseRetriever
8 |
9 |
10 | class SearchAPIRetriever(BaseRetriever):
11 | """Search API retriever."""
12 | pages: List[Dict] = []
13 |
14 | def _get_relevant_documents(
15 | self, query: str, *, run_manager: CallbackManagerForRetrieverRun
16 | ) -> List[Document]:
17 |
18 | docs = [
19 | Document(
20 | page_content=page.get("raw_content", ""),
21 | metadata={
22 | "title": page.get("title", ""),
23 | "source": page.get("url", ""),
24 | },
25 | )
26 | for page in self.pages
27 | ]
28 |
29 | return docs
30 |
31 | class SectionRetriever(BaseRetriever):
32 | """
33 | SectionRetriever:
34 | This class is used to retrieve sections while avoiding redundant subtopics.
35 | """
36 | sections: List[Dict] = []
37 | """
38 | sections example:
39 | [
40 | {
41 | "section_title": "Example Title",
42 | "written_content": "Example content"
43 | },
44 | ...
45 | ]
46 | """
47 |
48 | def _get_relevant_documents(
49 | self, query: str, *, run_manager: CallbackManagerForRetrieverRun
50 | ) -> List[Document]:
51 |
52 | docs = [
53 | Document(
54 | page_content=page.get("written_content", ""),
55 | metadata={
56 | "section_title": page.get("section_title", ""),
57 | },
58 | )
59 | for page in self.sections # Changed 'self.pages' to 'self.sections'
60 | ]
61 |
62 | return docs
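
A minimal usage sketch (not part of the file above): it assumes scraped pages shaped like the dicts this retriever expects, and relies on the `invoke` method that LangChain's `BaseRetriever` exposes on recent versions.

from gpt_researcher.context.retriever import SearchAPIRetriever

pages = [
    {"title": "Example", "url": "https://example.com", "raw_content": "Example body text"},
]
retriever = SearchAPIRetriever(pages=pages)
docs = retriever.invoke("example query")   # returns a list of LangChain Documents
print(docs[0].metadata["source"])          # -> "https://example.com"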
--------------------------------------------------------------------------------
/gpt_researcher/document/__init__.py:
--------------------------------------------------------------------------------
1 | from .document import DocumentLoader
2 | from .online_document import OnlineDocumentLoader
3 | from .langchain_document import LangChainDocumentLoader
4 |
5 | __all__ = ['DocumentLoader', 'OnlineDocumentLoader', 'LangChainDocumentLoader']
6 |
--------------------------------------------------------------------------------
/gpt_researcher/document/azure_document_loader.py:
--------------------------------------------------------------------------------
1 | from azure.storage.blob import BlobServiceClient
2 | import os
3 | import tempfile
4 |
5 | class AzureDocumentLoader:
6 | def __init__(self, container_name, connection_string):
7 | self.client = BlobServiceClient.from_connection_string(connection_string)
8 | self.container = self.client.get_container_client(container_name)
9 |
10 | async def load(self):
11 | """Download all blobs to temp files and return their paths."""
12 | temp_dir = tempfile.mkdtemp()
13 | blobs = self.container.list_blobs()
14 | file_paths = []
15 | for blob in blobs:
16 | blob_client = self.container.get_blob_client(blob.name)
17 | local_path = os.path.join(temp_dir, blob.name)
18 | with open(local_path, "wb") as f:
19 | blob_data = blob_client.download_blob()
20 | f.write(blob_data.readall())
21 | file_paths.append(local_path)
22 | return file_paths # Pass to existing DocumentLoader
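
A hedged usage sketch (not part of the file above); the container name and environment variable below are placeholders for your own Azure setup.

import asyncio
import os

from gpt_researcher.document.azure_document_loader import AzureDocumentLoader

# Placeholder container name and env var; adjust to your Azure Blob Storage account.
loader = AzureDocumentLoader(
    container_name="my-docs",
    connection_string=os.environ["AZURE_STORAGE_CONNECTION_STRING"],
)
file_paths = asyncio.run(loader.load())  # temp-file paths to feed the existing DocumentLoader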
--------------------------------------------------------------------------------
/gpt_researcher/document/langchain_document.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import os
3 |
4 | from langchain_core.documents import Document
5 | from typing import List, Dict
6 |
7 |
8 | # Supports the base Document class from langchain
9 | # - https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/documents/base.py
10 | class LangChainDocumentLoader:
11 |
12 | def __init__(self, documents: List[Document]):
13 | self.documents = documents
14 |
15 | async def load(self, metadata_source_index="title") -> List[Dict[str, str]]:
16 | docs = []
17 | for document in self.documents:
18 | docs.append(
19 | {
20 | "raw_content": document.page_content,
21 | "url": document.metadata.get(metadata_source_index, ""),
22 | }
23 | )
24 | return docs
25 |
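
An illustrative sketch (not part of the file above) showing how a list of LangChain `Document` objects maps to the `raw_content`/`url` dicts the rest of the pipeline consumes.

import asyncio

from langchain_core.documents import Document
from gpt_researcher.document.langchain_document import LangChainDocumentLoader

docs = [Document(page_content="Some text", metadata={"title": "Example Doc"})]
result = asyncio.run(LangChainDocumentLoader(docs).load())
# -> [{"raw_content": "Some text", "url": "Example Doc"}]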
--------------------------------------------------------------------------------
/gpt_researcher/llm_provider/__init__.py:
--------------------------------------------------------------------------------
1 | from .generic import GenericLLMProvider
2 |
3 | __all__ = [
4 | "GenericLLMProvider",
5 | ]
6 |
--------------------------------------------------------------------------------
/gpt_researcher/llm_provider/generic/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import GenericLLMProvider
2 |
3 | __all__ = ["GenericLLMProvider"]
--------------------------------------------------------------------------------
/gpt_researcher/memory/__init__.py:
--------------------------------------------------------------------------------
1 | from .embeddings import Memory
2 |
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/__init__.py:
--------------------------------------------------------------------------------
1 | from .arxiv.arxiv import ArxivSearch
2 | from .bing.bing import BingSearch
3 | from .custom.custom import CustomRetriever
4 | from .duckduckgo.duckduckgo import Duckduckgo
5 | from .google.google import GoogleSearch
6 | from .pubmed_central.pubmed_central import PubMedCentralSearch
7 | from .searx.searx import SearxSearch
8 | from .semantic_scholar.semantic_scholar import SemanticScholarSearch
9 | from .searchapi.searchapi import SearchApiSearch
10 | from .serpapi.serpapi import SerpApiSearch
11 | from .serper.serper import SerperSearch
12 | from .tavily.tavily_search import TavilySearch
13 | from .exa.exa import ExaSearch
14 |
15 | __all__ = [
16 | "TavilySearch",
17 | "CustomRetriever",
18 | "Duckduckgo",
19 | "SearchApiSearch",
20 | "SerperSearch",
21 | "SerpApiSearch",
22 | "GoogleSearch",
23 | "SearxSearch",
24 | "BingSearch",
25 | "ArxivSearch",
26 | "SemanticScholarSearch",
27 | "PubMedCentralSearch",
28 | "ExaSearch"
29 | ]
30 |
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/arxiv/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/arxiv/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/arxiv/arxiv.py:
--------------------------------------------------------------------------------
1 | import arxiv
2 |
3 |
4 | class ArxivSearch:
5 | """
6 | Arxiv API Retriever
7 | """
8 | def __init__(self, query, sort='Relevance', query_domains=None):
9 | self.arxiv = arxiv
10 | self.query = query
11 | assert sort in ['Relevance', 'SubmittedDate'], "Invalid sort criterion"
12 | self.sort = arxiv.SortCriterion.SubmittedDate if sort == 'SubmittedDate' else arxiv.SortCriterion.Relevance
13 |
14 |
15 | def search(self, max_results=5):
16 | """
17 | Performs the search
18 | :param query:
19 | :param max_results:
20 | :return:
21 | """
22 |
23 | arxiv_gen = list(arxiv.Client().results(
24 | self.arxiv.Search(
25 | query=self.query,
26 | max_results=max_results,
27 | sort_by=self.sort,
28 | )))
29 |
30 | search_result = []
31 |
32 | for result in arxiv_gen:
33 |
34 | search_result.append({
35 | "title": result.title,
36 | "href": result.pdf_url,
37 | "body": result.summary,
38 | })
39 |
40 | return search_result
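
A minimal sketch (not repository code), assuming the `arxiv` package is installed and network access is available; the query string is arbitrary.

from gpt_researcher.retrievers.arxiv.arxiv import ArxivSearch

results = ArxivSearch("large language models", sort="SubmittedDate").search(max_results=3)
for paper in results:
    print(paper["title"], paper["href"])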
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/bing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/bing/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/custom/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/custom/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/custom/custom.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict, List, Optional
2 | import requests
3 | import os
4 |
5 |
6 | class CustomRetriever:
7 | """
8 | Custom API Retriever
9 | """
10 |
11 | def __init__(self, query: str, query_domains=None):
12 | self.endpoint = os.getenv('RETRIEVER_ENDPOINT')
13 | if not self.endpoint:
14 | raise ValueError("RETRIEVER_ENDPOINT environment variable not set")
15 |
16 | self.params = self._populate_params()
17 | self.query = query
18 |
19 | def _populate_params(self) -> Dict[str, Any]:
20 | """
21 | Populates parameters from environment variables prefixed with 'RETRIEVER_ARG_'
22 | """
23 | return {
24 | key[len('RETRIEVER_ARG_'):].lower(): value
25 | for key, value in os.environ.items()
26 | if key.startswith('RETRIEVER_ARG_')
27 | }
28 |
29 | def search(self, max_results: int = 5) -> Optional[List[Dict[str, Any]]]:
30 | """
31 | Performs the search using the custom retriever endpoint.
32 |
33 | :param max_results: Maximum number of results to return (not currently used)
34 | :return: JSON response in the format:
35 | [
36 | {
37 | "url": "http://example.com/page1",
38 | "raw_content": "Content of page 1"
39 | },
40 | {
41 | "url": "http://example.com/page2",
42 | "raw_content": "Content of page 2"
43 | }
44 | ]
45 | """
46 | try:
47 | response = requests.get(self.endpoint, params={**self.params, 'query': self.query})
48 | response.raise_for_status()
49 | return response.json()
50 | except requests.RequestException as e:
51 | print(f"Failed to retrieve search results: {e}")
52 | return None
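
An assumption-laden usage sketch (not part of the file above); the endpoint URL and `RETRIEVER_ARG_API_KEY` value are placeholders that illustrate how `RETRIEVER_ARG_*` variables are forwarded as query parameters.

import os

os.environ["RETRIEVER_ENDPOINT"] = "https://my-search-service.example.com/search"  # placeholder
os.environ["RETRIEVER_ARG_API_KEY"] = "my-secret-key"                              # placeholder

from gpt_researcher.retrievers.custom.custom import CustomRetriever

results = CustomRetriever("what is gpt researcher").search()
# Expected shape on success: [{"url": "...", "raw_content": "..."}, ...]; None on request failure.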
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/duckduckgo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/duckduckgo/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/duckduckgo/duckduckgo.py:
--------------------------------------------------------------------------------
1 | from itertools import islice
2 | from ..utils import check_pkg
3 |
4 |
5 | class Duckduckgo:
6 | """
7 | Duckduckgo API Retriever
8 | """
9 | def __init__(self, query, query_domains=None):
10 | check_pkg('duckduckgo_search')
11 | from duckduckgo_search import DDGS
12 | self.ddg = DDGS()
13 | self.query = query
14 | self.query_domains = query_domains or None
15 |
16 | def search(self, max_results=5):
17 | """
18 | Performs the search
19 | :param query:
20 | :param max_results:
21 | :return:
22 | """
23 | # TODO: Add support for query domains
24 | try:
25 | search_response = self.ddg.text(self.query, region='wt-wt', max_results=max_results)
26 | except Exception as e:
27 | print(f"Error: {e}. Failed fetching sources. Resulting in empty response.")
28 | search_response = []
29 | return search_response
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/exa/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/exa/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/google/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/google/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/pubmed_central/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/pubmed_central/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/searchapi/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/searchapi/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/searx/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/searx/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/semantic_scholar/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/semantic_scholar/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/semantic_scholar/semantic_scholar.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List
2 |
3 | import requests
4 |
5 |
6 | class SemanticScholarSearch:
7 | """
8 | Semantic Scholar API Retriever
9 | """
10 |
11 | BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search"
12 | VALID_SORT_CRITERIA = ["relevance", "citationCount", "publicationDate"]
13 |
14 | def __init__(self, query: str, sort: str = "relevance", query_domains=None):
15 | """
16 | Initialize the SemanticScholarSearch class with a query and sort criterion.
17 |
18 | :param query: Search query string
19 | :param sort: Sort criterion ('relevance', 'citationCount', 'publicationDate')
20 | """
21 | self.query = query
22 | assert sort in self.VALID_SORT_CRITERIA, "Invalid sort criterion"
23 | self.sort = sort.lower()
24 |
25 | def search(self, max_results: int = 20) -> List[Dict[str, str]]:
26 | """
27 | Perform the search on Semantic Scholar and return results.
28 |
29 | :param max_results: Maximum number of results to retrieve
30 | :return: List of dictionaries containing title, href, and body of each paper
31 | """
32 | params = {
33 | "query": self.query,
34 | "limit": max_results,
35 | "fields": "title,abstract,url,venue,year,authors,isOpenAccess,openAccessPdf",
36 | "sort": self.sort,
37 | }
38 |
39 | try:
40 | response = requests.get(self.BASE_URL, params=params)
41 | response.raise_for_status()
42 | except requests.RequestException as e:
43 | print(f"An error occurred while accessing Semantic Scholar API: {e}")
44 | return []
45 |
46 | results = response.json().get("data", [])
47 | search_result = []
48 |
49 | for result in results:
50 | if result.get("isOpenAccess") and result.get("openAccessPdf"):
51 | search_result.append(
52 | {
53 | "title": result.get("title", "No Title"),
54 | "href": result["openAccessPdf"].get("url", "No URL"),
55 | "body": result.get("abstract", "Abstract not available"),
56 | }
57 | )
58 |
59 | return search_result
60 |
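
A brief sketch (not repository code), assuming network access to the Semantic Scholar API; note that the filter above only keeps open-access papers with a PDF link.

from gpt_researcher.retrievers.semantic_scholar.semantic_scholar import SemanticScholarSearch

results = SemanticScholarSearch("retrieval augmented generation", sort="relevance").search(max_results=5)
for paper in results:
    print(paper["title"], paper["href"])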
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/serpapi/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/serpapi/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/serper/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/serper/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/tavily/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/retrievers/tavily/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/retrievers/utils.py:
--------------------------------------------------------------------------------
1 | import importlib.util
2 | import os
3 |
4 | VALID_RETRIEVERS = [
5 | "arxiv",
6 | "bing",
7 | "custom",
8 | "duckduckgo",
9 | "exa",
10 | "google",
11 | "searchapi",
12 | "searx",
13 | "semantic_scholar",
14 | "serpapi",
15 | "serper",
16 | "tavily",
17 | "pubmed_central",
18 | ]
19 |
20 |
21 | def check_pkg(pkg: str) -> None:
22 | if not importlib.util.find_spec(pkg):
23 | pkg_kebab = pkg.replace("_", "-")
24 | raise ImportError(
25 | f"Unable to import {pkg_kebab}. Please install with "
26 | f"`pip install -U {pkg_kebab}`"
27 | )
28 |
29 | # Get a list of all retriever names to be used as validators for supported retrievers
30 | def get_all_retriever_names() -> list:
31 | try:
32 | current_dir = os.path.dirname(__file__)
33 |
34 | all_items = os.listdir(current_dir)
35 |
36 | # Filter out only the directories, excluding __pycache__
37 | retrievers = [item for item in all_items if os.path.isdir(os.path.join(current_dir, item))]
38 | except Exception as e:
39 | print(f"Error in get_all_retriever_names: {e}")
40 | retrievers = VALID_RETRIEVERS
41 |
42 | return retrievers
43 |
--------------------------------------------------------------------------------
/gpt_researcher/scraper/__init__.py:
--------------------------------------------------------------------------------
1 | from .beautiful_soup.beautiful_soup import BeautifulSoupScraper
2 | from .web_base_loader.web_base_loader import WebBaseLoaderScraper
3 | from .arxiv.arxiv import ArxivScraper
4 | from .pymupdf.pymupdf import PyMuPDFScraper
5 | from .browser.browser import BrowserScraper
6 | from .browser.nodriver_scraper import NoDriverScraper
7 | from .tavily_extract.tavily_extract import TavilyExtract
8 | from .firecrawl.firecrawl import FireCrawl
9 | from .scraper import Scraper
10 |
11 | __all__ = [
12 | "BeautifulSoupScraper",
13 | "WebBaseLoaderScraper",
14 | "ArxivScraper",
15 | "PyMuPDFScraper",
16 | "BrowserScraper",
17 | "NoDriverScraper",
18 | "TavilyExtract",
19 | "Scraper",
20 | "FireCrawl",
21 | ]
22 |
--------------------------------------------------------------------------------
/gpt_researcher/scraper/arxiv/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/arxiv/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/arxiv/arxiv.py:
--------------------------------------------------------------------------------
1 | from langchain_community.retrievers import ArxivRetriever
2 |
3 |
4 | class ArxivScraper:
5 |
6 | def __init__(self, link, session=None):
7 | self.link = link
8 | self.session = session
9 |
10 | def scrape(self):
11 | """
12 | The function scrapes relevant documents from Arxiv based on a given link and returns the content
13 | of the first document.
14 |
15 | Returns:
16 | The code is returning the page content of the first document retrieved by the ArxivRetriever
17 | for a given query extracted from the link.
18 | """
19 | query = self.link.split("/")[-1]
20 | retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None)
21 | docs = retriever.invoke(query)
22 |
23 | # Include the published date and author to provide additional context,
24 | # aligning with APA-style formatting in the report.
25 | context = f"Published: {docs[0].metadata['Published']}; Author: {docs[0].metadata['Authors']}; Content: {docs[0].page_content}"
26 | image = []
27 |
28 | return context, image, docs[0].metadata["Title"]
29 |
--------------------------------------------------------------------------------
/gpt_researcher/scraper/beautiful_soup/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/beautiful_soup/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/beautiful_soup/beautiful_soup.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from urllib.parse import urljoin
3 |
4 | from ..utils import get_relevant_images, extract_title, get_text_from_soup, clean_soup
5 |
6 | class BeautifulSoupScraper:
7 |
8 | def __init__(self, link, session=None):
9 | self.link = link
10 | self.session = session
11 |
12 | def scrape(self):
13 | """
14 | This function scrapes content from a webpage by making a GET request, parsing the HTML using
15 | BeautifulSoup, and extracting script and style elements before returning the cleaned content.
16 |
17 | Returns:
18 | The `scrape` method is returning the cleaned and extracted content from the webpage specified
19 | by the `self.link` attribute. The method fetches the webpage content, removes script and style
20 | tags, extracts the text content, and returns the cleaned content as a string. If any exception
21 | occurs during the process, an error message is printed and an empty string is returned.
22 | """
23 | try:
24 | response = self.session.get(self.link, timeout=4)
25 | soup = BeautifulSoup(
26 | response.content, "lxml", from_encoding=response.encoding
27 | )
28 |
29 | soup = clean_soup(soup)
30 |
31 | content = get_text_from_soup(soup)
32 |
33 | image_urls = get_relevant_images(soup, self.link)
34 |
35 | # Extract the title using the utility function
36 | title = extract_title(soup)
37 |
38 | return content, image_urls, title
39 |
40 | except Exception as e:
41 | print("Error! : " + str(e))
42 | return "", [], ""
--------------------------------------------------------------------------------
/gpt_researcher/scraper/browser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/browser/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/browser/js/overlay.js:
--------------------------------------------------------------------------------
1 | const overlay = document.createElement('div');
2 | Object.assign(overlay.style, {
3 | position: 'fixed',
4 | zIndex: 999999,
5 | top: 0,
6 | left: 0,
7 | width: '100%',
8 | height: '100%',
9 | background: 'rgba(0, 0, 0, 0.7)',
10 | color: '#fff',
11 | fontSize: '24px',
12 | fontWeight: 'bold',
13 | display: 'flex',
14 | justifyContent: 'center',
15 | alignItems: 'center',
16 | });
17 | const textContent = document.createElement('div');
18 | Object.assign(textContent.style, {
19 | textAlign: 'center',
20 | });
21 | textContent.textContent = 'GPT Researcher: Analyzing Page';
22 | overlay.appendChild(textContent);
23 | document.body.append(overlay);
24 | document.body.style.overflow = 'hidden';
25 | let dotCount = 0;
26 | setInterval(() => {
27 | textContent.textContent = 'GPT Researcher: Analyzing Page' + '.'.repeat(dotCount);
28 | dotCount = (dotCount + 1) % 4;
29 | }, 1000);
30 |
--------------------------------------------------------------------------------
/gpt_researcher/scraper/browser/processing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/browser/processing/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/browser/processing/html.py:
--------------------------------------------------------------------------------
1 | """HTML processing functions"""
2 | from __future__ import annotations
3 |
4 | from bs4 import BeautifulSoup
5 | from requests.compat import urljoin
6 |
7 |
8 | def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> list[tuple[str, str]]:
9 | """Extract hyperlinks from a BeautifulSoup object
10 |
11 | Args:
12 | soup (BeautifulSoup): The BeautifulSoup object
13 | base_url (str): The base URL
14 |
15 | Returns:
16 | List[Tuple[str, str]]: The extracted hyperlinks
17 | """
18 | return [
19 | (link.text, urljoin(base_url, link["href"]))
20 | for link in soup.find_all("a", href=True)
21 | ]
22 |
23 |
24 | def format_hyperlinks(hyperlinks: list[tuple[str, str]]) -> list[str]:
25 | """Format hyperlinks to be displayed to the user
26 |
27 | Args:
28 | hyperlinks (List[Tuple[str, str]]): The hyperlinks to format
29 |
30 | Returns:
31 | List[str]: The formatted hyperlinks
32 | """
33 | return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks]
34 |
--------------------------------------------------------------------------------
/gpt_researcher/scraper/browser/processing/scrape_skills.py:
--------------------------------------------------------------------------------
1 | from langchain_community.document_loaders import PyMuPDFLoader
2 | from langchain_community.retrievers import ArxivRetriever
3 |
4 |
5 | def scrape_pdf_with_pymupdf(url) -> str:
6 | """Scrape a pdf with pymupdf
7 |
8 | Args:
9 | url (str): The url of the pdf to scrape
10 |
11 | Returns:
12 | str: The text scraped from the pdf
13 | """
14 | loader = PyMuPDFLoader(url)
15 | doc = loader.load()
16 | return str(doc)
17 |
18 |
19 | def scrape_pdf_with_arxiv(query) -> str:
20 | """Scrape a pdf with arxiv
21 | default document length of 70000 about ~15 pages or None for no limit
22 |
23 | Args:
24 | query (str): The query to search for
25 |
26 | Returns:
27 | str: The text scraped from the pdf
28 | """
29 | retriever = ArxivRetriever(load_max_docs=2, doc_content_chars_max=None)
30 | docs = retriever.get_relevant_documents(query=query)
31 | return docs[0].page_content
--------------------------------------------------------------------------------
/gpt_researcher/scraper/firecrawl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/firecrawl/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/pymupdf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/pymupdf/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/tavily_extract/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/tavily_extract/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/web_base_loader/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/scraper/web_base_loader/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/scraper/web_base_loader/web_base_loader.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from urllib.parse import urljoin
3 | import requests
4 | from ..utils import get_relevant_images, extract_title
5 |
6 | class WebBaseLoaderScraper:
7 |
8 | def __init__(self, link, session=None):
9 | self.link = link
10 | self.session = session or requests.Session()
11 |
12 | def scrape(self) -> tuple:
13 | """
14 | This Python function scrapes content from a webpage using a WebBaseLoader object and returns the
15 | concatenated page content.
16 |
17 | Returns:
18 | The `scrape` method is returning a string variable named `content` which contains the
19 | concatenated page content from the documents loaded by the `WebBaseLoader`. If an exception
20 | occurs during the process, an error message is printed and an empty string is returned.
21 | """
22 | try:
23 | from langchain_community.document_loaders import WebBaseLoader
24 | loader = WebBaseLoader(self.link)
25 | loader.requests_kwargs = {"verify": False}
26 | docs = loader.load()
27 | content = ""
28 |
29 | for doc in docs:
30 | content += doc.page_content
31 |
32 | response = self.session.get(self.link)
33 | soup = BeautifulSoup(response.content, 'html.parser')
34 | image_urls = get_relevant_images(soup, self.link)
35 |
36 | # Extract the title using the utility function
37 | title = extract_title(soup)
38 |
39 | return content, image_urls, title
40 |
41 | except Exception as e:
42 | print("Error! : " + str(e))
43 | return "", [], ""
44 |
--------------------------------------------------------------------------------
/gpt_researcher/skills/__init__.py:
--------------------------------------------------------------------------------
1 | from .context_manager import ContextManager
2 | from .researcher import ResearchConductor
3 | from .writer import ReportGenerator
4 | from .browser import BrowserManager
5 | from .curator import SourceCurator
6 |
7 | __all__ = [
8 | 'ResearchConductor',
9 | 'ReportGenerator',
10 | 'ContextManager',
11 | 'BrowserManager',
12 | 'SourceCurator'
13 | ]
14 |
--------------------------------------------------------------------------------
/gpt_researcher/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/gpt_researcher/utils/__init__.py
--------------------------------------------------------------------------------
/gpt_researcher/utils/costs.py:
--------------------------------------------------------------------------------
1 | import tiktoken
2 |
3 | # Per OpenAI Pricing Page: https://openai.com/api/pricing/
4 | ENCODING_MODEL = "o200k_base"
5 | INPUT_COST_PER_TOKEN = 0.000005
6 | OUTPUT_COST_PER_TOKEN = 0.000015
7 | IMAGE_INFERENCE_COST = 0.003825
8 | EMBEDDING_COST = 0.02 / 1000000  # Assumes OpenAI text-embedding-3-small
9 |
10 |
11 | # Cost estimation is via OpenAI libraries and models. May vary for other models
12 | def estimate_llm_cost(input_content: str, output_content: str) -> float:
13 | encoding = tiktoken.get_encoding(ENCODING_MODEL)
14 | input_tokens = encoding.encode(input_content)
15 | output_tokens = encoding.encode(output_content)
16 | input_costs = len(input_tokens) * INPUT_COST_PER_TOKEN
17 | output_costs = len(output_tokens) * OUTPUT_COST_PER_TOKEN
18 | return input_costs + output_costs
19 |
20 |
21 | def estimate_embedding_cost(model, docs):
22 | encoding = tiktoken.encoding_for_model(model)
23 | total_tokens = sum(len(encoding.encode(str(doc))) for doc in docs)
24 | return total_tokens * EMBEDDING_COST
25 |
26 |
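
A worked example of the cost formula above with round numbers, plus a sketch of calling the helper (actual token counts of the strings will differ in practice):

# 10,000 input tokens  * $0.000005/token = $0.05
#  2,000 output tokens * $0.000015/token = $0.03
# estimated total                        = $0.08
from gpt_researcher.utils.costs import estimate_llm_cost

cost = estimate_llm_cost("some prompt text", "some generated report text")
print(f"Estimated LLM cost: ${cost:.6f}")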
--------------------------------------------------------------------------------
/gpt_researcher/utils/validators.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from pydantic import BaseModel, Field
4 |
5 | class Subtopic(BaseModel):
6 | task: str = Field(description="Task name", min_length=1)
7 |
8 | class Subtopics(BaseModel):
9 | subtopics: List[Subtopic] = []
10 |
--------------------------------------------------------------------------------
/gpt_researcher/utils/workers.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from concurrent.futures import ThreadPoolExecutor
3 | from contextlib import asynccontextmanager
4 |
5 |
6 | class WorkerPool:
7 | def __init__(self, max_workers: int):
8 | self.max_workers = max_workers
9 | self.executor = ThreadPoolExecutor(max_workers=max_workers)
10 | self.semaphore = asyncio.Semaphore(max_workers)
11 |
12 | @asynccontextmanager
13 | async def throttle(self):
14 | async with self.semaphore:
15 | yield
16 |
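
A small illustrative sketch (not part of the file above) of how `throttle()` bounds concurrency across a batch of async tasks; the task names are placeholders.

import asyncio

from gpt_researcher.utils.workers import WorkerPool

async def fetch(i: int, pool: WorkerPool) -> int:
    async with pool.throttle():      # at most max_workers tasks enter this block at once
        await asyncio.sleep(0.1)     # stand-in for real scraping or I/O work
        return i

async def main() -> None:
    pool = WorkerPool(max_workers=3)
    print(await asyncio.gather(*(fetch(i, pool) for i in range(10))))

asyncio.run(main())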
--------------------------------------------------------------------------------
/gpt_researcher/vector_store/__init__.py:
--------------------------------------------------------------------------------
1 | from .vector_store import VectorStoreWrapper
2 |
3 | __all__ = ['VectorStoreWrapper']
--------------------------------------------------------------------------------
/gpt_researcher/vector_store/vector_store.py:
--------------------------------------------------------------------------------
1 | """
2 | Wrapper for langchain vector store
3 | """
4 | from typing import List, Dict
5 |
6 | from langchain.docstore.document import Document
7 | from langchain.vectorstores import VectorStore
8 | from langchain.text_splitter import RecursiveCharacterTextSplitter
9 |
10 | class VectorStoreWrapper:
11 | """
12 | A Wrapper for LangchainVectorStore to handle GPT-Researcher Document Type
13 | """
14 | def __init__(self, vector_store : VectorStore):
15 | self.vector_store = vector_store
16 |
17 | def load(self, documents):
18 | """
19 | Load the documents into vector_store
20 | Translate to langchain doc type, split to chunks then load
21 | """
22 | langchain_documents = self._create_langchain_documents(documents)
23 | splitted_documents = self._split_documents(langchain_documents)
24 | self.vector_store.add_documents(splitted_documents)
25 |
26 | def _create_langchain_documents(self, data: List[Dict[str, str]]) -> List[Document]:
27 | """Convert GPT Researcher Document to Langchain Document"""
28 | return [Document(page_content=item["raw_content"], metadata={"source": item["url"]}) for item in data]
29 |
30 | def _split_documents(self, documents: List[Document], chunk_size: int = 1000, chunk_overlap: int = 200) -> List[Document]:
31 | """
32 | Split documents into smaller chunks
33 | """
34 | text_splitter = RecursiveCharacterTextSplitter(
35 | chunk_size=chunk_size,
36 | chunk_overlap=chunk_overlap,
37 | )
38 | return text_splitter.split_documents(documents)
39 |
40 | async def asimilarity_search(self, query, k, filter):
41 | """Return query by vector store"""
42 | results = await self.vector_store.asimilarity_search(query=query, k=k, filter=filter)
43 | return results
44 |
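
A hedged usage sketch (not part of the file above); it assumes `InMemoryVectorStore` and `FakeEmbeddings` are available in your langchain-core version, and they stand in for whichever real vector store and embeddings you use.

from langchain_core.embeddings.fake import FakeEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

from gpt_researcher.vector_store import VectorStoreWrapper

# FakeEmbeddings avoids needing an API key for this sketch; swap in real embeddings for actual use.
store = InMemoryVectorStore(embedding=FakeEmbeddings(size=256))
wrapper = VectorStoreWrapper(store)
wrapper.load([{"raw_content": "GPT Researcher is an autonomous research agent.",
               "url": "https://gptr.dev"}])
# Inside an async context:
#     results = await wrapper.asimilarity_search("research agent", k=1, filter=None)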
--------------------------------------------------------------------------------
/langgraph.json:
--------------------------------------------------------------------------------
1 | {
2 | "python_version": "3.11",
3 | "dependencies": [
4 | "./multi_agents"
5 | ],
6 | "graphs": {
7 | "agent": "./multi_agents/agent.py:graph"
8 | },
9 | "env": ".env"
10 | }
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from dotenv import load_dotenv
2 | import logging
3 | from pathlib import Path
4 |
5 | # Create logs directory if it doesn't exist
6 | logs_dir = Path("logs")
7 | logs_dir.mkdir(exist_ok=True)
8 |
9 | # Configure logging
10 | logging.basicConfig(
11 | level=logging.INFO,
12 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
13 | handlers=[
14 | # File handler for general application logs
15 | logging.FileHandler('logs/app.log'),
16 | # Stream handler for console output
17 | logging.StreamHandler()
18 | ]
19 | )
20 |
21 | # Suppress verbose fontTools logging
22 | logging.getLogger('fontTools').setLevel(logging.WARNING)
23 | logging.getLogger('fontTools.subset').setLevel(logging.WARNING)
24 | logging.getLogger('fontTools.ttLib').setLevel(logging.WARNING)
25 |
26 | # Create logger instance
27 | logger = logging.getLogger(__name__)
28 |
29 | load_dotenv()
30 |
31 | from backend.server.server import app
32 |
33 | if __name__ == "__main__":
34 | import uvicorn
35 |
36 | logger.info("Starting server...")
37 | uvicorn.run(app, host="0.0.0.0", port=8000)
38 |
--------------------------------------------------------------------------------
/multi_agents/__init__.py:
--------------------------------------------------------------------------------
1 | # multi_agents/__init__.py
2 |
3 | from .agents import (
4 | ResearchAgent,
5 | WriterAgent,
6 | PublisherAgent,
7 | ReviserAgent,
8 | ReviewerAgent,
9 | EditorAgent,
10 | ChiefEditorAgent
11 | )
12 | from .memory import (
13 | DraftState,
14 | ResearchState
15 | )
16 |
17 | __all__ = [
18 | "ResearchAgent",
19 | "WriterAgent",
20 | "PublisherAgent",
21 | "ReviserAgent",
22 | "ReviewerAgent",
23 | "EditorAgent",
24 | "ChiefEditorAgent",
25 | "DraftState",
26 | "ResearchState"
27 | ]
--------------------------------------------------------------------------------
/multi_agents/agent.py:
--------------------------------------------------------------------------------
1 | from multi_agents.agents import ChiefEditorAgent
2 |
3 | chief_editor = ChiefEditorAgent({
4 | "query": "Is AI in a hype cycle?",
5 | "max_sections": 3,
6 | "follow_guidelines": False,
7 | "model": "gpt-4o",
8 | "guidelines": [
9 | "The report MUST be written in APA format",
10 | "Each sub section MUST include supporting sources using hyperlinks. If none exist, erase the sub section or rewrite it to be a part of the previous section",
11 | "The report MUST be written in spanish"
12 | ],
13 | "verbose": False
14 | }, websocket=None, stream_output=None)
15 | graph = chief_editor.init_research_team()
16 | graph = graph.compile()
--------------------------------------------------------------------------------
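Note: a hedged invocation sketch, not part of the repository, for the compiled LangGraph graph exported above. It assumes the graph's entry node accepts a ResearchState-style dict whose "task" mirrors the configuration passed to ChiefEditorAgent, and that the required API keys are set in the environment.

import asyncio
from multi_agents.agent import graph

# Assumed input shape: a ResearchState-style dict with a "task" key (see multi_agents/memory/research.py).
initial_state = {
    "task": {
        "query": "Is AI in a hype cycle?",
        "max_sections": 3,
        "follow_guidelines": False,
        "model": "gpt-4o",
        "guidelines": [],
        "verbose": False,
    }
}

result = asyncio.run(graph.ainvoke(initial_state))
print(result.get("report"))
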
/multi_agents/agents/__init__.py:
--------------------------------------------------------------------------------
1 | from .researcher import ResearchAgent
2 | from .writer import WriterAgent
3 | from .publisher import PublisherAgent
4 | from .reviser import ReviserAgent
5 | from .reviewer import ReviewerAgent
6 | from .editor import EditorAgent
7 | from .human import HumanAgent
8 |
9 | # Below import should remain last since it imports all of the above
10 | from .orchestrator import ChiefEditorAgent
11 |
12 | __all__ = [
13 | "ChiefEditorAgent",
14 | "ResearchAgent",
15 | "WriterAgent",
16 | "EditorAgent",
17 | "PublisherAgent",
18 | "ReviserAgent",
19 | "ReviewerAgent",
20 | "HumanAgent"
21 | ]
22 |
--------------------------------------------------------------------------------
/multi_agents/agents/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/multi_agents/agents/utils/__init__.py
--------------------------------------------------------------------------------
/multi_agents/agents/utils/llms.py:
--------------------------------------------------------------------------------
1 | import json_repair
2 | from langchain_community.adapters.openai import convert_openai_messages
3 | from langchain_core.utils.json import parse_json_markdown
4 | from loguru import logger
5 |
6 | from gpt_researcher.config.config import Config
7 | from gpt_researcher.utils.llm import create_chat_completion
8 |
9 |
10 | async def call_model(
11 | prompt: list,
12 | model: str,
13 | response_format: str | None = None,
14 | ):
15 |
16 | cfg = Config()
17 | lc_messages = convert_openai_messages(prompt)
18 |
19 | try:
20 | response = await create_chat_completion(
21 | model=model,
22 | messages=lc_messages,
23 | temperature=0,
24 | llm_provider=cfg.smart_llm_provider,
25 | llm_kwargs=cfg.llm_kwargs,
26 | # cost_callback=cost_callback,
27 | )
28 |
29 | if response_format == "json":
30 | return parse_json_markdown(response, parser=json_repair.loads)
31 |
32 | return response
33 |
34 | except Exception as e:
35 | print("⚠️ Error in calling model")
36 | logger.error(f"Error in calling model: {e}")
37 |
--------------------------------------------------------------------------------
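Note: a short usage sketch for call_model above, not part of the repository. The message content is illustrative; "gpt-4o" mirrors the model configured in multi_agents/task.json, and response_format="json" exercises the JSON-markdown parsing path shown above.

import asyncio
from multi_agents.agents.utils.llms import call_model

# Illustrative OpenAI-style prompt; any planner-style request would do.
prompt = [
    {"role": "system", "content": "You are a research planner. Respond only in JSON."},
    {"role": "user", "content": "Propose report sections for: Is AI in a hype cycle?"},
]

sections = asyncio.run(call_model(prompt, model="gpt-4o", response_format="json"))
print(sections)
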
/multi_agents/agents/utils/pdf_styles.css:
--------------------------------------------------------------------------------
1 | body {
2 | font-family: 'Libre Baskerville', serif;
3 | font-size: 12pt; /* standard size for academic papers */
4 | line-height: 1.6; /* for readability */
5 | color: #333; /* softer on the eyes than black */
6 | background-color: #fff; /* white background */
7 | margin: 0;
8 | padding: 0;
9 | }
10 |
11 | h1, h2, h3, h4, h5, h6 {
12 | font-family: 'Libre Baskerville', serif;
13 | color: #000; /* darker than the body text */
14 | margin-top: 1em; /* space above headers */
15 | }
16 |
17 | h1 {
18 | font-size: 2em; /* make h1 twice the size of the body text */
19 | }
20 |
21 | h2 {
22 | font-size: 1.5em;
23 | }
24 |
25 | /* Add some space between paragraphs */
26 | p {
27 | margin-bottom: 1em;
28 | }
29 |
30 | /* Style for blockquotes, often used in academic papers */
31 | blockquote {
32 | font-style: italic;
33 | margin: 1em 0;
34 | padding: 1em;
35 | background-color: #f9f9f9; /* a light grey background */
36 | }
37 |
38 | /* You might want to style tables, figures, etc. too */
39 | table {
40 | border-collapse: collapse;
41 | width: 100%;
42 | }
43 |
44 | table, th, td {
45 | border: 1px solid #ddd;
46 | text-align: left;
47 | padding: 8px;
48 | }
49 |
50 | th {
51 | background-color: #f2f2f2;
52 | color: black;
53 | }
--------------------------------------------------------------------------------
/multi_agents/agents/utils/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | def sanitize_filename(filename: str) -> str:
4 | """
5 | Sanitize a given filename by replacing characters that are invalid
6 | in Windows file paths with an underscore ('_').
7 |
8 | This function ensures that the filename is compatible with all
9 | operating systems by removing or replacing characters that are
10 | not allowed in Windows file paths. Specifically, it replaces
11 | the following characters: < > : " / \\ | ? *
12 |
13 | Parameters:
14 | filename (str): The original filename to be sanitized.
15 |
16 | Returns:
17 | str: The sanitized filename with invalid characters replaced by an underscore.
18 |
19 | Examples:
20 | >>> sanitize_filename('invalid:file/name*example?.txt')
21 | 'invalid_file_name_example_.txt'
22 |
23 | >>> sanitize_filename('valid_filename.txt')
24 | 'valid_filename.txt'
25 | """
26 | return re.sub(r'[<>:"/\\|?*]', '_', filename)
27 |
--------------------------------------------------------------------------------
/multi_agents/agents/utils/views.py:
--------------------------------------------------------------------------------
1 | from colorama import Fore, Style
2 | from enum import Enum
3 |
4 |
5 | class AgentColor(Enum):
6 | RESEARCHER = Fore.LIGHTBLUE_EX
7 | EDITOR = Fore.YELLOW
8 | WRITER = Fore.LIGHTGREEN_EX
9 | PUBLISHER = Fore.MAGENTA
10 | REVIEWER = Fore.CYAN
11 | REVISOR = Fore.LIGHTWHITE_EX
12 | MASTER = Fore.LIGHTYELLOW_EX
13 |
14 |
15 | def print_agent_output(output:str, agent: str="RESEARCHER"):
16 | print(f"{AgentColor[agent].value}{agent}: {output}{Style.RESET_ALL}")
--------------------------------------------------------------------------------
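Note: a tiny usage sketch for print_agent_output above, not part of the repository; the agent names must match AgentColor members.

from multi_agents.agents.utils.views import print_agent_output

# Colour-coded console lines per agent role.
print_agent_output("Planning the report layout...", agent="EDITOR")
print_agent_output("Publishing the final report.", agent="PUBLISHER")
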
/multi_agents/langgraph.json:
--------------------------------------------------------------------------------
1 | {
2 | "python_version": "3.11",
3 | "dependencies": [
4 | "."
5 | ],
6 | "graphs": {
7 | "agent": "./agent.py:graph"
8 | },
9 | "env": ".env"
10 | }
--------------------------------------------------------------------------------
/multi_agents/main.py:
--------------------------------------------------------------------------------
1 | from dotenv import load_dotenv
2 | import sys
3 | import os
4 | import uuid
5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
6 |
7 | from multi_agents.agents import ChiefEditorAgent
8 | import asyncio
9 | import json
10 | from gpt_researcher.utils.enum import Tone
11 |
12 | # Run with LangSmith if API key is set
13 | if os.environ.get("LANGCHAIN_API_KEY"):
14 | os.environ["LANGCHAIN_TRACING_V2"] = "true"
15 | load_dotenv()
16 |
17 | def open_task():
18 | # Get the directory of the current script
19 | current_dir = os.path.dirname(os.path.abspath(__file__))
20 | # Construct the absolute path to task.json
21 | task_json_path = os.path.join(current_dir, 'task.json')
22 |
23 | with open(task_json_path, 'r') as f:
24 | task = json.load(f)
25 |
26 | if not task:
27 | raise Exception("No task found. Please ensure a valid task.json file is present in the multi_agents directory and contains the necessary task information.")
28 |
29 | # Override model with STRATEGIC_LLM if defined in environment
30 | strategic_llm = os.environ.get("STRATEGIC_LLM")
31 | if strategic_llm and ":" in strategic_llm:
32 | # Extract the model name (part after the first colon)
33 | model_name = strategic_llm.split(":", 1)[1]
34 | task["model"] = model_name
35 |     elif strategic_llm:
36 |         task["model"] = strategic_llm
37 |
38 | return task
39 |
40 | async def run_research_task(query, websocket=None, stream_output=None, tone=Tone.Objective, headers=None):
41 | task = open_task()
42 | task["query"] = query
43 |
44 | chief_editor = ChiefEditorAgent(task, websocket, stream_output, tone, headers)
45 | research_report = await chief_editor.run_research_task()
46 |
47 | if websocket and stream_output:
48 | await stream_output("logs", "research_report", research_report, websocket)
49 |
50 | return research_report
51 |
52 | async def main():
53 | task = open_task()
54 |
55 | chief_editor = ChiefEditorAgent(task)
56 | research_report = await chief_editor.run_research_task(task_id=uuid.uuid4())
57 |
58 | return research_report
59 |
60 | if __name__ == "__main__":
61 | asyncio.run(main())
62 |
--------------------------------------------------------------------------------
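Note: a hedged sketch, not part of the repository, of calling run_research_task above programmatically. It assumes a valid multi_agents/task.json is present and the relevant API keys are configured in the environment.

import asyncio
from multi_agents.main import run_research_task

# Runs the full multi-agent flow for a single query, using the defaults from task.json.
report = asyncio.run(run_research_task("Is AI in a hype cycle?"))
print(report)
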
/multi_agents/memory/__init__.py:
--------------------------------------------------------------------------------
1 | from .draft import DraftState
2 | from .research import ResearchState
3 |
4 | __all__ = [
5 | "DraftState",
6 | "ResearchState"
7 | ]
--------------------------------------------------------------------------------
/multi_agents/memory/draft.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict, List, Annotated
2 | import operator
3 |
4 |
5 | class DraftState(TypedDict):
6 | task: dict
7 | topic: str
8 | draft: dict
9 | review: str
10 | revision_notes: str
--------------------------------------------------------------------------------
/multi_agents/memory/research.py:
--------------------------------------------------------------------------------
1 | from typing import TypedDict, List, Annotated
2 | import operator
3 |
4 |
5 | class ResearchState(TypedDict):
6 | task: dict
7 | initial_research: str
8 | sections: List[str]
9 | research_data: List[dict]
10 | human_feedback: str
11 | # Report layout
12 | title: str
13 | headers: dict
14 | date: str
15 | table_of_contents: str
16 | introduction: str
17 | conclusion: str
18 | sources: List[str]
19 | report: str
20 |
21 |
22 |
--------------------------------------------------------------------------------
/multi_agents/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "simple_js_test",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "server.js",
6 | "type": "module",
7 | "scripts": {
8 | "test": "echo \"Error: no test specified\" && exit 1"
9 | },
10 | "author": "",
11 | "license": "ISC",
12 | "dependencies": {
13 | "@langchain/langgraph-sdk": "^0.0.1-rc.13"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/multi_agents/requirements.txt:
--------------------------------------------------------------------------------
1 | json5
2 | langgraph
3 | langgraph-cli
4 | loguru
5 | python-dotenv
6 | weasyprint
7 |
--------------------------------------------------------------------------------
/multi_agents/task.json:
--------------------------------------------------------------------------------
1 | {
2 | "query": "Is AI in a hype cycle?",
3 | "max_sections": 3,
4 | "publish_formats": {
5 | "markdown": true,
6 | "pdf": true,
7 | "docx": true
8 | },
9 | "include_human_feedback": false,
10 | "follow_guidelines": false,
11 | "model": "gpt-4o",
12 | "guidelines": [
13 | "The report MUST be written in APA format",
14 | "Each sub section MUST include supporting sources using hyperlinks. If none exist, erase the sub section or rewrite it to be a part of the previous section",
15 | "The report MUST be written in spanish"
16 | ],
17 | "verbose": true
18 | }
--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | LATEST_VERSION = "0.13.4"
4 |
5 | exclude_packages = [
6 | "selenium",
7 | "webdriver",
8 | "fastapi",
9 | "fastapi.*",
10 | "uvicorn",
11 | "jinja2",
12 | "gpt-researcher",
13 | "langgraph"
14 | ]
15 |
16 | with open(r"README.md", "r", encoding="utf-8") as f:
17 | long_description = f.read()
18 |
19 | with open("requirements.txt", "r") as f:
20 | reqs = [line.strip() for line in f if not any(pkg in line for pkg in exclude_packages)]
21 |
22 | setup(
23 | name="gpt-researcher",
24 | version=LATEST_VERSION,
25 | description="GPT Researcher is an autonomous agent designed for comprehensive web research on any task",
26 | package_dir={'gpt_researcher': 'gpt_researcher'},
27 | packages=find_packages(exclude=exclude_packages),
28 | long_description=long_description,
29 | long_description_content_type="text/markdown",
30 | url="https://github.com/assafelovic/gpt-researcher",
31 | author="Assaf Elovic",
32 | author_email="assaf.elovic@gmail.com",
33 | license="MIT",
34 | classifiers=[
35 | "License :: OSI Approved :: MIT License",
36 | "Intended Audience :: Developers",
37 | "Intended Audience :: Education",
38 | "Intended Audience :: Science/Research",
39 | "Programming Language :: Python :: 3.11",
40 | "Programming Language :: Python :: 3.12",
41 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
42 | ],
43 | python_requires='>=3.11',
44 |     install_requires=reqs,
45 | )
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/tests/__init__.py
--------------------------------------------------------------------------------
/tests/docs/doc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/assafelovic/gpt-researcher/9d085d867fce49c24cf296659eebf5d7595cbed2/tests/docs/doc.pdf
--------------------------------------------------------------------------------
/tests/documents-report-source.py:
--------------------------------------------------------------------------------
1 | import os
2 | import asyncio
3 | import pytest
4 | # Ensure this path is correct
5 | from gpt_researcher import GPTResearcher
6 | from dotenv import load_dotenv
7 | load_dotenv()
8 |
9 | # Define the report types to test
10 | report_types = [
11 | "research_report",
12 | "custom_report",
13 | "subtopic_report",
14 | "summary_report",
15 | "detailed_report",
16 | "quick_report"
17 | ]
18 |
19 | # Define a common query and sources for testing
20 | query = "What can you tell me about myself based on my documents?"
21 |
22 | # Define the output directory
23 | output_dir = "./outputs"
24 |
25 |
26 | @pytest.mark.asyncio
27 | @pytest.mark.parametrize("report_type", report_types)
28 | async def test_gpt_researcher(report_type):
29 | # Ensure the output directory exists
30 | if not os.path.exists(output_dir):
31 | os.makedirs(output_dir)
32 |
33 | # Create an instance of GPTResearcher with report_source set to "documents"
34 | researcher = GPTResearcher(
35 | query=query, report_type=report_type, report_source="documents")
36 |
37 | # Conduct research and write the report
38 | await researcher.conduct_research()
39 | report = await researcher.write_report()
40 |
41 | # Define the expected output filenames
42 | pdf_filename = os.path.join(output_dir, f"{report_type}.pdf")
43 | docx_filename = os.path.join(output_dir, f"{report_type}.docx")
44 |
45 | # Check if the PDF and DOCX files are created
46 | # assert os.path.exists(pdf_filename), f"PDF file not found for report type: {report_type}"
47 | # assert os.path.exists(docx_filename), f"DOCX file not found for report type: {report_type}"
48 |
49 | # Clean up the generated files (optional)
50 | # os.remove(pdf_filename)
51 | # os.remove(docx_filename)
52 |
53 | if __name__ == "__main__":
54 | pytest.main()
55 |
--------------------------------------------------------------------------------
/tests/gptr-logs-handler.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import List, Dict, Any
3 | import asyncio
4 | from gpt_researcher import GPTResearcher
5 | from backend.server.server_utils import CustomLogsHandler # Update import
6 |
7 | async def run() -> None:
8 | """Run the research process and generate a report."""
9 | query = "What happened in the latest burning man floods?"
10 | report_type = "research_report"
11 | report_source = "online"
12 | tone = "informative"
13 | config_path = None
14 |
15 | custom_logs_handler = CustomLogsHandler(None, query) # Pass query parameter
16 |
17 | researcher = GPTResearcher(
18 | query=query,
19 | report_type=report_type,
20 | report_source=report_source,
21 | tone=tone,
22 | config_path=config_path,
23 | websocket=custom_logs_handler
24 | )
25 |
26 | await researcher.conduct_research() # Conduct the research
27 | report = await researcher.write_report() # Write the research report
28 | logging.info("Report generated successfully.") # Log report generation
29 |
30 | return report
31 |
32 | # Run the asynchronous function using asyncio
33 | if __name__ == "__main__":
34 | asyncio.run(run())
35 |
--------------------------------------------------------------------------------
/tests/report-types.py:
--------------------------------------------------------------------------------
1 | import os
2 | import asyncio
3 | import pytest
4 | from unittest.mock import AsyncMock
5 | from gpt_researcher.agent import GPTResearcher
6 | from backend.server.server_utils import CustomLogsHandler
7 | from typing import List, Dict, Any
8 |
9 | # Define the report types to test
10 | report_types = ["research_report", "subtopic_report"]
11 |
12 | # Define a common query and sources for testing
13 | query = "what is gpt-researcher"
14 |
15 |
16 | @pytest.mark.asyncio
17 | @pytest.mark.parametrize("report_type", report_types)
18 | async def test_gpt_researcher(report_type):
19 | mock_websocket = AsyncMock()
20 | custom_logs_handler = CustomLogsHandler(mock_websocket, query)
21 | # Create an instance of GPTResearcher
22 | researcher = GPTResearcher(
23 | query=query,
24 | query_domains=["github.com"],
25 | report_type=report_type,
26 | websocket=custom_logs_handler,
27 | )
28 |
29 | # Conduct research and write the report
30 | await researcher.conduct_research()
31 | report = await researcher.write_report()
32 |
33 | print(researcher.visited_urls)
34 | print(report)
35 |
36 | # Check if the report contains part of the query
37 | assert "gpt-researcher" in report
38 |
39 | # test if at least one url starts with "github.com" as it was limited to this domain
40 | matching_urls = [
41 | url for url in researcher.visited_urls if url.startswith("https://github.com")
42 | ]
43 | assert len(matching_urls) > 0
44 |
45 |
46 | if __name__ == "__main__":
47 | pytest.main()
48 |
--------------------------------------------------------------------------------
/tests/test-loaders.py:
--------------------------------------------------------------------------------
1 | from langchain_community.document_loaders import PyMuPDFLoader, UnstructuredCSVLoader
2 |
3 | # # Test PyMuPDFLoader
4 | pdf_loader = PyMuPDFLoader("my-docs/Elisha - Coding Career.pdf")
5 | try:
6 | pdf_data = pdf_loader.load()
7 | print("PDF Data:", pdf_data)
8 | except Exception as e:
9 | print("Failed to load PDF:", e)
10 |
11 | # Test UnstructuredCSVLoader
12 | csv_loader = UnstructuredCSVLoader("my-docs/active_braze_protocols_from_bq.csv", mode="elements")
13 | try:
14 | csv_data = csv_loader.load()
15 | print("CSV Data:", csv_data)
16 | except Exception as e:
17 | print("Failed to load CSV:", e)
--------------------------------------------------------------------------------
/tests/test-openai-llm.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from gpt_researcher.utils.llm import get_llm
3 | from gpt_researcher import GPTResearcher
4 | from dotenv import load_dotenv
5 | load_dotenv()
6 |
7 | async def main():
8 |
9 | # Example usage of get_llm function
10 | llm_provider = "openai"
11 | model = "gpt-3.5-turbo"
12 | temperature = 0.7
13 | max_tokens = 1000
14 |
15 | llm = get_llm(llm_provider, model=model, temperature=temperature, max_tokens=max_tokens)
16 | print(f"LLM Provider: {llm_provider}, Model: {model}, Temperature: {temperature}, Max Tokens: {max_tokens}")
17 |     print("llm:", llm)
18 | await test_llm(llm=llm)
19 |
20 |
21 | async def test_llm(llm):
22 | # Test the connection with a simple query
23 | messages = [{"role": "user", "content": "sup?"}]
24 | try:
25 | response = await llm.get_chat_response(messages, stream=False)
26 | print("LLM response:", response)
27 | except Exception as e:
28 | print(f"Error: {e}")
29 |
30 | # Run the async function
31 | asyncio.run(main())
--------------------------------------------------------------------------------
/tests/test-your-llm.py:
--------------------------------------------------------------------------------
1 | from gpt_researcher.config.config import Config
2 | from gpt_researcher.utils.llm import create_chat_completion
3 | import asyncio
4 | from dotenv import load_dotenv
5 | load_dotenv()
6 |
7 | async def main():
8 | cfg = Config()
9 |
10 | try:
11 | report = await create_chat_completion(
12 | model=cfg.smart_llm_model,
13 |             messages=[{"role": "user", "content": "sup?"}],
14 | temperature=0.35,
15 | llm_provider=cfg.smart_llm_provider,
16 | stream=True,
17 | max_tokens=cfg.smart_token_limit,
18 | llm_kwargs=cfg.llm_kwargs
19 | )
20 | except Exception as e:
21 | print(f"Error in calling LLM: {e}")
22 |
23 | # Run the async function
24 | asyncio.run(main())
--------------------------------------------------------------------------------
/tests/test-your-retriever.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | from dotenv import load_dotenv
3 | from gpt_researcher.config.config import Config
4 | from gpt_researcher.actions.retriever import get_retrievers
5 | from gpt_researcher.skills.researcher import ResearchConductor
6 | import pprint
7 | # Load environment variables from .env file
8 | load_dotenv()
9 |
10 | async def test_scrape_data_by_query():
11 | # Initialize the Config object
12 | config = Config()
13 |
14 | # Retrieve the retrievers based on the current configuration
15 | retrievers = get_retrievers({}, config)
16 | print("Retrievers:", retrievers)
17 |
18 | # Create a mock researcher object with necessary attributes
19 | class MockResearcher:
20 |         def __init__(self):
21 | self.retrievers = retrievers
22 | self.cfg = config
23 | self.verbose = True
24 | self.websocket = None
25 | self.scraper_manager = None # Mock or implement scraper manager
26 | self.vector_store = None # Mock or implement vector store
27 |
28 | researcher = MockResearcher()
29 | research_conductor = ResearchConductor(researcher)
30 | # print('research_conductor',dir(research_conductor))
31 | # print('MockResearcher',dir(researcher))
32 | # Define a sub-query to test
33 | sub_query = "design patterns for autonomous ai agents"
34 |
35 | # Iterate through all retrievers
36 | for retriever_class in retrievers:
37 | # Instantiate the retriever with the sub-query
38 | retriever = retriever_class(sub_query)
39 |
40 | # Perform the search using the current retriever
41 | search_results = await asyncio.to_thread(
42 | retriever.search, max_results=10
43 | )
44 |
45 | print("\033[35mSearch results:\033[0m")
46 | pprint.pprint(search_results, indent=4, width=80)
47 |
48 | if __name__ == "__main__":
49 | asyncio.run(test_scrape_data_by_query())
--------------------------------------------------------------------------------
/tests/test_logging.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from unittest.mock import AsyncMock
3 | from fastapi import WebSocket
4 | from backend.server.server_utils import CustomLogsHandler
5 | import os
6 | import json
7 |
8 | @pytest.mark.asyncio
9 | async def test_custom_logs_handler():
10 | # Mock websocket
11 | mock_websocket = AsyncMock()
12 | mock_websocket.send_json = AsyncMock()
13 |
14 | # Test initialization
15 | handler = CustomLogsHandler(mock_websocket, "test_query")
16 |
17 | # Verify log file creation
18 | assert os.path.exists(handler.log_file)
19 |
20 | # Test sending log data
21 | test_data = {
22 | "type": "logs",
23 | "message": "Test log message"
24 | }
25 |
26 | await handler.send_json(test_data)
27 |
28 | # Verify websocket was called with correct data
29 | mock_websocket.send_json.assert_called_once_with(test_data)
30 |
31 | # Verify log file contents
32 | with open(handler.log_file, 'r') as f:
33 | log_data = json.load(f)
34 | assert len(log_data['events']) == 1
35 | assert log_data['events'][0]['data'] == test_data
36 |
37 | @pytest.mark.asyncio
38 | async def test_content_update():
39 | """Test handling of non-log type data that updates content"""
40 | mock_websocket = AsyncMock()
41 | mock_websocket.send_json = AsyncMock()
42 |
43 | handler = CustomLogsHandler(mock_websocket, "test_query")
44 |
45 | # Test content update
46 | content_data = {
47 | "query": "test query",
48 | "sources": ["source1", "source2"],
49 | "report": "test report"
50 | }
51 |
52 | await handler.send_json(content_data)
53 |
54 | mock_websocket.send_json.assert_called_once_with(content_data)
55 |
56 | # Verify log file contents
57 | with open(handler.log_file, 'r') as f:
58 | log_data = json.load(f)
59 | assert log_data['content']['query'] == "test query"
60 | assert log_data['content']['sources'] == ["source1", "source2"]
61 | assert log_data['content']['report'] == "test report"
--------------------------------------------------------------------------------
/tests/test_logging_output.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import asyncio
3 | from pathlib import Path
4 | import json
5 | import logging
6 | from fastapi import WebSocket
7 | from datetime import datetime
8 |
9 | logging.basicConfig(level=logging.INFO)
10 | logger = logging.getLogger(__name__)
11 |
12 | class TestWebSocket(WebSocket):
13 | def __init__(self):
14 | self.events = []
15 | self.scope = {}
16 |
17 | def __bool__(self):
18 | return True
19 |
20 | async def accept(self):
21 | self.scope["type"] = "websocket"
22 | pass
23 |
24 | async def send_json(self, event):
25 | logger.info(f"WebSocket received event: {event}")
26 | self.events.append(event)
27 |
28 | @pytest.mark.asyncio
29 | async def test_log_output_file():
30 | """Test to verify logs are properly written to output file"""
31 | from gpt_researcher.agent import GPTResearcher
32 | from backend.server.server_utils import CustomLogsHandler
33 |
34 | # 1. Setup like the main app
35 | websocket = TestWebSocket()
36 | await websocket.accept()
37 |
38 | # 2. Initialize researcher like main app
39 | query = "What is the capital of France?"
40 | research_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash(query)}"
41 | logs_handler = CustomLogsHandler(websocket=websocket, task=research_id)
42 | researcher = GPTResearcher(query=query, websocket=logs_handler)
43 |
44 | # 3. Run research
45 | await researcher.conduct_research()
46 |
47 | # 4. Verify events were captured
48 | logger.info(f"Events captured: {len(websocket.events)}")
49 | assert len(websocket.events) > 0, "No events were captured"
50 |
51 | # 5. Check output file
52 | output_dir = Path().joinpath(Path.cwd(), "outputs")
53 | output_files = list(output_dir.glob(f"task_*{research_id}*.json"))
54 | assert len(output_files) > 0, "No output file was created"
55 |
56 | with open(output_files[-1]) as f:
57 | data = json.load(f)
58 | assert len(data.get('events', [])) > 0, "No events in output file"
59 |
60 | # Clean up the output files
61 | for output_file in output_files:
62 | output_file.unlink()
63 | logger.info(f"Deleted output file: {output_file}")
--------------------------------------------------------------------------------
/tests/test_logs.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | import sys
4 |
5 | # Add the project root to Python path
6 | project_root = Path(__file__).parent.parent
7 | sys.path.append(str(project_root))
8 |
9 | from backend.server.server_utils import CustomLogsHandler
10 |
11 | def test_logs_creation():
12 | # Print current working directory
13 | print(f"Current working directory: {os.getcwd()}")
14 |
15 | # Print project root
16 | print(f"Project root: {project_root}")
17 |
18 | # Try to create logs directory directly
19 | logs_dir = project_root / "logs"
20 | print(f"Attempting to create logs directory at: {logs_dir}")
21 |
22 | try:
23 | # Create directory with full permissions
24 | os.makedirs(logs_dir, mode=0o777, exist_ok=True)
25 | print(f"✓ Created directory: {logs_dir}")
26 |
27 | # Test file creation
28 | test_file = logs_dir / "test.txt"
29 | with open(test_file, 'w') as f:
30 | f.write("Test log entry")
31 | print(f"✓ Created test file: {test_file}")
32 |
33 | # Initialize the handler
34 | handler = CustomLogsHandler()
35 | print("✓ CustomLogsHandler initialized")
36 |
37 | # Test JSON logging
38 | handler.logs.append({"test": "message"})
39 | print("✓ Added test log entry")
40 |
41 | except Exception as e:
42 | print(f"❌ Error: {str(e)}")
43 | print(f"Error type: {type(e)}")
44 | import traceback
45 | print(f"Traceback: {traceback.format_exc()}")
46 |
47 | if __name__ == "__main__":
48 | test_logs_creation()
--------------------------------------------------------------------------------