├── .dockerignore ├── .github └── dependabot.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.es.md ├── README.fr.md ├── README.jp.md ├── README.kr.md ├── README.md ├── README.zh.md ├── application.py ├── backend ├── __init__.py ├── classes │ ├── __init__.py │ └── state.py ├── graph.py ├── nodes │ ├── __init__.py │ ├── briefing.py │ ├── collector.py │ ├── curator.py │ ├── editor.py │ ├── enricher.py │ ├── grounding.py │ └── researchers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── company.py │ │ ├── financial.py │ │ ├── industry.py │ │ └── news.py ├── services │ ├── mongodb.py │ ├── pdf_service.py │ └── websocket_manager.py └── utils │ ├── __init__.py │ ├── references.py │ └── utils.py ├── docker-compose.yml ├── langgraph.json ├── langgraph_entry.py ├── package-lock.json ├── requirements.txt ├── setup.sh ├── static ├── agent-flow.png ├── demo.mp4 ├── ui-1.png └── ui-2.png └── ui ├── .env.development.example ├── .gitignore ├── eslint.config.js ├── index.html ├── package-lock.json ├── package.json ├── postcss.config.js ├── public ├── favicon.ico └── tavilylogo.png ├── src ├── App.tsx ├── components │ ├── CurationExtraction.tsx │ ├── ExamplePopup.tsx │ ├── Header.tsx │ ├── LocationInput.tsx │ ├── ResearchBriefings.tsx │ ├── ResearchForm.tsx │ ├── ResearchQueries.tsx │ ├── ResearchReport.tsx │ ├── ResearchStatus.tsx │ └── index.ts ├── env.d.ts ├── index.css ├── main.tsx ├── styles │ └── index.ts ├── types │ └── index.ts ├── utils │ ├── constants.ts │ └── handlers.ts └── vite-env.d.ts ├── tailwind.config.js ├── tsconfig.app.json ├── tsconfig.json ├── tsconfig.node.json ├── vercel.json └── vite.config.ts /.dockerignore: -------------------------------------------------------------------------------- 1 | # Version control 2 | .git 3 | .gitignore 4 | .gitattributes 5 | 6 | # Environment files 7 | .env 8 | .env.* 9 | *.env 10 | 11 | # Python 12 | __pycache__/ 13 | *.py[cod] 14 | *.so 15 | .Python 16 | *.egg 17 | *.egg-info/ 18 | .eggs/ 19 | *.pyc 20 | .pytest_cache/ 21 | .coverage 22 | htmlcov/ 23 | .tox/ 24 | .venv/ 25 | venv/ 26 | 27 | # Node.js 28 | ui/node_modules/ 29 | ui/.npm 30 | ui/npm-debug.log* 31 | ui/yarn-debug.log* 32 | ui/yarn-error.log* 33 | ui/dist/ 34 | 35 | # IDE 36 | .idea/ 37 | .vscode/ 38 | *.swp 39 | *.swo 40 | 41 | # OS 42 | .DS_Store 43 | Thumbs.db 44 | 45 | # Project specific 46 | reports/* 47 | .langgraph/ 48 | .elasticbeanstalk/ 49 | README.md 50 | LICENSE 51 | *.md 52 | *.log -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options:
4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
5 |
6 | version: 2
7 | updates:
8 |   - package-ecosystem: "pip" # matches the root requirements.txt; see documentation for other values
9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Environment and secrets
2 | .env
3 | reports/*
4 |
5 | # Python
6 | __pycache__/
7 | *.py[cod]
8 | *.egg-info/
9 |
10 | # Virtual Environment
11 | .venv/
12 |
13 | # IDE and OS
14 | .vscode/
15 | .DS_Store
16 |
17 | # Frontend
18 | node_modules/
19 |
20 | # LangGraph
21 | .langgraph/
22 |
23 | # Elastic Beanstalk Files
24 | .elasticbeanstalk/*
25 | !.elasticbeanstalk/*.cfg.yml
26 | !.elasticbeanstalk/*.global.yml
27 | Procfile
28 | .vercel
29 | .ebextensions
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Stage 1: Build Frontend
2 | FROM node:20-slim AS frontend-builder
3 | WORKDIR /app/ui
4 | COPY ui/package*.json ./
5 | RUN npm install
6 | COPY ui/ ./
7 | RUN npm run build
8 |
9 | # Stage 2: Build Backend
10 | FROM python:3.11-slim AS backend-builder
11 | WORKDIR /app
12 | COPY requirements.txt .
13 | RUN pip install --no-cache-dir -r requirements.txt
14 |
15 | # Stage 3: Final Image
16 | FROM python:3.11-slim
17 | WORKDIR /app
18 |
19 | # Install system dependencies
20 | RUN apt-get update && apt-get install -y --no-install-recommends \
21 |     curl \
22 |     && rm -rf /var/lib/apt/lists/*
23 |
24 | # Copy backend
25 | COPY --from=backend-builder /usr/local/lib/python3.11/site-packages/ /usr/local/lib/python3.11/site-packages/
26 | COPY backend/ ./backend/
27 | COPY application.py .
28 |
29 | # Copy frontend build
30 | COPY --from=frontend-builder /app/ui/dist/ ./ui/dist/
31 |
32 | # Create reports directory
33 | RUN mkdir -p reports
34 |
35 | # Set environment variables
36 | ENV PYTHONUNBUFFERED=1
37 | ENV PORT=8000
38 |
39 | # Expose the port
40 | EXPOSE 8000
41 |
42 | # Create a non-root user
43 | RUN useradd -m -u 1000 appuser
44 | RUN chown -R appuser:appuser /app
45 | USER appuser
46 |
47 | # Start command
48 | CMD ["python", "-m", "uvicorn", "application:app", "--host", "0.0.0.0", "--port", "8000"]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity.
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2025 Guy Hartstein 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.es.md: -------------------------------------------------------------------------------- 1 | [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md) 2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md) 3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md) 4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md) 5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md) 6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md) 7 | 8 | # Investigador de Empresas 🔍 9 | 10 | ![interfaz web]() 11 | 12 | Una herramienta multi-agente que genera informes de investigación exhaustivos sobre empresas. La plataforma utiliza un sistema de agentes de IA para recopilar, seleccionar y sintetizar información sobre cualquier empresa. 13 | 14 | ✨¡Pruébalo en línea! https://companyresearcher.tavily.com ✨ 15 | 16 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9 17 | 18 | ## Características 19 | 20 | - **Investigación Multi-Fuente**: Recopila datos de diversas fuentes, incluyendo sitios web de empresas, artículos de noticias, informes financieros y análisis sectoriales 21 | - **Filtrado de Contenido Impulsado por IA**: Utiliza la puntuación de relevancia de Tavily para la selección de contenido 22 | - **Transmisión de Progreso en Tiempo Real**: Utiliza conexiones WebSocket para transmitir el progreso de la investigación y los resultados 23 | - **Arquitectura de Modelo Dual**: 24 | - Gemini 2.0 Flash para síntesis de investigación de alto contexto 25 | - GPT-4.1 para formato preciso y edición de informes 26 | - **Frontend Moderno en React**: Interfaz de usuario receptiva con actualizaciones en tiempo real, seguimiento de progreso y opciones de descarga 27 | - **Arquitectura Modular**: Construido utilizando un sistema de nodos de investigación y procesamiento especializados 28 | 29 | ## Marco de Agentes 30 | 31 | ### Sistema de Investigación 32 | 33 | La plataforma sigue un marco basado en agentes con nodos especializados que procesan datos secuencialmente: 34 | 35 | 1. **Nodos de Investigación**: 36 | - `CompanyAnalyzer`: Investiga información básica del negocio 37 | - `IndustryAnalyzer`: Analiza posición de mercado y tendencias 38 | - `FinancialAnalyst`: Recopila métricas financieras y datos de rendimiento 39 | - `NewsScanner`: Recopila noticias y desarrollos recientes 40 | 41 | 2. 
**Nodos de Procesamiento**:
42 |    - `Collector`: Agrega datos de investigación de todos los analizadores
43 |    - `Curator`: Implementa filtrado de contenido y puntuación de relevancia
44 |    - `Briefing`: Genera resúmenes específicos por categoría utilizando Gemini 2.0 Flash
45 |    - `Editor`: Compila y formatea los resúmenes en un informe final utilizando GPT-4.1-mini
46 |
47 | ![interfaz web]()
48 |
49 | ### Arquitectura de Generación de Contenido
50 |
51 | La plataforma aprovecha modelos separados para un rendimiento óptimo:
52 |
53 | 1. **Gemini 2.0 Flash** (`briefing.py`):
54 |    - Maneja tareas de síntesis de investigación de alto contexto
55 |    - Sobresale en el procesamiento y resumen de grandes volúmenes de datos
56 |    - Utilizado para generar resúmenes iniciales por categoría
57 |    - Eficiente en mantener el contexto a través de múltiples documentos
58 |
59 | 2. **GPT-4.1 mini** (`editor.py`):
60 |    - Se especializa en tareas precisas de formato y edición
61 |    - Maneja la estructura y consistencia en markdown
62 |    - Superior en seguir instrucciones exactas de formato
63 |    - Utilizado para:
64 |      - Compilación final del informe
65 |      - Eliminación de duplicados de contenido
66 |      - Formateo en markdown
67 |      - Transmisión de informes en tiempo real
68 |
69 | Este enfoque combina la fortaleza de Gemini en el manejo de ventanas de contexto grandes con la precisión de GPT-4.1-mini en seguir instrucciones específicas de formato.
70 |
71 | ### Sistema de Selección de Contenido
72 |
73 | La plataforma utiliza un sistema de filtrado de contenido en `curator.py`:
74 |
75 | 1. **Puntuación de Relevancia**:
76 |    - Los documentos son puntuados por la búsqueda potenciada por IA de Tavily
77 |    - Se requiere un umbral mínimo (predeterminado 0.4) para proceder
78 |    - Las puntuaciones reflejan la relevancia para la consulta de investigación específica
79 |    - Puntuaciones más altas indican mejores coincidencias con la intención de la investigación
80 |
81 | 2. **Procesamiento de Documentos**:
82 |    - El contenido se normaliza y limpia
83 |    - Las URLs se deduplican y estandarizan
84 |    - Los documentos se ordenan por puntuaciones de relevancia
85 |    - Las actualizaciones de progreso en tiempo real se envían a través de WebSocket
86 |
87 | ### Sistema de Comunicación en Tiempo Real
88 |
89 | La plataforma implementa un sistema de comunicación en tiempo real basado en WebSocket:
90 |
91 | ![interfaz web]()
92 |
93 | 1. **Implementación Backend**:
94 |    - Utiliza el soporte de WebSocket de FastAPI
95 |    - Mantiene conexiones persistentes por trabajo de investigación
96 |    - Envía actualizaciones de estado estructuradas para varios eventos:
97 |    ```python
98 |    await websocket_manager.send_status_update(
99 |        job_id=job_id,
100 |        status="processing",
101 |        message=f"Generating {category} briefing",
102 |        result={
103 |            "step": "Briefing",
104 |            "category": category,
105 |            "total_docs": len(docs)
106 |        }
107 |    )
108 |    ```
109 |
110 | 2. **Integración Frontend**:
111 |    - Los componentes de React se suscriben a actualizaciones WebSocket (véase el esbozo de cliente a continuación)
112 |    - Las actualizaciones se procesan y muestran en tiempo real
113 |    - Diferentes componentes de UI manejan tipos específicos de actualizaciones:
114 |      - Progreso de generación de consultas
115 |      - Estadísticas de selección de documentos
116 |      - Estado de finalización de resúmenes
117 |      - Progreso de generación de informes
118 |
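A modo de referencia, un esbozo mínimo en Python de un cliente que consume estas actualizaciones fuera del frontend de React. Supuestos (no forman parte del repositorio): el endpoint `ws://localhost:8000/research/ws/{job_id}` documentado más abajo, el paquete `websockets` instalado, y mensajes JSON con los campos `status`, `message` y `result` del fragmento anterior; los valores exactos de `status` pueden variar.

```python
# Esbozo de un cliente de prueba; no forma parte del repositorio.
import asyncio
import json

import websockets  # pip install websockets


async def seguir_investigacion(job_id: str) -> None:
    uri = f"ws://localhost:8000/research/ws/{job_id}"
    async with websockets.connect(uri) as ws:
        async for mensaje in ws:
            # Cada actualización es un JSON con estado y mensaje legible.
            actualizacion = json.loads(mensaje)
            print(actualizacion.get("status"), "-", actualizacion.get("message"))


asyncio.run(seguir_investigacion("id-de-trabajo-de-ejemplo"))
```

Ejecutar el script con un `job_id` activo imprime el progreso de la investigación en la terminal.

119 | 3. 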
**Tipos de Estado**:
120 |    - `query_generating`: Actualizaciones en tiempo real de creación de consultas
121 |    - `document_kept`: Progreso de selección de documentos
122 |    - `briefing_start/complete`: Estado de generación de resúmenes
123 |    - `report_chunk`: Transmisión de generación de informes
124 |    - `curation_complete`: Estadísticas finales de documentos
125 |
126 | ## Instalación
127 |
128 | ### Instalación Rápida (Recomendada)
129 |
130 | La forma más sencilla de comenzar es utilizando el script de instalación:
131 |
132 | 1. Clonar el repositorio:
133 | ```bash
134 | git clone https://github.com/pogjester/tavily-company-research.git
135 | cd tavily-company-research
136 | ```
137 |
138 | 2. Hacer que el script de instalación sea ejecutable y ejecutarlo:
139 | ```bash
140 | chmod +x setup.sh
141 | ./setup.sh
142 | ```
143 |
144 | El script de instalación hará lo siguiente:
145 | - Verificar las versiones requeridas de Python y Node.js
146 | - Opcionalmente crear un entorno virtual de Python (recomendado)
147 | - Instalar todas las dependencias (Python y Node.js)
148 | - Guiarte a través de la configuración de tus variables de entorno
149 | - Opcionalmente iniciar los servidores de backend y frontend
150 |
151 | Necesitarás tener listas las siguientes claves API:
152 | - Clave API de Tavily
153 | - Clave API de Google Gemini
154 | - Clave API de OpenAI
155 | - URI de MongoDB (opcional)
156 |
157 | ### Instalación Manual
158 |
159 | Si prefieres realizar la instalación manualmente, sigue estos pasos:
160 |
161 | 1. Clonar el repositorio:
162 | ```bash
163 | git clone https://github.com/pogjester/tavily-company-research.git
164 | cd tavily-company-research
165 | ```
166 |
167 | 2. Instalar dependencias de backend:
168 | ```bash
169 | # Opcional: Crear y activar entorno virtual
170 | python -m venv .venv
171 | source .venv/bin/activate
172 |
173 | # Instalar dependencias de Python
174 | pip install -r requirements.txt
175 | ```
176 |
177 | 3. Instalar dependencias de frontend:
178 | ```bash
179 | cd ui
180 | npm install
181 | ```
182 |
183 | 4. Crear un archivo `.env` con tus claves API:
184 | ```env
185 | TAVILY_API_KEY=tu_clave_tavily
186 | GEMINI_API_KEY=tu_clave_gemini
187 | OPENAI_API_KEY=tu_clave_openai
188 |
189 | # Opcional: Habilitar persistencia en MongoDB
190 | # MONGODB_URI=tu_cadena_de_conexion_mongodb
191 | ```
192 |
193 | ### Instalación con Docker
194 |
195 | La aplicación puede ejecutarse utilizando Docker y Docker Compose:
196 |
197 | 1. Clonar el repositorio:
198 | ```bash
199 | git clone https://github.com/pogjester/tavily-company-research.git
200 | cd tavily-company-research
201 | ```
202 |
203 | 2. Crear un archivo `.env` con tus claves API (más abajo hay un esbozo de cómo el backend puede leer estas variables):
204 | ```env
205 | TAVILY_API_KEY=tu_clave_tavily
206 | GEMINI_API_KEY=tu_clave_gemini
207 | OPENAI_API_KEY=tu_clave_openai
208 |
209 | # Opcional: Habilitar persistencia en MongoDB
210 | # MONGODB_URI=tu_cadena_de_conexion_mongodb
211 | ```
212 |
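A modo de referencia, un esbozo mínimo de cómo el backend puede leer y validar estas variables al arrancar. Es una ilustración bajo supuestos (se usa `python-dotenv`; los nombres son los del archivo `.env` anterior), no el código real del proyecto:

```python
# Esbozo ilustrativo: carga y validación de las claves del archivo .env.
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # lee el archivo .env del directorio actual

obligatorias = ("TAVILY_API_KEY", "GEMINI_API_KEY", "OPENAI_API_KEY")
faltantes = [clave for clave in obligatorias if not os.getenv(clave)]
if faltantes:
    raise RuntimeError(f"Faltan variables de entorno: {', '.join(faltantes)}")

# MongoDB es opcional: la persistencia solo se activa si hay URI.
mongodb_uri = os.getenv("MONGODB_URI")
```

213 | 3. 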
Construir e iniciar los contenedores:
214 | ```bash
215 | docker compose up --build
216 | ```
217 |
218 | Esto iniciará los servicios de backend y frontend:
219 | - La API de backend estará disponible en `http://localhost:8000`
220 | - El frontend estará disponible en `http://localhost:5174`
221 |
222 | Para detener los servicios:
223 | ```bash
224 | docker compose down
225 | ```
226 |
227 | Nota: Al actualizar las variables de entorno en `.env`, necesitarás reiniciar los contenedores:
228 | ```bash
229 | docker compose down && docker compose up
230 | ```
231 |
232 | ### Ejecutando la Aplicación
233 |
234 | 1. Iniciar el servidor de backend (elige una opción):
235 | ```bash
236 | # Opción 1: Script de Python directo
237 | python application.py
238 |
239 | # Opción 2: FastAPI con Uvicorn
240 | uvicorn application:app --reload --port 8000
241 | ```
242 |
243 | 2. En una nueva terminal, iniciar el frontend:
244 | ```bash
245 | cd ui
246 | npm run dev
247 | ```
248 |
249 | 3. Acceder a la aplicación en `http://localhost:5173`
250 |
251 | ## Uso
252 |
253 | ### Desarrollo Local
254 |
255 | 1. Iniciar el servidor de backend (elige una opción):
256 |
257 | **Opción 1: Script de Python directo**
258 | ```bash
259 | python application.py
260 | ```
261 |
262 | **Opción 2: FastAPI con Uvicorn**
263 | ```bash
264 | # Instalar uvicorn si aún no está instalado
265 | pip install uvicorn
266 |
267 | # Ejecutar la aplicación FastAPI con recarga automática
268 | uvicorn application:app --reload --port 8000
269 | ```
270 |
271 | El backend estará disponible en:
272 | - Punto de conexión API: `http://localhost:8000`
273 | - Punto de conexión WebSocket: `ws://localhost:8000/research/ws/{job_id}`
274 |
275 | 2. Iniciar el servidor de desarrollo del frontend:
276 | ```bash
277 | cd ui
278 | npm run dev
279 | ```
280 |
281 | 3. Acceder a la aplicación en `http://localhost:5173`
282 |
283 | ### Opciones de Despliegue
284 |
285 | La aplicación puede desplegarse en varias plataformas en la nube. Aquí hay algunas opciones comunes:
286 |
287 | #### AWS Elastic Beanstalk
288 |
289 | 1. Instalar el EB CLI:
290 | ```bash
291 | pip install awsebcli
292 | ```
293 |
294 | 2. Inicializar la aplicación EB:
295 | ```bash
296 | eb init -p python-3.11 tavily-research
297 | ```
298 |
299 | 3. Crear y desplegar:
300 | ```bash
301 | eb create tavily-research-prod
302 | ```
303 |
304 | #### Otras Opciones de Despliegue
305 |
306 | - **Docker**: La aplicación incluye un Dockerfile para despliegue en contenedores
307 | - **Heroku**: Despliegue directamente desde GitHub con el buildpack de Python
308 | - **Google Cloud Run**: Adecuado para despliegue en contenedores con escalado automático
309 |
310 | Elige la plataforma que mejor se adapte a tus necesidades. La aplicación es independiente de la plataforma y puede alojarse en cualquier lugar que admita aplicaciones web Python.
311 |
312 | ## Contribuir
313 |
314 | 1. Haz un fork del repositorio
315 | 2. Crea una rama de características (`git checkout -b feature/caracteristica-increible`)
316 | 3. Haz commit de tus cambios (`git commit -m 'Añadir característica increíble'`)
317 | 4. Haz push a la rama (`git push origin feature/caracteristica-increible`)
318 | 5. Abre un Pull Request
319 |
320 | ## Licencia
321 |
322 | Este proyecto está licenciado bajo la Licencia Apache 2.0 - consulta el archivo [LICENSE](LICENSE) para más detalles.
323 | 324 | ## Agradecimientos 325 | 326 | - [Tavily](https://tavily.com/) por la API de investigación 327 | - Todas las demás bibliotecas de código abierto y sus contribuyentes 328 | -------------------------------------------------------------------------------- /README.fr.md: -------------------------------------------------------------------------------- 1 | [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md) 2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md) 3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md) 4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md) 5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md) 6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md) 7 | 8 | 9 | # Agent de Recherche d'Entreprise 🔍 10 | 11 | ![web ui]() 12 | 13 | Un outil multi-agents qui génère des rapports de recherche d'entreprise complets. La plateforme utilise un pipeline d'agents IA pour collecter, organiser et synthétiser des informations sur n'importe quelle entreprise. 14 | 15 | ✨Essayez-le en ligne ! https://companyresearcher.tavily.com ✨ 16 | 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9 18 | 19 | ## Fonctionnalités 20 | 21 | - **Recherche Multi-Sources** : Récupère des données de diverses sources, y compris les sites web d'entreprise, articles de presse, rapports financiers et analyses sectorielles 22 | - **Filtrage de contenu par IA** : Utilise le score de pertinence de Tavily pour la curation du contenu 23 | - **Streaming en temps réel** : Utilise les WebSockets pour diffuser l'avancement et les résultats de la recherche en temps réel 24 | - **Architecture à double modèle** : 25 | - Gemini 2.0 Flash pour la synthèse de recherche à large contexte 26 | - GPT-4.1 pour la mise en forme et l'édition précises du rapport 27 | - **Frontend React moderne** : Interface réactive avec mises à jour en temps réel, suivi de progression et options de téléchargement 28 | - **Architecture modulaire** : Construite autour d'un pipeline de nœuds spécialisés de recherche et de traitement 29 | 30 | ## Cadre Agentique 31 | 32 | ### Pipeline de Recherche 33 | 34 | La plateforme suit un cadre agentique avec des nœuds spécialisés qui traitent les données de manière séquentielle : 35 | 36 | 1. **Nœuds de Recherche** : 37 | - `CompanyAnalyzer` : Recherche les informations principales sur l'entreprise 38 | - `IndustryAnalyzer` : Analyse la position sur le marché et les tendances 39 | - `FinancialAnalyst` : Récupère les indicateurs financiers et les données de performance 40 | - `NewsScanner` : Collecte les actualités et développements récents 41 | 42 | 2. 
**Nœuds de Traitement** :
43 |    - `Collector` : Agrège les données de recherche de tous les analyseurs
44 |    - `Curator` : Met en œuvre le filtrage de contenu et le scoring de pertinence
45 |    - `Briefing` : Génère des synthèses par catégorie à l'aide de Gemini 2.0 Flash
46 |    - `Editor` : Compile et met en forme les synthèses dans un rapport final avec GPT-4.1-mini
47 |
48 | ![web ui]()
49 |
50 | ### Architecture de Génération de Contenu
51 |
52 | La plateforme exploite des modèles distincts pour des performances optimales :
53 |
54 | 1. **Gemini 2.0 Flash** (`briefing.py`) :
55 |    - Gère la synthèse de recherche à large contexte
56 |    - Excelle dans le traitement et le résumé de grands volumes de données
57 |    - Utilisé pour générer les synthèses initiales par catégorie
58 |    - Efficace pour maintenir le contexte sur plusieurs documents
59 |
60 | 2. **GPT-4.1 mini** (`editor.py`) :
61 |    - Spécialisé dans la mise en forme et l'édition précises
62 |    - Gère la structure markdown et la cohérence
63 |    - Supérieur pour suivre des instructions de formatage exactes
64 |    - Utilisé pour :
65 |      - Compilation du rapport final
66 |      - Déduplication du contenu
67 |      - Mise en forme markdown
68 |      - Streaming du rapport en temps réel
69 |
70 | Cette approche combine la capacité de Gemini à gérer de larges fenêtres de contexte avec la précision de GPT-4.1-mini pour le respect des consignes de formatage.
71 |
72 | ### Système de Curation de Contenu
73 |
74 | La plateforme utilise un système de filtrage de contenu dans `curator.py` :
75 |
76 | 1. **Scoring de Pertinence** :
77 |    - Les documents sont scorés par la recherche IA de Tavily
78 |    - Un seuil minimum (par défaut 0,4) est requis pour continuer
79 |    - Les scores reflètent la pertinence par rapport à la requête de recherche
80 |    - Un score élevé indique une meilleure correspondance avec l'intention de recherche
81 |
82 | 2. **Traitement des Documents** :
83 |    - Le contenu est normalisé et nettoyé
84 |    - Les URLs sont dédupliquées et standardisées
85 |    - Les documents sont triés par score de pertinence
86 |    - Les mises à jour de progression sont envoyées en temps réel via WebSocket
87 |
88 | ### Système de Communication en Temps Réel
89 |
90 | La plateforme implémente un système de communication en temps réel basé sur WebSocket :
91 |
92 | ![web ui]()
93 |
94 | 1. **Implémentation Backend** :
95 |    - Utilise le support WebSocket de FastAPI
96 |    - Maintient des connexions persistantes par tâche de recherche
97 |    - Envoie des mises à jour structurées pour divers événements (une esquisse simplifiée d'un tel gestionnaire figure plus bas) :
98 |    ```python
99 |    await websocket_manager.send_status_update(
100 |        job_id=job_id,
101 |        status="processing",
102 |        message=f"Génération du briefing {category}",
103 |        result={
104 |            "step": "Briefing",
105 |            "category": category,
106 |            "total_docs": len(docs)
107 |        }
108 |    )
109 |    ```
110 |
111 | 2. **Intégration Frontend** :
112 |    - Les composants React s'abonnent aux mises à jour WebSocket
113 |    - Les mises à jour sont traitées et affichées en temps réel
114 |    - Différents composants UI gèrent des types de mises à jour spécifiques :
115 |      - Progression de la génération de requête
116 |      - Statistiques de curation de documents
117 |      - Statut de complétion des briefings
118 |      - Progression de la génération du rapport
119 |
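À titre d'illustration, une esquisse minimale d'un tel gestionnaire côté FastAPI. Les noms `WebSocketManager` et `send_status_update` reprennent le fragment ci-dessus ; le reste est une hypothèse simplifiée, et non l'implémentation exacte de `websocket_manager.py` :

```python
# Esquisse illustrative d'un gestionnaire de connexions par tâche de recherche.
from collections import defaultdict
from typing import Any

from fastapi import WebSocket


class WebSocketManager:
    def __init__(self) -> None:
        # job_id -> connexions actives pour cette tâche de recherche
        self.connections: dict[str, list[WebSocket]] = defaultdict(list)

    async def connect(self, job_id: str, websocket: WebSocket) -> None:
        await websocket.accept()
        self.connections[job_id].append(websocket)

    def disconnect(self, job_id: str, websocket: WebSocket) -> None:
        self.connections[job_id].remove(websocket)

    async def send_status_update(
        self,
        job_id: str,
        status: str,
        message: str = "",
        result: dict[str, Any] | None = None,
    ) -> None:
        # Même forme de charge utile que dans le fragment ci-dessus.
        payload = {"status": status, "message": message, "result": result or {}}
        for websocket in self.connections.get(job_id, []):
            await websocket.send_json(payload)
```

Une seule instance partagée suffit : chaque nœud du pipeline peut alors pousser ses mises à jour avec le même `job_id`.

120 | 3. 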
**Types de Statut** : 121 | - `query_generating` : Mises à jour de création de requête en temps réel 122 | - `document_kept` : Progression de la curation de documents 123 | - `briefing_start/complete` : Statut de génération des briefings 124 | - `report_chunk` : Streaming de la génération du rapport 125 | - `curation_complete` : Statistiques finales des documents 126 | 127 | ## Configuration 128 | 129 | ### Configuration Rapide (Recommandée) 130 | 131 | La façon la plus simple de commencer est d'utiliser le script de configuration : 132 | 133 | 1. Clonez le dépôt : 134 | ```bash 135 | git clone https://github.com/pogjester/tavily-company-research.git 136 | cd tavily-company-research 137 | ``` 138 | 139 | 2. Rendez le script de configuration exécutable et lancez-le : 140 | ```bash 141 | chmod +x setup.sh 142 | ./setup.sh 143 | ``` 144 | 145 | Le script de configuration va : 146 | - Vérifier les versions requises de Python et Node.js 147 | - Créer éventuellement un environnement virtuel Python (recommandé) 148 | - Installer toutes les dépendances (Python et Node.js) 149 | - Vous guider dans la configuration de vos variables d'environnement 150 | - Démarrer éventuellement les serveurs backend et frontend 151 | 152 | Vous aurez besoin des clés API suivantes : 153 | - Clé API Tavily 154 | - Clé API Google Gemini 155 | - Clé API OpenAI 156 | - URI MongoDB (optionnel) 157 | 158 | ### Configuration Manuelle 159 | 160 | Si vous préférez configurer manuellement, suivez ces étapes : 161 | 162 | 1. Clonez le dépôt : 163 | ```bash 164 | git clone https://github.com/pogjester/tavily-company-research.git 165 | cd tavily-company-research 166 | ``` 167 | 168 | 2. Installez les dépendances backend : 169 | ```bash 170 | # Optionnel : Créez et activez un environnement virtuel 171 | python -m venv .venv 172 | source .venv/bin/activate 173 | 174 | # Installez les dépendances Python 175 | pip install -r requirements.txt 176 | ``` 177 | 178 | 3. Installez les dépendances frontend : 179 | ```bash 180 | cd ui 181 | npm install 182 | ``` 183 | 184 | 4. Créez un fichier `.env` avec vos clés API : 185 | ```env 186 | TAVILY_API_KEY=votre_clé_tavily 187 | GEMINI_API_KEY=votre_clé_gemini 188 | OPENAI_API_KEY=votre_clé_openai 189 | 190 | # Optionnel : Activez la persistance MongoDB 191 | # MONGODB_URI=votre_chaîne_de_connexion_mongodb 192 | ``` 193 | 194 | ### Configuration Docker 195 | 196 | L'application peut être exécutée à l'aide de Docker et Docker Compose : 197 | 198 | 1. Clonez le dépôt : 199 | ```bash 200 | git clone https://github.com/pogjester/tavily-company-research.git 201 | cd tavily-company-research 202 | ``` 203 | 204 | 2. Créez un fichier `.env` avec vos clés API : 205 | ```env 206 | TAVILY_API_KEY=votre_clé_tavily 207 | GEMINI_API_KEY=votre_clé_gemini 208 | OPENAI_API_KEY=votre_clé_openai 209 | 210 | # Optionnel : Activez la persistance MongoDB 211 | # MONGODB_URI=votre_chaîne_de_connexion_mongodb 212 | ``` 213 | 214 | 3. 
Construisez et démarrez les conteneurs :
215 | ```bash
216 | docker compose up --build
217 | ```
218 |
219 | Cela démarrera les services backend et frontend :
220 | - L'API backend sera disponible sur `http://localhost:8000`
221 | - Le frontend sera disponible sur `http://localhost:5174`
222 |
223 | Pour arrêter les services :
224 | ```bash
225 | docker compose down
226 | ```
227 |
228 | Remarque : Lors de la mise à jour des variables d'environnement dans `.env`, vous devrez redémarrer les conteneurs :
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 |
233 | ### Exécution de l'Application
234 |
235 | 1. Démarrez le serveur backend (choisissez une option) :
236 | ```bash
237 | # Option 1 : Script Python direct
238 | python application.py
239 |
240 | # Option 2 : FastAPI avec Uvicorn
241 | uvicorn application:app --reload --port 8000
242 | ```
243 |
244 | 2. Dans un nouveau terminal, démarrez le frontend :
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 |
250 | 3. Accédez à l'application sur `http://localhost:5173`
251 |
252 | ## Utilisation
253 |
254 | ### Développement Local
255 |
256 | 1. Démarrez le serveur backend (choisissez une option) :
257 |
258 | **Option 1 : Script Python direct**
259 | ```bash
260 | python application.py
261 | ```
262 |
263 | **Option 2 : FastAPI avec Uvicorn**
264 | ```bash
265 | # Installez uvicorn si ce n'est pas déjà fait
266 | pip install uvicorn
267 |
268 | # Exécutez l'application FastAPI avec rechargement à chaud
269 | uvicorn application:app --reload --port 8000
270 | ```
271 |
272 | Le backend sera disponible sur :
273 | - Point d'accès API : `http://localhost:8000`
274 | - Point d'accès WebSocket : `ws://localhost:8000/research/ws/{job_id}`
275 |
276 | 2. Démarrez le serveur de développement frontend :
277 | ```bash
278 | cd ui
279 | npm run dev
280 | ```
281 |
282 | 3. Accédez à l'application sur `http://localhost:5173`
283 |
284 | ### Options de Déploiement
285 |
286 | L'application peut être déployée sur diverses plateformes cloud. Voici quelques options courantes :
287 |
288 | #### AWS Elastic Beanstalk
289 |
290 | 1. Installez l'EB CLI :
291 | ```bash
292 | pip install awsebcli
293 | ```
294 |
295 | 2. Initialisez l'application EB :
296 | ```bash
297 | eb init -p python-3.11 tavily-research
298 | ```
299 |
300 | 3. Créez et déployez :
301 | ```bash
302 | eb create tavily-research-prod
303 | ```
304 |
305 | #### Autres Options de Déploiement
306 |
307 | - **Docker** : L'application inclut un Dockerfile pour le déploiement conteneurisé
308 | - **Heroku** : Déployez directement depuis GitHub avec le buildpack Python
309 | - **Google Cloud Run** : Adapté au déploiement conteneurisé avec mise à l'échelle automatique
310 |
311 | Choisissez la plateforme qui convient le mieux à vos besoins. L'application est indépendante de la plateforme et peut être hébergée partout où les applications web Python sont prises en charge.
312 |
313 | ## Contribution
314 |
315 | 1. Forkez le dépôt
316 | 2. Créez une branche de fonctionnalité (`git checkout -b fonctionnalite/superbe-fonction`)
317 | 3. Validez vos modifications (`git commit -m "Ajout d'une superbe fonction"`)
318 | 4. Poussez vers la branche (`git push origin fonctionnalite/superbe-fonction`)
319 | 5. Ouvrez une Pull Request
320 |
321 | ## Licence
322 |
323 | Ce projet est sous licence Apache 2.0 - voir le fichier [LICENSE](LICENSE) pour plus de détails.
324 | 325 | ## Remerciements 326 | 327 | - [Tavily](https://tavily.com/) pour l'API de recherche 328 | - Toutes les autres bibliothèques open-source et leurs contributeurs 329 | -------------------------------------------------------------------------------- /README.jp.md: -------------------------------------------------------------------------------- 1 | [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md) 2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md) 3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md) 4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md) 5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md) 6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md) 7 | 8 | 9 | # 企業調査エージェント 🔍 10 | 11 | ![web ui]() 12 | 13 | 包括的な企業調査レポートを生成するマルチエージェントツール。このプラットフォームは、AIエージェントのパイプラインを使用して、あらゆる企業に関する情報を収集、整理、統合します。 14 | 15 | ✨オンラインで試してみてください! https://companyresearcher.tavily.com ✨ 16 | 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9 18 | 19 | ## 機能 20 | 21 | - **マルチソース調査**:企業ウェブサイト、ニュース記事、財務報告書、業界分析など、様々なソースからデータを取得 22 | - **AIによるコンテンツフィルタリング**:Tavilyの関連性スコアを使用したコンテンツキュレーション 23 | - **リアルタイムストリーミング**:WebSocketを使用して調査の進捗と結果をリアルタイムで配信 24 | - **デュアルモデルアーキテクチャ**: 25 | - 大規模コンテキスト調査統合のためのGemini 2.0 Flash 26 | - 精密なレポート書式設定と編集のためのGPT-4.1 27 | - **モダンなReactフロントエンド**:リアルタイム更新、進捗追跡、ダウンロードオプションを備えたレスポンシブインターフェース 28 | - **モジュラーアーキテクチャ**:専門的な調査・処理ノードのパイプラインを中心に構築 29 | 30 | ## エージェントフレームワーク 31 | 32 | ### 調査パイプライン 33 | 34 | このプラットフォームは、データを順次処理する専門ノードを持つエージェントフレームワークに従います: 35 | 36 | 1. **調査ノード**: 37 | - `CompanyAnalyzer`:主要な企業情報を調査 38 | - `IndustryAnalyzer`:市場ポジションとトレンドを分析 39 | - `FinancialAnalyst`:財務指標とパフォーマンスデータを取得 40 | - `NewsScanner`:最新のニュースと動向を収集 41 | 42 | 2. **処理ノード**: 43 | - `Collector`:すべてのアナライザーから調査データを集約 44 | - `Curator`:コンテンツフィルタリングと関連性スコアリングを実装 45 | - `Briefing`:Gemini 2.0 Flashを使用してカテゴリ別の要約を生成 46 | - `Editor`:GPT-4.1-miniで要約を最終レポートにコンパイル・書式設定 47 | 48 | ![web ui]() 49 | 50 | ### コンテンツ生成アーキテクチャ 51 | 52 | このプラットフォームは最適なパフォーマンスのために異なるモデルを活用します: 53 | 54 | 1. **Gemini 2.0 Flash** (`briefing.py`): 55 | - 大規模コンテキスト調査統合を処理 56 | - 大量データの処理と要約に優れる 57 | - カテゴリ別の初期要約生成に使用 58 | - 複数文書にわたるコンテキスト維持に効率的 59 | 60 | 2. **GPT-4.1 mini** (`editor.py`): 61 | - 精密な書式設定と編集に特化 62 | - Markdown構造と一貫性を処理 63 | - 正確な書式設定指示の遵守に優れる 64 | - 以下に使用: 65 | - 最終レポートのコンパイル 66 | - コンテンツの重複除去 67 | - Markdown書式設定 68 | - リアルタイムレポートストリーミング 69 | 70 | このアプローチは、Geminiの大規模コンテキストウィンドウ処理能力とGPT-4.1-miniの書式設定指示精度を組み合わせます。 71 | 72 | ### コンテンツキュレーションシステム 73 | 74 | このプラットフォームは`curator.py`でコンテンツフィルタリングシステムを使用します: 75 | 76 | 1. **関連性スコアリング**: 77 | - 文書はTavilyのAI検索によってスコア付けされます 78 | - 継続するには最小閾値(デフォルト0.4)が必要 79 | - スコアは検索クエリとの関連性を反映 80 | - 高スコアは検索意図とのより良い一致を示す 81 | 82 | 2. **文書処理**: 83 | - コンテンツは正規化・クリーニングされます 84 | - URLは重複除去・標準化されます 85 | - 文書は関連性スコアでソートされます 86 | - 進捗更新はWebSocket経由でリアルタイム送信されます 87 | 88 | ### リアルタイム通信システム 89 | 90 | このプラットフォームはWebSocketベースのリアルタイム通信システムを実装します: 91 | 92 | ![web ui]() 93 | 94 | 1. 
**バックエンド実装**:
95 |    - FastAPIのWebSocketサポートを使用
96 |    - 調査タスクごとに永続的な接続を維持
97 |    - 様々なイベントに対して構造化された更新を送信:
98 |    ```python
99 |    await websocket_manager.send_status_update(
100 |        job_id=job_id,
101 |        status="processing",
102 |        message=f"{category}ブリーフィング生成中",
103 |        result={
104 |            "step": "Briefing",
105 |            "category": category,
106 |            "total_docs": len(docs)
107 |        }
108 |    )
109 |    ```
110 |
111 | 2. **フロントエンド統合**:
112 |    - ReactコンポーネントがWebSocket更新を購読
113 |    - 更新はリアルタイムで処理・表示されます
114 |    - 異なるUIコンポーネントが特定の更新タイプを処理:
115 |      - クエリ生成進捗
116 |      - 文書キュレーション統計
117 |      - ブリーフィング完了ステータス
118 |      - レポート生成進捗
119 |
120 | 3. **ステータスタイプ**:
121 |    - `query_generating`:リアルタイムクエリ作成更新
122 |    - `document_kept`:文書キュレーション進捗
123 |    - `briefing_start/complete`:ブリーフィング生成ステータス
124 |    - `report_chunk`:レポート生成ストリーミング
125 |    - `curation_complete`:最終文書統計
126 |
127 | ## セットアップ
128 |
129 | ### クイックセットアップ(推奨)
130 |
131 | 最も簡単な開始方法はセットアップスクリプトを使用することです:
132 |
133 | 1. リポジトリをクローン:
134 | ```bash
135 | git clone https://github.com/pogjester/tavily-company-research.git
136 | cd tavily-company-research
137 | ```
138 |
139 | 2. セットアップスクリプトを実行可能にして実行:
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 |
145 | セットアップスクリプトは以下を行います:
146 | - 必要なPythonとNode.jsのバージョンを確認
147 | - Python仮想環境を作成(推奨)
148 | - すべての依存関係をインストール(PythonとNode.js)
149 | - 環境変数の設定をガイド
150 | - バックエンドとフロントエンドサーバーを起動(オプション)
151 |
152 | 以下のAPIキーが必要です:
153 | - Tavily APIキー
154 | - Google Gemini APIキー
155 | - OpenAI APIキー
156 | - MongoDB URI(オプション)
157 |
158 | ### 手動セットアップ
159 |
160 | 手動でセットアップしたい場合は、以下の手順に従ってください:
161 |
162 | 1. リポジトリをクローン:
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 |
168 | 2. バックエンド依存関係をインストール:
169 | ```bash
170 | # オプション:仮想環境を作成・アクティベート
171 | python -m venv .venv
172 | source .venv/bin/activate
173 |
174 | # Python依存関係をインストール
175 | pip install -r requirements.txt
176 | ```
177 |
178 | 3. フロントエンド依存関係をインストール:
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 |
184 | 4. APIキーを含む`.env`ファイルを作成:
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 |
190 | # オプション:MongoDB永続化を有効化
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 |
194 | ### Dockerセットアップ
195 |
196 | アプリケーションはDockerとDocker Composeを使用して実行できます:
197 |
198 | 1. リポジトリをクローン:
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 |
204 | 2. APIキーを含む`.env`ファイルを作成:
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 |
210 | # オプション:MongoDB永続化を有効化
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 |
214 | 3. コンテナをビルド・起動:
215 | ```bash
216 | docker compose up --build
217 | ```
218 |
219 | これによりバックエンドとフロントエンドサービスが起動します:
220 | - バックエンドAPIは`http://localhost:8000`で利用可能
221 | - フロントエンドは`http://localhost:5174`で利用可能
222 |
223 | サービスを停止するには:
224 | ```bash
225 | docker compose down
226 | ```
227 |
228 | 注意:`.env`の環境変数を更新する際は、コンテナを再起動する必要があります:
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 |
233 | ### アプリケーションの実行
234 |
235 | 1. バックエンドサーバーを起動(オプションを選択):
236 | ```bash
237 | # オプション1:Pythonスクリプトを直接実行
238 | python application.py
239 |
240 | # オプション2:UvicornでFastAPI
241 | uvicorn application:app --reload --port 8000
242 | ```
243 |
244 | 2. 新しいターミナルでフロントエンドを起動:
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 |
250 | 3. 
`http://localhost:5173`でアプリケーションにアクセス
251 |
252 | ## 使用方法
253 |
254 | ### ローカル開発
255 |
256 | 1. バックエンドサーバーを起動(オプションを選択):
257 |
258 | **オプション1:Pythonスクリプトを直接実行**
259 | ```bash
260 | python application.py
261 | ```
262 |
263 | **オプション2:UvicornでFastAPI**
264 | ```bash
265 | # uvicornがインストールされていない場合はインストール
266 | pip install uvicorn
267 |
268 | # ホットリロード付きでFastAPIアプリケーションを実行
269 | uvicorn application:app --reload --port 8000
270 | ```
271 |
272 | バックエンドは以下で利用可能:
273 | - APIエンドポイント:`http://localhost:8000`
274 | - WebSocketエンドポイント:`ws://localhost:8000/research/ws/{job_id}`
275 |
276 | 2. フロントエンド開発サーバーを起動:
277 | ```bash
278 | cd ui
279 | npm run dev
280 | ```
281 |
282 | 3. `http://localhost:5173`でアプリケーションにアクセス
283 |
284 | ### デプロイメントオプション
285 |
286 | アプリケーションは様々なクラウドプラットフォームにデプロイできます。一般的なオプションをいくつか紹介します:
287 |
288 | #### AWS Elastic Beanstalk
289 |
290 | 1. EB CLIをインストール:
291 | ```bash
292 | pip install awsebcli
293 | ```
294 |
295 | 2. EBアプリケーションを初期化:
296 | ```bash
297 | eb init -p python-3.11 tavily-research
298 | ```
299 |
300 | 3. 作成・デプロイ:
301 | ```bash
302 | eb create tavily-research-prod
303 | ```
304 |
305 | #### その他のデプロイメントオプション
306 |
307 | - **Docker**:アプリケーションにはコンテナ化デプロイメント用のDockerfileが含まれています
308 | - **Heroku**:PythonビルドパックでGitHubから直接デプロイ
309 | - **Google Cloud Run**:自動スケーリング付きコンテナ化デプロイメントに適しています
310 |
311 | ニーズに最も適したプラットフォームを選択してください。アプリケーションはプラットフォーム非依存で、Pythonウェブアプリケーションがサポートされているどこでもホストできます。
312 |
313 | ## 貢献
314 |
315 | 1. リポジトリをフォーク
316 | 2. 機能ブランチを作成(`git checkout -b feature/amazing-feature`)
317 | 3. 変更をコミット(`git commit -m 'Add some amazing feature'`)
318 | 4. ブランチにプッシュ(`git push origin feature/amazing-feature`)
319 | 5. プルリクエストを開く
320 |
321 | ## ライセンス
322 |
323 | このプロジェクトはApache 2.0ライセンスの下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルを参照してください。
324 |
325 | ## 謝辞
326 |
327 | - 検索APIを提供する[Tavily](https://tavily.com/)
328 | - その他すべてのオープンソースライブラリとその貢献者
--------------------------------------------------------------------------------
/README.kr.md:
--------------------------------------------------------------------------------
1 | [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md)
2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md)
3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md)
4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md)
5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md)
6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md)
7 |
8 |
9 | # 기업 조사 에이전트 🔍
10 |
11 | ![web ui]()
12 |
13 | 포괄적인 기업 조사 보고서를 생성하는 멀티 에이전트 도구입니다. 이 플랫폼은 AI 에이전트 파이프라인을 사용하여 모든 기업에 대한 정보를 수집, 정리 및 종합합니다.
14 |
15 | ✨온라인에서 체험해보세요! 
https://companyresearcher.tavily.com ✨ 16 | 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9 18 | 19 | ## 기능 20 | 21 | - **멀티소스 조사**: 기업 웹사이트, 뉴스 기사, 재무 보고서, 업계 분석 등 다양한 소스에서 데이터 수집 22 | - **AI 콘텐츠 필터링**: Tavily의 관련성 점수를 사용한 콘텐츠 큐레이션 23 | - **실시간 스트리밍**: WebSocket을 사용하여 조사 진행 상황과 결과를 실시간으로 스트리밍 24 | - **듀얼 모델 아키텍처**: 25 | - 대규모 컨텍스트 조사 종합을 위한 Gemini 2.0 Flash 26 | - 정밀한 보고서 형식 지정 및 편집을 위한 GPT-4.1 27 | - **모던 React 프론트엔드**: 실시간 업데이트, 진행 상황 추적, 다운로드 옵션을 갖춘 반응형 인터페이스 28 | - **모듈러 아키텍처**: 전문화된 조사 및 처리 노드 파이프라인을 중심으로 구축 29 | 30 | ## 에이전트 프레임워크 31 | 32 | ### 조사 파이프라인 33 | 34 | 이 플랫폼은 데이터를 순차적으로 처리하는 전문화된 노드를 가진 에이전트 프레임워크를 따릅니다: 35 | 36 | 1. **조사 노드**: 37 | - `CompanyAnalyzer`: 핵심 기업 정보 조사 38 | - `IndustryAnalyzer`: 시장 위치 및 트렌드 분석 39 | - `FinancialAnalyst`: 재무 지표 및 성과 데이터 수집 40 | - `NewsScanner`: 최신 뉴스 및 개발 사항 수집 41 | 42 | 2. **처리 노드**: 43 | - `Collector`: 모든 분석기에서 조사 데이터 집계 44 | - `Curator`: 콘텐츠 필터링 및 관련성 점수 매기기 구현 45 | - `Briefing`: Gemini 2.0 Flash를 사용하여 카테고리별 요약 생성 46 | - `Editor`: GPT-4.1-mini로 요약을 최종 보고서로 컴파일 및 형식 지정 47 | 48 | ![web ui]() 49 | 50 | ### 콘텐츠 생성 아키텍처 51 | 52 | 이 플랫폼은 최적의 성능을 위해 서로 다른 모델을 활용합니다: 53 | 54 | 1. **Gemini 2.0 Flash** (`briefing.py`): 55 | - 대규모 컨텍스트 조사 종합 처리 56 | - 대량의 데이터 처리 및 요약에 뛰어남 57 | - 카테고리별 초기 요약 생성에 사용 58 | - 여러 문서에 걸친 컨텍스트 유지에 효율적 59 | 60 | 2. **GPT-4.1 mini** (`editor.py`): 61 | - 정밀한 형식 지정 및 편집에 특화 62 | - Markdown 구조 및 일관성 처리 63 | - 정확한 형식 지정 지침 준수에 우수 64 | - 다음 용도로 사용: 65 | - 최종 보고서 컴파일 66 | - 콘텐츠 중복 제거 67 | - Markdown 형식 지정 68 | - 실시간 보고서 스트리밍 69 | 70 | 이 접근 방식은 Gemini의 대규모 컨텍스트 윈도우 처리 능력과 GPT-4.1-mini의 형식 지정 지침 정밀도를 결합합니다. 71 | 72 | ### 콘텐츠 큐레이션 시스템 73 | 74 | 이 플랫폼은 `curator.py`에서 콘텐츠 필터링 시스템을 사용합니다: 75 | 76 | 1. **관련성 점수 매기기**: 77 | - 문서는 Tavily의 AI 검색으로 점수가 매겨집니다 78 | - 계속 진행하려면 최소 임계값(기본값 0.4)이 필요합니다 79 | - 점수는 검색 쿼리와의 관련성을 반영합니다 80 | - 높은 점수는 검색 의도와의 더 나은 일치를 나타냅니다 81 | 82 | 2. **문서 처리**: 83 | - 콘텐츠가 정규화되고 정리됩니다 84 | - URL이 중복 제거되고 표준화됩니다 85 | - 문서가 관련성 점수로 정렬됩니다 86 | - 진행 상황 업데이트가 WebSocket을 통해 실시간으로 전송됩니다 87 | 88 | ### 실시간 통신 시스템 89 | 90 | 이 플랫폼은 WebSocket 기반 실시간 통신 시스템을 구현합니다: 91 | 92 | ![web ui]() 93 | 94 | 1. **백엔드 구현**: 95 | - FastAPI의 WebSocket 지원 사용 96 | - 조사 작업당 지속적인 연결 유지 97 | - 다양한 이벤트에 대한 구조화된 업데이트 전송: 98 | ```python 99 | await websocket_manager.send_status_update( 100 | job_id=job_id, 101 | status="processing", 102 | message=f"{category} 브리핑 생성 중", 103 | result={ 104 | "step": "Briefing", 105 | "category": category, 106 | "total_docs": len(docs) 107 | } 108 | ) 109 | ``` 110 | 111 | 2. **프론트엔드 통합**: 112 | - React 컴포넌트가 WebSocket 업데이트를 구독 113 | - 업데이트가 실시간으로 처리되고 표시됩니다 114 | - 다양한 UI 컴포넌트가 특정 업데이트 유형을 처리: 115 | - 쿼리 생성 진행 상황 116 | - 문서 큐레이션 통계 117 | - 브리핑 완료 상태 118 | - 보고서 생성 진행 상황 119 | 120 | 3. **상태 유형**: 121 | - `query_generating`: 실시간 쿼리 생성 업데이트 122 | - `document_kept`: 문서 큐레이션 진행 상황 123 | - `briefing_start/complete`: 브리핑 생성 상태 124 | - `report_chunk`: 보고서 생성 스트리밍 125 | - `curation_complete`: 최종 문서 통계 126 | 127 | ## 설정 128 | 129 | ### 빠른 설정 (권장) 130 | 131 | 시작하는 가장 쉬운 방법은 설정 스크립트를 사용하는 것입니다: 132 | 133 | 1. 저장소 클론: 134 | ```bash 135 | git clone https://github.com/pogjester/tavily-company-research.git 136 | cd tavily-company-research 137 | ``` 138 | 139 | 2. 
설정 스크립트를 실행 가능하게 만들고 실행:
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 |
145 | 설정 스크립트는 다음을 수행합니다:
146 | - 필요한 Python 및 Node.js 버전 확인
147 | - Python 가상 환경 생성 (권장)
148 | - 모든 종속성 설치 (Python 및 Node.js)
149 | - 환경 변수 설정 안내
150 | - 백엔드 및 프론트엔드 서버 시작 (선택사항)
151 |
152 | 다음 API 키가 필요합니다:
153 | - Tavily API 키
154 | - Google Gemini API 키
155 | - OpenAI API 키
156 | - MongoDB URI (선택사항)
157 |
158 | ### 수동 설정
159 |
160 | 수동으로 설정하려면 다음 단계를 따르세요:
161 |
162 | 1. 저장소 클론:
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 |
168 | 2. 백엔드 종속성 설치:
169 | ```bash
170 | # 선택사항: 가상 환경 생성 및 활성화
171 | python -m venv .venv
172 | source .venv/bin/activate
173 |
174 | # Python 종속성 설치
175 | pip install -r requirements.txt
176 | ```
177 |
178 | 3. 프론트엔드 종속성 설치:
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 |
184 | 4. API 키가 포함된 `.env` 파일 생성:
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 |
190 | # 선택사항: MongoDB 지속성 활성화
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 |
194 | ### Docker 설정
195 |
196 | 애플리케이션은 Docker 및 Docker Compose를 사용하여 실행할 수 있습니다:
197 |
198 | 1. 저장소 클론:
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 |
204 | 2. API 키가 포함된 `.env` 파일 생성:
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 |
210 | # 선택사항: MongoDB 지속성 활성화
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 |
214 | 3. 컨테이너 빌드 및 시작:
215 | ```bash
216 | docker compose up --build
217 | ```
218 |
219 | 이렇게 하면 백엔드 및 프론트엔드 서비스가 시작됩니다:
220 | - 백엔드 API는 `http://localhost:8000`에서 사용 가능
221 | - 프론트엔드는 `http://localhost:5174`에서 사용 가능
222 |
223 | 서비스를 중지하려면:
224 | ```bash
225 | docker compose down
226 | ```
227 |
228 | 참고: `.env`의 환경 변수를 업데이트할 때 컨테이너를 다시 시작해야 합니다:
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 |
233 | ### 애플리케이션 실행
234 |
235 | 1. 백엔드 서버 시작 (옵션 선택):
236 | ```bash
237 | # 옵션 1: Python 스크립트 직접 실행
238 | python application.py
239 |
240 | # 옵션 2: Uvicorn으로 FastAPI
241 | uvicorn application:app --reload --port 8000
242 | ```
243 |
244 | 2. 새 터미널에서 프론트엔드 시작:
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 |
250 | 3. `http://localhost:5173`에서 애플리케이션에 액세스
251 |
252 | ## 사용법
253 |
254 | ### 로컬 개발
255 |
256 | 1. 백엔드 서버 시작 (옵션 선택):
257 |
258 | **옵션 1: Python 스크립트 직접 실행**
259 | ```bash
260 | python application.py
261 | ```
262 |
263 | **옵션 2: Uvicorn으로 FastAPI**
264 | ```bash
265 | # uvicorn이 설치되지 않은 경우 설치
266 | pip install uvicorn
267 |
268 | # 핫 리로드로 FastAPI 애플리케이션 실행
269 | uvicorn application:app --reload --port 8000
270 | ```
271 |
272 | 백엔드는 다음에서 사용 가능합니다:
273 | - API 엔드포인트: `http://localhost:8000`
274 | - WebSocket 엔드포인트: `ws://localhost:8000/research/ws/{job_id}`
275 |
276 | 2. 프론트엔드 개발 서버 시작:
277 | ```bash
278 | cd ui
279 | npm run dev
280 | ```
281 |
282 | 3. `http://localhost:5173`에서 애플리케이션에 액세스
283 |
284 | ### 배포 옵션
285 |
286 | 애플리케이션은 다양한 클라우드 플랫폼에 배포할 수 있습니다. 몇 가지 일반적인 옵션은 다음과 같습니다:
287 |
288 | #### AWS Elastic Beanstalk
289 |
290 | 1. EB CLI 설치:
291 | ```bash
292 | pip install awsebcli
293 | ```
294 |
295 | 2. EB 애플리케이션 초기화:
296 | ```bash
297 | eb init -p python-3.11 tavily-research
298 | ```
299 |
300 | 3. 
생성 및 배포: 301 | ```bash 302 | eb create tavily-research-prod 303 | ``` 304 | 305 | #### 기타 배포 옵션 306 | 307 | - **Docker**: 애플리케이션에는 컨테이너화된 배포를 위한 Dockerfile이 포함되어 있습니다 308 | - **Heroku**: Python 빌드팩으로 GitHub에서 직접 배포 309 | - **Google Cloud Run**: 자동 스케일링을 통한 컨테이너화된 배포에 적합 310 | 311 | 귀하의 요구 사항에 가장 적합한 플랫폼을 선택하세요. 애플리케이션은 플랫폼에 구애받지 않으며 Python 웹 애플리케이션이 지원되는 곳이면 어디든 호스팅할 수 있습니다. 312 | 313 | ## 기여 314 | 315 | 1. 저장소 포크 316 | 2. 기능 브랜치 생성 (`git checkout -b feature/amazing-feature`) 317 | 3. 변경 사항 커밋 (`git commit -m 'Add some amazing feature'`) 318 | 4. 브랜치에 푸시 (`git push origin feature/amazing-feature`) 319 | 5. Pull Request 열기 320 | 321 | ## 라이선스 322 | 323 | 이 프로젝트는 MIT 라이선스 하에 라이선스가 부여됩니다 - 자세한 내용은 [LICENSE](LICENSE) 파일을 참조하세요. 324 | 325 | ## 감사의 말 326 | 327 | - 검색 API를 제공하는 [Tavily](https://tavily.com/) 328 | - 모든 다른 오픈 소스 라이브러리와 그 기여자들 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md) 2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md) 3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md) 4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md) 5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md) 6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md) 7 | 8 | 9 | # Agentic Company Researcher 🔍 10 | 11 | ![web ui]() 12 | 13 | A multi-agent tool that generates comprehensive company research reports. The platform uses a pipeline of AI agents to gather, curate, and synthesize information about any company. 14 | 15 | ✨Check it out online! https://companyresearcher.tavily.com ✨ 16 | 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9 18 | 19 | ## Features 20 | 21 | - **Multi-Source Research**: Gathers data from various sources including company websites, news articles, financial reports, and industry analyses 22 | - **AI-Powered Content Filtering**: Uses Tavily's relevance scoring for content curation 23 | - **Real-Time Progress Streaming**: Uses WebSocket connections to stream research progress and results 24 | - **Dual Model Architecture**: 25 | - Gemini 2.0 Flash for high-context research synthesis 26 | - GPT-4.1 for precise report formatting and editing 27 | - **Modern React Frontend**: Responsive UI with real-time updates, progress tracking, and download options 28 | - **Modular Architecture**: Built using a pipeline of specialized research and processing nodes 29 | 30 | ## Agent Framework 31 | 32 | ### Research Pipeline 33 | 34 | The platform follows an agentic framework with specialized nodes that process data sequentially: 35 | 36 | 1. **Research Nodes**: 37 | - `CompanyAnalyzer`: Researches core business information 38 | - `IndustryAnalyzer`: Analyzes market position and trends 39 | - `FinancialAnalyst`: Gathers financial metrics and performance data 40 | - `NewsScanner`: Collects recent news and developments 41 | 42 | 2. 
**Processing Nodes**: 43 | - `Collector`: Aggregates research data from all analyzers 44 | - `Curator`: Implements content filtering and relevance scoring 45 | - `Briefing`: Generates category-specific summaries using Gemini 2.0 Flash 46 | - `Editor`: Compiles and formats the briefings into a final report using GPT-4.1-mini 47 | 48 | ![web ui]() 49 | 50 | ### Content Generation Architecture 51 | 52 | The platform leverages separate models for optimal performance: 53 | 54 | 1. **Gemini 2.0 Flash** (`briefing.py`): 55 | - Handles high-context research synthesis tasks 56 | - Excels at processing and summarizing large volumes of data 57 | - Used for generating initial category briefings 58 | - Efficient at maintaining context across multiple documents 59 | 60 | 2. **GPT-4.1 mini** (`editor.py`): 61 | - Specializes in precise formatting and editing tasks 62 | - Handles markdown structure and consistency 63 | - Superior at following exact formatting instructions 64 | - Used for: 65 | - Final report compilation 66 | - Content deduplication 67 | - Markdown formatting 68 | - Real-time report streaming 69 | 70 | This approach combines Gemini's strength in handling large context windows with GPT-4.1-mini's precision in following specific formatting instructions. 71 | 72 | ### Content Curation System 73 | 74 | The platform uses a content filtering system in `curator.py`: 75 | 76 | 1. **Relevance Scoring**: 77 | - Documents are scored by Tavily's AI-powered search 78 | - A minimum threshold (default 0.4) is required to proceed 79 | - Scores reflect relevance to the specific research query 80 | - Higher scores indicate better matches to the research intent 81 | 82 | 2. **Document Processing**: 83 | - Content is normalized and cleaned 84 | - URLs are deduplicated and standardized 85 | - Documents are sorted by relevance scores 86 | - Real-time progress updates are sent via WebSocket 87 | 88 | ### Real-Time Communication System 89 | 90 | The platform implements a WebSocket-based real-time communication system: 91 | 92 | ![web ui]() 93 | 94 | 1. **Backend Implementation**: 95 | - Uses FastAPI's WebSocket support 96 | - Maintains persistent connections per research job 97 | - Sends structured status updates for various events: 98 | ```python 99 | await websocket_manager.send_status_update( 100 | job_id=job_id, 101 | status="processing", 102 | message=f"Generating {category} briefing", 103 | result={ 104 | "step": "Briefing", 105 | "category": category, 106 | "total_docs": len(docs) 107 | } 108 | ) 109 | ``` 110 | 111 | 2. **Frontend Integration**: 112 | - React components subscribe to WebSocket updates 113 | - Updates are processed and displayed in real-time 114 | - Different UI components handle specific update types: 115 | - Query generation progress 116 | - Document curation statistics 117 | - Briefing completion status 118 | - Report generation progress 119 | 120 | 3. **Status Types**: 121 | - `query_generating`: Real-time query creation updates 122 | - `document_kept`: Document curation progress 123 | - `briefing_start/complete`: Briefing generation status 124 | - `report_chunk`: Streaming report generation 125 | - `curation_complete`: Final document statistics 126 | 127 | ## Setup 128 | 129 | ### Quick Setup (Recommended) 130 | 131 | The easiest way to get started is using the setup script: 132 | 133 | 1. Clone the repository: 134 | ```bash 135 | git clone https://github.com/pogjester/tavily-company-research.git 136 | cd tavily-company-research 137 | ``` 138 | 139 | 2. 
Make the setup script executable and run it:
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 | 
145 | The setup script will:
146 | - Check for required Python and Node.js versions
147 | - Optionally create a Python virtual environment (recommended)
148 | - Install all dependencies (Python and Node.js)
149 | - Guide you through setting up your environment variables
150 | - Optionally start both backend and frontend servers
151 | 
152 | You'll need the following API keys ready:
153 | - Tavily API Key
154 | - Google Gemini API Key
155 | - OpenAI API Key
156 | - MongoDB URI (optional)
157 | 
158 | ### Manual Setup
159 | 
160 | If you prefer to set up manually, follow these steps:
161 | 
162 | 1. Clone the repository:
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 | 
168 | 2. Install backend dependencies:
169 | ```bash
170 | # Optional: Create and activate virtual environment
171 | python -m venv .venv
172 | source .venv/bin/activate
173 | 
174 | # Install Python dependencies
175 | pip install -r requirements.txt
176 | ```
177 | 
178 | 3. Install frontend dependencies:
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 | 
184 | 4. Create a `.env` file with your API keys:
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 | 
190 | # Optional: Enable MongoDB persistence
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 | 
194 | ### Docker Setup
195 | 
196 | The application can be run using Docker and Docker Compose:
197 | 
198 | 1. Clone the repository:
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 | 
204 | 2. Create a `.env` file with your API keys:
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 | 
210 | # Optional: Enable MongoDB persistence
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 | 
214 | 3. Build and start the containers:
215 | ```bash
216 | docker compose up --build
217 | ```
218 | 
219 | This will start both the backend and frontend services:
220 | - Backend API will be available at `http://localhost:8000`
221 | - Frontend will be available at `http://localhost:5174`
222 | 
223 | To stop the services:
224 | ```bash
225 | docker compose down
226 | ```
227 | 
228 | Note: When updating environment variables in `.env`, you'll need to restart the containers:
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 | 
233 | ### Running the Application
234 | 
235 | 1. Start the backend server (choose one):
236 | ```bash
237 | # Option 1: Direct Python Module
238 | python -m application
239 | 
240 | # Option 2: FastAPI with Uvicorn
241 | uvicorn application:app --reload --port 8000
242 | ```
243 | 
244 | 2. In a new terminal, start the frontend:
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 | 
250 | 3. Access the application at `http://localhost:5173`
251 | 
252 | ## Usage
253 | 
254 | ### Local Development
255 | 
256 | 1. 
Start the backend server (choose one option):
257 | 
258 | **Option 1: Direct Python Module**
259 | ```bash
260 | python -m application
261 | ```
262 | 
263 | **Option 2: FastAPI with Uvicorn**
264 | ```bash
265 | # Install uvicorn if not already installed
266 | pip install uvicorn
267 | 
268 | # Run the FastAPI application with hot reload
269 | uvicorn application:app --reload --port 8000
270 | ```
271 | 
272 | The backend will be available at:
273 | - API Endpoint: `http://localhost:8000`
274 | - WebSocket Endpoint: `ws://localhost:8000/research/ws/{job_id}`
275 | 
276 | 2. Start the frontend development server:
277 | ```bash
278 | cd ui
279 | npm run dev
280 | ```
281 | 
282 | 3. Access the application at `http://localhost:5173`
283 | 
284 | ### Deployment Options
285 | 
286 | The application can be deployed to various cloud platforms. Here are some common options:
287 | 
288 | #### AWS Elastic Beanstalk
289 | 
290 | 1. Install the EB CLI:
291 | ```bash
292 | pip install awsebcli
293 | ```
294 | 
295 | 2. Initialize EB application:
296 | ```bash
297 | eb init -p python-3.11 tavily-research
298 | ```
299 | 
300 | 3. Create and deploy:
301 | ```bash
302 | eb create tavily-research-prod
303 | ```
304 | 
305 | #### Other Deployment Options
306 | 
307 | - **Docker**: The application includes a Dockerfile for containerized deployment
308 | - **Heroku**: Deploy directly from GitHub with the Python buildpack
309 | - **Google Cloud Run**: Suitable for containerized deployment with automatic scaling
310 | 
311 | Choose the platform that best suits your needs. The application is platform-agnostic and can be hosted anywhere that supports Python web applications.
312 | 
313 | ## Contributing
314 | 
315 | 1. Fork the repository
316 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
317 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
318 | 4. Push to the branch (`git push origin feature/amazing-feature`)
319 | 5. Open a Pull Request
320 | 
321 | ## License
322 | 
323 | This project is licensed under the Apache 2.0 License - see the [LICENSE](LICENSE) file for details. 
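
## Programmatic API Example

The backend can also be driven without the UI. The sketch below follows the flow documented above: `POST /research` returns a `job_id` and a `websocket_url`, and status updates stream over that WebSocket until the job finishes. It assumes a backend running locally on port 8000 and the third-party `requests` and `websockets` packages; the exact fields inside each update are produced by the backend's WebSocket manager, so the `status`/`message` keys used here are illustrative rather than a guaranteed schema.

```python
import asyncio
import json

import requests
import websockets  # assumed dependency: pip install requests websockets


async def run_research(company: str) -> None:
    # Start a research job; "company" is the only required field.
    resp = requests.post(
        "http://localhost:8000/research",
        json={"company": company},
        timeout=30,
    )
    resp.raise_for_status()
    job = resp.json()  # includes "job_id" and "websocket_url"

    # Subscribe to the job's status stream until it completes or fails.
    ws_url = f"ws://localhost:8000{job['websocket_url']}"
    async with websockets.connect(ws_url) as ws:
        async for raw in ws:
            update = json.loads(raw)
            print(update.get("status"), update.get("message"))
            if update.get("status") in ("completed", "failed"):
                break


asyncio.run(run_research("Tavily"))
```

Once a job reports `completed`, the finished report can also be fetched from `GET /research/{job_id}/report`; without a configured `MONGODB_URI` that endpoint falls back to the in-memory job store, so reports only survive a restart when MongoDB persistence is enabled.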
324 | 325 | ## Acknowledgments 326 | 327 | - [Tavily](https://tavily.com/) for the research API 328 | - All other open-source libraries and their contributors 329 | -------------------------------------------------------------------------------- /README.zh.md: -------------------------------------------------------------------------------- 1 | [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md) 2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md) 3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md) 4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md) 5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md) 6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md) 7 | 8 | 9 | # 智能公司研究助手 🔍 10 | 11 | ![web ui]() 12 | 13 | 一个多智能体工具,用于生成全面的公司研究报告。该平台使用一系列AI智能体来收集、整理和综合任何公司的信息。 14 | 15 | ✨快来看看吧![https://companyresearcher.tavily.com](https://companyresearcher.tavily.com) ✨ 16 | 17 | https://github.com/user-attachments/assets/071aa491-009b-4d76-a698-88863149e71c 18 | 19 | ## 功能特点 20 | 21 | - **多源研究**:从公司网站、新闻文章、财务报告和行业分析等多个来源收集数据 22 | - **AI驱动的内容过滤**:使用Tavily的相关性评分进行内容筛选 23 | - **实时进度流**:使用WebSocket连接流式传输研究进度和结果 24 | - **双模型架构**: 25 | - Gemini 2.0 Flash用于高上下文研究综合 26 | - GPT-4.1用于精确的报告格式化和编辑 27 | - **现代React前端**:具有实时更新、进度跟踪和下载选项的响应式UI 28 | - **模块化架构**:使用专业研究和处理节点构建的管道 29 | 30 | ## 智能体框架 31 | 32 | ### 研究管道 33 | 34 | 该平台遵循智能体框架,使用专门的节点按顺序处理数据: 35 | 36 | 1. **研究节点**: 37 | - `CompanyAnalyzer`:研究核心业务信息 38 | - `IndustryAnalyzer`:分析市场定位和趋势 39 | - `FinancialAnalyst`:收集财务指标和业绩数据 40 | - `NewsScanner`:收集最新新闻和发展动态 41 | 42 | 2. **处理节点**: 43 | - `Collector`:汇总所有分析器的研究数据 44 | - `Curator`:实现内容过滤和相关性评分 45 | - `Briefing`:使用Gemini 2.0 Flash生成特定类别的摘要 46 | - `Editor`:使用GPT-4.1-mini将简报编译和格式化为最终报告 47 | 48 | ![web ui]() 49 | 50 | ### 内容生成架构 51 | 52 | 该平台利用不同的模型以获得最佳性能: 53 | 54 | 1. **Gemini 2.0 Flash**(`briefing.py`): 55 | - 处理高上下文研究综合任务 56 | - 擅长处理和总结大量数据 57 | - 用于生成初始类别简报 58 | - 在多个文档之间高效维护上下文 59 | 60 | 2. **GPT-4.1 mini**(`editor.py`): 61 | - 专注于精确的格式化和编辑任务 62 | - 处理markdown结构和一致性 63 | - 在遵循精确格式说明方面表现出色 64 | - 用于: 65 | - 最终报告编译 66 | - 内容去重 67 | - Markdown格式化 68 | - 实时报告流式传输 69 | 70 | 这种方法结合了Gemini处理大上下文窗口的优势和GPT-4.1-mini在遵循特定格式说明方面的精确性。 71 | 72 | ### 内容筛选系统 73 | 74 | 该平台在`curator.py`中使用内容过滤系统: 75 | 76 | 1. **相关性评分**: 77 | - 文档由Tavily的AI驱动搜索进行评分 78 | - 需要达到最低阈值(默认0.4)才能继续 79 | - 分数反映与特定研究查询的相关性 80 | - 更高的分数表示与研究意图更好的匹配 81 | 82 | 2. **文档处理**: 83 | - 内容被标准化和清理 84 | - URL被去重和标准化 85 | - 文档按相关性分数排序 86 | - 通过WebSocket发送实时进度更新 87 | 88 | ### 实时通信系统 89 | 90 | 该平台实现了基于WebSocket的实时通信系统: 91 | 92 | ![web ui]() 93 | 94 | 1. **后端实现**: 95 | - 使用FastAPI的WebSocket支持 96 | - 为每个研究任务维护持久连接 97 | - 发送各种事件的结构化状态更新: 98 | ```python 99 | await websocket_manager.send_status_update( 100 | job_id=job_id, 101 | status="processing", 102 | message=f"Generating {category} briefing", 103 | result={ 104 | "step": "Briefing", 105 | "category": category, 106 | "total_docs": len(docs) 107 | } 108 | ) 109 | ``` 110 | 111 | 2. **前端集成**: 112 | - React组件订阅WebSocket更新 113 | - 实时处理和显示更新 114 | - 不同的UI组件处理特定类型的更新: 115 | - 查询生成进度 116 | - 文档筛选统计 117 | - 简报完成状态 118 | - 报告生成进度 119 | 120 | 3. 
**状态类型**:
121 | - `query_generating`:实时查询创建更新
122 | - `document_kept`:文档筛选进度
123 | - `briefing_start/complete`:简报生成状态
124 | - `report_chunk`:流式报告生成
125 | - `curation_complete`:最终文档统计
126 | 
127 | ## 安装设置
128 | 
129 | ### 快速安装(推荐)
130 | 
131 | 最简单的方法是使用安装脚本:
132 | 
133 | 1. 克隆仓库:
134 | ```bash
135 | git clone https://github.com/pogjester/tavily-company-research.git
136 | cd tavily-company-research
137 | ```
138 | 
139 | 2. 使安装脚本可执行并运行:
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 | 
145 | 安装脚本将:
146 | - 检查所需的Python和Node.js版本
147 | - 可选创建Python虚拟环境(推荐)
148 | - 安装所有依赖(Python和Node.js)
149 | - 指导您设置环境变量
150 | - 可选启动后端和前端服务器
151 | 
152 | 您需要准备以下API密钥:
153 | - Tavily API密钥
154 | - Google Gemini API密钥
155 | - OpenAI API密钥
156 | - MongoDB URI(可选)
157 | 
158 | ### 手动安装
159 | 
160 | 如果您更喜欢手动安装,请按照以下步骤操作:
161 | 
162 | 1. 克隆仓库:
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 | 
168 | 2. 安装后端依赖:
169 | ```bash
170 | # 可选:创建并激活虚拟环境
171 | python -m venv .venv
172 | source .venv/bin/activate
173 | 
174 | # 安装Python依赖
175 | pip install -r requirements.txt
176 | ```
177 | 
178 | 3. 安装前端依赖:
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 | 
184 | 4. 创建包含API密钥的`.env`文件:
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 | 
190 | # 可选:启用MongoDB持久化
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 | 
194 | ### Docker安装
195 | 
196 | 可以使用Docker和Docker Compose运行应用程序:
197 | 
198 | 1. 克隆仓库:
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 | 
204 | 2. 创建包含API密钥的`.env`文件:
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 | 
210 | # 可选:启用MongoDB持久化
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 | 
214 | 3. 构建并启动容器:
215 | ```bash
216 | docker compose up --build
217 | ```
218 | 
219 | 这将启动后端和前端服务:
220 | - 后端API将在`http://localhost:8000`可用
221 | - 前端将在`http://localhost:5174`可用
222 | 
223 | 停止服务:
224 | ```bash
225 | docker compose down
226 | ```
227 | 
228 | 注意:更新`.env`中的环境变量时,需要重启容器:
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 | 
233 | ### 运行应用程序
234 | 
235 | 1. 启动后端服务器(选择一种方式):
236 | ```bash
237 | # 选项1:直接Python模块
238 | python -m application
239 | 
240 | # 选项2:使用Uvicorn的FastAPI
241 | uvicorn application:app --reload --port 8000
242 | ```
243 | 
244 | 2. 在新终端中启动前端:
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 | 
250 | 3. 在`http://localhost:5173`访问应用程序
251 | 
252 | ## 使用方法
253 | 
254 | ### 本地开发
255 | 
256 | 1. 启动后端服务器(选择一个选项):
257 | 
258 | **选项1:直接Python模块**
259 | ```bash
260 | python -m application
261 | ```
262 | 
263 | **选项2:使用Uvicorn的FastAPI**
264 | ```bash
265 | # 如果尚未安装,安装uvicorn
266 | pip install uvicorn
267 | 
268 | # 使用热重载运行FastAPI应用
269 | uvicorn application:app --reload --port 8000
270 | ```
271 | 
272 | 后端将在以下位置可用:
273 | - API端点:`http://localhost:8000`
274 | - WebSocket端点:`ws://localhost:8000/research/ws/{job_id}`
275 | 
276 | 2. 启动前端开发服务器:
277 | ```bash
278 | cd ui
279 | npm run dev
280 | ```
281 | 
282 | 3. 在`http://localhost:5173`访问应用程序
283 | 
284 | ### 部署选项
285 | 
286 | 该应用程序可以部署到各种云平台。以下是一些常见选项:
287 | 
288 | #### AWS Elastic Beanstalk
289 | 
290 | 1. 安装EB CLI:
291 | ```bash
292 | pip install awsebcli
293 | ```
294 | 
295 | 2. 初始化EB应用:
296 | ```bash
297 | eb init -p python-3.11 tavily-research
298 | ```
299 | 
300 | 3. 
创建并部署: 301 | ```bash 302 | eb create tavily-research-prod 303 | ``` 304 | 305 | #### 其他部署选项 306 | 307 | - **Docker**:应用程序包含用于容器化部署的Dockerfile 308 | - **Heroku**:使用Python构建包直接从GitHub部署 309 | - **Google Cloud Run**:适用于具有自动扩展功能的容器化部署 310 | 311 | 选择最适合您需求的平台。该应用程序是平台无关的,可以托管在任何支持Python Web应用程序的地方。 312 | 313 | ## 贡献 314 | 315 | 1. Fork仓库 316 | 2. 创建特性分支(`git checkout -b feature/amazing-feature`) 317 | 3. 提交更改(`git commit -m 'Add amazing feature'`) 318 | 4. 推送到分支(`git push origin feature/amazing-feature`) 319 | 5. 打开Pull Request 320 | 321 | ## 许可证 322 | 323 | 本项目采用MIT许可证 - 详情请参阅[LICENSE](LICENSE)文件。 324 | 325 | ## 致谢 326 | 327 | - [Tavily](https://tavily.com/)提供研究API 328 | - 所有其他开源库及其贡献者 329 | -------------------------------------------------------------------------------- /application.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | import os 4 | import uuid 5 | from collections import defaultdict 6 | from datetime import datetime 7 | from pathlib import Path 8 | 9 | import uvicorn 10 | from dotenv import load_dotenv 11 | from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect 12 | from fastapi.middleware.cors import CORSMiddleware 13 | from fastapi.responses import FileResponse, JSONResponse, StreamingResponse 14 | from pydantic import BaseModel 15 | 16 | from backend.graph import Graph 17 | from backend.services.mongodb import MongoDBService 18 | from backend.services.pdf_service import PDFService 19 | from backend.services.websocket_manager import WebSocketManager 20 | 21 | # Load environment variables from .env file at startup 22 | env_path = Path(__file__).parent / '.env' 23 | if env_path.exists(): 24 | load_dotenv(dotenv_path=env_path, override=True) 25 | 26 | # Configure logging 27 | logger = logging.getLogger() 28 | logger.setLevel(logging.INFO) 29 | console_handler = logging.StreamHandler() 30 | logger.addHandler(console_handler) 31 | 32 | app = FastAPI(title="Tavily Company Research API") 33 | 34 | app.add_middleware( 35 | CORSMiddleware, 36 | allow_origins=["*"], 37 | allow_credentials=True, 38 | allow_methods=["GET", "POST", "OPTIONS"], 39 | allow_headers=["*"], 40 | ) 41 | 42 | manager = WebSocketManager() 43 | pdf_service = PDFService({"pdf_output_dir": "pdfs"}) 44 | 45 | job_status = defaultdict(lambda: { 46 | "status": "pending", 47 | "result": None, 48 | "error": None, 49 | "debug_info": [], 50 | "company": None, 51 | "report": None, 52 | "last_update": datetime.now().isoformat() 53 | }) 54 | 55 | mongodb = None 56 | if mongo_uri := os.getenv("MONGODB_URI"): 57 | try: 58 | mongodb = MongoDBService(mongo_uri) 59 | logger.info("MongoDB integration enabled") 60 | except Exception as e: 61 | logger.warning(f"Failed to initialize MongoDB: {e}. 
Continuing without persistence.") 62 | 63 | class ResearchRequest(BaseModel): 64 | company: str 65 | company_url: str | None = None 66 | industry: str | None = None 67 | hq_location: str | None = None 68 | 69 | class PDFGenerationRequest(BaseModel): 70 | report_content: str 71 | company_name: str | None = None 72 | 73 | @app.options("/research") 74 | async def preflight(): 75 | response = JSONResponse(content=None, status_code=200) 76 | response.headers["Access-Control-Allow-Origin"] = "*" 77 | response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS" 78 | response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization" 79 | return response 80 | 81 | @app.post("/research") 82 | async def research(data: ResearchRequest): 83 | try: 84 | logger.info(f"Received research request for {data.company}") 85 | job_id = str(uuid.uuid4()) 86 | asyncio.create_task(process_research(job_id, data)) 87 | 88 | response = JSONResponse(content={ 89 | "status": "accepted", 90 | "job_id": job_id, 91 | "message": "Research started. Connect to WebSocket for updates.", 92 | "websocket_url": f"/research/ws/{job_id}" 93 | }) 94 | response.headers["Access-Control-Allow-Origin"] = "*" 95 | response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS" 96 | response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization" 97 | return response 98 | 99 | except Exception as e: 100 | logger.error(f"Error initiating research: {str(e)}", exc_info=True) 101 | raise HTTPException(status_code=500, detail=str(e)) 102 | 103 | async def process_research(job_id: str, data: ResearchRequest): 104 | try: 105 | if mongodb: 106 | mongodb.create_job(job_id, data.dict()) 107 | await asyncio.sleep(1) # Allow WebSocket connection 108 | 109 | await manager.send_status_update(job_id, status="processing", message="Starting research") 110 | 111 | graph = Graph( 112 | company=data.company, 113 | url=data.company_url, 114 | industry=data.industry, 115 | hq_location=data.hq_location, 116 | websocket_manager=manager, 117 | job_id=job_id 118 | ) 119 | 120 | state = {} 121 | async for s in graph.run(thread={}): 122 | state.update(s) 123 | 124 | # Look for the compiled report in either location. 125 | report_content = state.get('report') or (state.get('editor') or {}).get('report') 126 | if report_content: 127 | logger.info(f"Found report in final state (length: {len(report_content)})") 128 | job_status[job_id].update({ 129 | "status": "completed", 130 | "report": report_content, 131 | "company": data.company, 132 | "last_update": datetime.now().isoformat() 133 | }) 134 | if mongodb: 135 | mongodb.update_job(job_id=job_id, status="completed") 136 | mongodb.store_report(job_id=job_id, report_data={"report": report_content}) 137 | await manager.send_status_update( 138 | job_id=job_id, 139 | status="completed", 140 | message="Research completed successfully", 141 | result={ 142 | "report": report_content, 143 | "company": data.company 144 | } 145 | ) 146 | else: 147 | logger.error(f"Research completed without finding report. 
State keys: {list(state.keys())}") 148 | logger.error(f"Editor state: {state.get('editor', {})}") 149 | 150 | # Check if there was a specific error in the state 151 | error_message = "No report found" 152 | if error := state.get('error'): 153 | error_message = f"Error: {error}" 154 | 155 | await manager.send_status_update( 156 | job_id=job_id, 157 | status="failed", 158 | message="Research completed but no report was generated", 159 | error=error_message 160 | ) 161 | 162 | except Exception as e: 163 | logger.error(f"Research failed: {str(e)}") 164 | await manager.send_status_update( 165 | job_id=job_id, 166 | status="failed", 167 | message=f"Research failed: {str(e)}", 168 | error=str(e) 169 | ) 170 | if mongodb: 171 | mongodb.update_job(job_id=job_id, status="failed", error=str(e)) 172 | @app.get("/") 173 | async def ping(): 174 | return {"message": "Alive"} 175 | 176 | @app.get("/research/pdf/{filename}") 177 | async def get_pdf(filename: str): 178 | pdf_path = os.path.join("pdfs", filename) 179 | if not os.path.exists(pdf_path): 180 | raise HTTPException(status_code=404, detail="PDF not found") 181 | return FileResponse(pdf_path, media_type='application/pdf', filename=filename) 182 | 183 | @app.websocket("/research/ws/{job_id}") 184 | async def websocket_endpoint(websocket: WebSocket, job_id: str): 185 | try: 186 | await websocket.accept() 187 | await manager.connect(websocket, job_id) 188 | 189 | if job_id in job_status: 190 | status = job_status[job_id] 191 | await manager.send_status_update( 192 | job_id, 193 | status=status["status"], 194 | message="Connected to status stream", 195 | error=status["error"], 196 | result=status["result"] 197 | ) 198 | 199 | while True: 200 | try: 201 | await websocket.receive_text() 202 | except WebSocketDisconnect: 203 | manager.disconnect(websocket, job_id) 204 | break 205 | 206 | except Exception as e: 207 | logger.error(f"WebSocket error for job {job_id}: {str(e)}", exc_info=True) 208 | manager.disconnect(websocket, job_id) 209 | 210 | @app.get("/research/{job_id}") 211 | async def get_research(job_id: str): 212 | if not mongodb: 213 | raise HTTPException(status_code=501, detail="Database persistence not configured") 214 | job = mongodb.get_job(job_id) 215 | if not job: 216 | raise HTTPException(status_code=404, detail="Research job not found") 217 | return job 218 | 219 | @app.get("/research/{job_id}/report") 220 | async def get_research_report(job_id: str): 221 | if not mongodb: 222 | if job_id in job_status: 223 | result = job_status[job_id] 224 | if report := result.get("report"): 225 | return {"report": report} 226 | raise HTTPException(status_code=404, detail="Report not found") 227 | 228 | report = mongodb.get_report(job_id) 229 | if not report: 230 | raise HTTPException(status_code=404, detail="Research report not found") 231 | return report 232 | 233 | @app.post("/generate-pdf") 234 | async def generate_pdf(data: PDFGenerationRequest): 235 | """Generate a PDF from markdown content and stream it to the client.""" 236 | try: 237 | success, result = pdf_service.generate_pdf_stream(data.report_content, data.company_name) 238 | if success: 239 | pdf_buffer, filename = result 240 | return StreamingResponse( 241 | pdf_buffer, 242 | media_type='application/pdf', 243 | headers={ 244 | 'Content-Disposition': f'attachment; filename="{filename}"' 245 | } 246 | ) 247 | else: 248 | raise HTTPException(status_code=500, detail=result) 249 | except Exception as e: 250 | raise HTTPException(status_code=500, detail=str(e)) 251 | 252 | if __name__ == 
"__main__": 253 | uvicorn.run(app, host="0.0.0.0", port=8000) -------------------------------------------------------------------------------- /backend/__init__.py: -------------------------------------------------------------------------------- 1 | """Backend package for tavily-company-research.""" 2 | 3 | import os 4 | import sys 5 | from pathlib import Path 6 | import logging 7 | from dotenv import load_dotenv 8 | 9 | # Set up logging 10 | logger = logging.getLogger(__name__) 11 | 12 | # Load environment variables from .env file 13 | env_path = Path(__file__).parent.parent / '.env' 14 | if env_path.exists(): 15 | logger.info(f"Loading environment variables from {env_path}") 16 | load_dotenv(dotenv_path=env_path, override=True) 17 | else: 18 | logger.warning(f".env file not found at {env_path}. Using system environment variables.") 19 | 20 | # Check for critical environment variables 21 | if not os.getenv("TAVILY_API_KEY"): 22 | logger.warning("TAVILY_API_KEY environment variable is not set.") 23 | 24 | if not os.getenv("OPENAI_API_KEY"): 25 | logger.warning("OPENAI_API_KEY environment variable is not set.") 26 | 27 | if not os.getenv("GEMINI_API_KEY"): 28 | logger.warning("GEMINI_API_KEY environment variable is not set.") 29 | 30 | from .graph import Graph 31 | 32 | __all__ = ["Graph"] 33 | -------------------------------------------------------------------------------- /backend/classes/__init__.py: -------------------------------------------------------------------------------- 1 | from .state import InputState, ResearchState 2 | 3 | __all__ = ["InputState", "ResearchState"] -------------------------------------------------------------------------------- /backend/classes/state.py: -------------------------------------------------------------------------------- 1 | from typing import TypedDict, NotRequired, Required, Dict, List, Any 2 | from backend.services.websocket_manager import WebSocketManager 3 | 4 | #Define the input state 5 | class InputState(TypedDict, total=False): 6 | company: Required[str] 7 | company_url: NotRequired[str] 8 | hq_location: NotRequired[str] 9 | industry: NotRequired[str] 10 | websocket_manager: NotRequired[WebSocketManager] 11 | job_id: NotRequired[str] 12 | 13 | class ResearchState(InputState): 14 | site_scrape: Dict[str, Any] 15 | messages: List[Any] 16 | financial_data: Dict[str, Any] 17 | news_data: Dict[str, Any] 18 | industry_data: Dict[str, Any] 19 | company_data: Dict[str, Any] 20 | curated_financial_data: Dict[str, Any] 21 | curated_news_data: Dict[str, Any] 22 | curated_industry_data: Dict[str, Any] 23 | curated_company_data: Dict[str, Any] 24 | financial_briefing: str 25 | news_briefing: str 26 | industry_briefing: str 27 | company_briefing: str 28 | references: List[str] 29 | briefings: Dict[str, Any] 30 | report: str -------------------------------------------------------------------------------- /backend/graph.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, AsyncIterator, Dict 3 | 4 | from langchain_core.messages import SystemMessage 5 | from langgraph.graph import StateGraph 6 | 7 | from .classes.state import InputState 8 | from .nodes import GroundingNode 9 | from .nodes.briefing import Briefing 10 | from .nodes.collector import Collector 11 | from .nodes.curator import Curator 12 | from .nodes.editor import Editor 13 | from .nodes.enricher import Enricher 14 | from .nodes.researchers import ( 15 | CompanyAnalyzer, 16 | FinancialAnalyst, 17 | IndustryAnalyzer, 
18 | NewsScanner, 19 | ) 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | class Graph: 24 | def __init__(self, company=None, url=None, hq_location=None, industry=None, 25 | websocket_manager=None, job_id=None): 26 | self.websocket_manager = websocket_manager 27 | self.job_id = job_id 28 | 29 | # Initialize InputState 30 | self.input_state = InputState( 31 | company=company, 32 | company_url=url, 33 | hq_location=hq_location, 34 | industry=industry, 35 | websocket_manager=websocket_manager, 36 | job_id=job_id, 37 | messages=[ 38 | SystemMessage(content="Expert researcher starting investigation") 39 | ] 40 | ) 41 | 42 | # Initialize nodes with WebSocket manager and job ID 43 | self._init_nodes() 44 | self._build_workflow() 45 | 46 | def _init_nodes(self): 47 | """Initialize all workflow nodes""" 48 | self.ground = GroundingNode() 49 | self.financial_analyst = FinancialAnalyst() 50 | self.news_scanner = NewsScanner() 51 | self.industry_analyst = IndustryAnalyzer() 52 | self.company_analyst = CompanyAnalyzer() 53 | self.collector = Collector() 54 | self.curator = Curator() 55 | self.enricher = Enricher() 56 | self.briefing = Briefing() 57 | self.editor = Editor() 58 | 59 | def _build_workflow(self): 60 | """Configure the state graph workflow""" 61 | self.workflow = StateGraph(InputState) 62 | 63 | # Add nodes with their respective processing functions 64 | self.workflow.add_node("grounding", self.ground.run) 65 | self.workflow.add_node("financial_analyst", self.financial_analyst.run) 66 | self.workflow.add_node("news_scanner", self.news_scanner.run) 67 | self.workflow.add_node("industry_analyst", self.industry_analyst.run) 68 | self.workflow.add_node("company_analyst", self.company_analyst.run) 69 | self.workflow.add_node("collector", self.collector.run) 70 | self.workflow.add_node("curator", self.curator.run) 71 | self.workflow.add_node("enricher", self.enricher.run) 72 | self.workflow.add_node("briefing", self.briefing.run) 73 | self.workflow.add_node("editor", self.editor.run) 74 | 75 | # Configure workflow edges 76 | self.workflow.set_entry_point("grounding") 77 | self.workflow.set_finish_point("editor") 78 | 79 | research_nodes = [ 80 | "financial_analyst", 81 | "news_scanner", 82 | "industry_analyst", 83 | "company_analyst" 84 | ] 85 | 86 | # Connect grounding to all research nodes 87 | for node in research_nodes: 88 | self.workflow.add_edge("grounding", node) 89 | self.workflow.add_edge(node, "collector") 90 | 91 | # Connect remaining nodes 92 | self.workflow.add_edge("collector", "curator") 93 | self.workflow.add_edge("curator", "enricher") 94 | self.workflow.add_edge("enricher", "briefing") 95 | self.workflow.add_edge("briefing", "editor") 96 | 97 | async def run(self, thread: Dict[str, Any]) -> AsyncIterator[Dict[str, Any]]: 98 | """Execute the research workflow""" 99 | compiled_graph = self.workflow.compile() 100 | 101 | async for state in compiled_graph.astream( 102 | self.input_state, 103 | thread 104 | ): 105 | if self.websocket_manager and self.job_id: 106 | await self._handle_ws_update(state) 107 | yield state 108 | 109 | async def _handle_ws_update(self, state: Dict[str, Any]): 110 | """Handle WebSocket updates based on state changes""" 111 | update = { 112 | "type": "state_update", 113 | "data": { 114 | "current_node": state.get("current_node", "unknown"), 115 | "progress": state.get("progress", 0), 116 | "keys": list(state.keys()) 117 | } 118 | } 119 | await self.websocket_manager.broadcast_to_job( 120 | self.job_id, 121 | update 122 | ) 123 | 124 | def 
compile(self):
125 |         graph = self.workflow.compile()
126 |         return graph
--------------------------------------------------------------------------------
/backend/nodes/__init__.py:
--------------------------------------------------------------------------------
1 | from .grounding import GroundingNode
2 | 
3 | __all__ = ["GroundingNode"]
--------------------------------------------------------------------------------
/backend/nodes/collector.py:
--------------------------------------------------------------------------------
1 | from langchain_core.messages import AIMessage
2 | 
3 | from ..classes import ResearchState
4 | 
5 | 
6 | class Collector:
7 |     """Collects and organizes all research data before curation."""
8 | 
9 |     async def collect(self, state: ResearchState) -> ResearchState:
10 |         """Collect and verify all research data is present."""
11 |         company = state.get('company', 'Unknown Company')
12 |         msg = [f"📦 Collecting research data for {company}:"]
13 | 
14 |         if websocket_manager := state.get('websocket_manager'):
15 |             if job_id := state.get('job_id'):
16 |                 await websocket_manager.send_status_update(
17 |                     job_id=job_id,
18 |                     status="processing",
19 |                     message=f"Collecting research data for {company}",
20 |                     result={"step": "Collecting"}
21 |                 )
22 | 
23 |         # Check each type of research data
24 |         research_types = {
25 |             'financial_data': '💰 Financial',
26 |             'news_data': '📰 News',
27 |             'industry_data': '🏭 Industry',
28 |             'company_data': '🏢 Company'
29 |         }
30 | 
31 |         for data_field, label in research_types.items():
32 |             data = state.get(data_field, {})
33 |             if data:
34 |                 msg.append(f"• {label}: {len(data)} documents collected")
35 |             else:
36 |                 msg.append(f"• {label}: No data found")
37 | 
38 |         # Update state with collection message
39 |         messages = state.get('messages', [])
40 |         messages.append(AIMessage(content="\n".join(msg)))
41 |         state['messages'] = messages
42 | 
43 |         return state
44 | 
45 |     async def run(self, state: ResearchState) -> ResearchState:
46 |         return await self.collect(state)
--------------------------------------------------------------------------------
/backend/nodes/curator.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from typing import Dict
3 | from urllib.parse import urljoin, urlparse
4 | 
5 | from langchain_core.messages import AIMessage
6 | 
7 | from ..classes import ResearchState
8 | from ..utils.references import process_references_from_search_results
9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | class Curator:
13 |     def __init__(self) -> None:
14 |         self.relevance_threshold = 0.4  # Minimum Tavily relevance score required to keep a document
15 |         logger.info(f"Curator initialized with relevance threshold: {self.relevance_threshold}")
16 | 
17 |     async def evaluate_documents(self, state: ResearchState, docs: list, context: Dict[str, str]) -> list:
18 |         """Evaluate documents based on Tavily's scoring."""
19 |         if websocket_manager := state.get('websocket_manager'):
20 |             if job_id := state.get('job_id'):
21 |                 logger.info(f"Sending initial curation status update for job {job_id}")
22 |                 await websocket_manager.send_status_update(
23 |                     job_id=job_id,
24 |                     status="processing",
25 |                     message="Evaluating documents",
26 |                     result={
27 |                         "step": "Curation",
28 |                     }
29 |                 )
30 | 
31 |         if not docs:
32 |             return []
33 | 
34 |         logger.info(f"Evaluating {len(docs)} documents")
35 | 
36 |         evaluated_docs = []
37 |         try:
38 |             # Evaluate each document using Tavily's score
39 |             for doc in docs:
40 |                 try:
41 |                     # Ensure score is a valid float
42 |                     tavily_score = 
float(doc.get('score', 0)) # Default to 0 if no score 43 | 44 | # Keep documents with good Tavily score 45 | if tavily_score >= self.relevance_threshold: 46 | logger.info(f"Document passed threshold with score {tavily_score:.4f} for '{doc.get('title', 'No title')}'") 47 | 48 | evaluated_doc = { 49 | **doc, 50 | "evaluation": { 51 | "overall_score": tavily_score, # Store as float 52 | "query": doc.get('query', '') 53 | } 54 | } 55 | evaluated_docs.append(evaluated_doc) 56 | 57 | # Send incremental update for kept document 58 | if websocket_manager := state.get('websocket_manager'): 59 | if job_id := state.get('job_id'): 60 | await websocket_manager.send_status_update( 61 | job_id=job_id, 62 | status="document_kept", 63 | message=f"Kept document: {doc.get('title', 'No title')}", 64 | result={ 65 | "step": "Curation", 66 | "doc_type": doc.get('doc_type', 'unknown'), 67 | "title": doc.get('title', 'No title'), 68 | "score": tavily_score 69 | } 70 | ) 71 | else: 72 | logger.info(f"Document below threshold with score {tavily_score:.4f} for '{doc.get('title', 'No title')}'") 73 | except (ValueError, TypeError) as e: 74 | logger.warning(f"Error processing score for document: {e}") 75 | continue 76 | 77 | except Exception as e: 78 | logger.error(f"Error during document evaluation: {e}") 79 | return [] 80 | 81 | # Sort evaluated docs by score before returning 82 | evaluated_docs.sort(key=lambda x: float(x['evaluation']['overall_score']), reverse=True) 83 | logger.info(f"Returning {len(evaluated_docs)} evaluated documents") 84 | 85 | return evaluated_docs 86 | 87 | async def curate_data(self, state: ResearchState) -> ResearchState: 88 | """Curate all collected data based on Tavily scores.""" 89 | company = state.get('company', 'Unknown Company') 90 | logger.info(f"Starting curation for company: {company}") 91 | 92 | # Send initial status update through WebSocket 93 | if websocket_manager := state.get('websocket_manager'): 94 | if job_id := state.get('job_id'): 95 | logger.info(f"Sending initial curation status update for job {job_id}") 96 | await websocket_manager.send_status_update( 97 | job_id=job_id, 98 | status="processing", 99 | message=f"Starting document curation for {company}", 100 | result={ 101 | "step": "Curation", 102 | "doc_counts": { 103 | "company": {"initial": 0, "kept": 0}, 104 | "industry": {"initial": 0, "kept": 0}, 105 | "financial": {"initial": 0, "kept": 0}, 106 | "news": {"initial": 0, "kept": 0} 107 | } 108 | } 109 | ) 110 | 111 | industry = state.get('industry', 'Unknown') 112 | context = { 113 | "company": company, 114 | "industry": industry, 115 | "hq_location": state.get('hq_location', 'Unknown') 116 | } 117 | 118 | msg = [f"🔍 Curating research data for {company}"] 119 | 120 | data_types = { 121 | 'financial_data': ('💰 Financial', 'financial'), 122 | 'news_data': ('📰 News', 'news'), 123 | 'industry_data': ('🏭 Industry', 'industry'), 124 | 'company_data': ('🏢 Company', 'company') 125 | } 126 | 127 | # Create all evaluation tasks upfront 128 | curation_tasks = [] 129 | for data_field, (emoji, doc_type) in data_types.items(): 130 | data = state.get(data_field, {}) 131 | if not data: 132 | continue 133 | 134 | # Filter and normalize URLs 135 | unique_docs = {} 136 | for url, doc in data.items(): 137 | try: 138 | parsed = urlparse(url) 139 | if not parsed.scheme: 140 | url = urljoin('https://', url) 141 | clean_url = parsed._replace(query='', fragment='').geturl() 142 | if clean_url not in unique_docs: 143 | doc['url'] = clean_url 144 | doc['doc_type'] = doc_type 145 | 
unique_docs[clean_url] = doc
146 |                 except Exception:
147 |                     continue
148 | 
149 |             docs = list(unique_docs.values())
150 |             curation_tasks.append((data_field, emoji, doc_type, unique_docs.keys(), docs))
151 | 
152 |         # Track document counts for each type
153 |         doc_counts = {}
154 | 
155 |         for data_field, emoji, doc_type, urls, docs in curation_tasks:
156 |             msg.append(f"\n{emoji}: Found {len(docs)} documents")
157 | 
158 |             if websocket_manager := state.get('websocket_manager'):
159 |                 if job_id := state.get('job_id'):
160 |                     await websocket_manager.send_status_update(
161 |                         job_id=job_id,
162 |                         status="category_start",
163 |                         message=f"Processing {doc_type} documents",
164 |                         result={
165 |                             "step": "Curation",
166 |                             "doc_type": doc_type,
167 |                             "initial_count": len(docs)
168 |                         }
169 |                     )
170 | 
171 |             evaluated_docs = await self.evaluate_documents(state, docs, context)
172 | 
173 |             if not evaluated_docs:
174 |                 msg.append(" ⚠️ No relevant documents found")
175 |                 doc_counts[data_field] = {"initial": len(docs), "kept": 0}
176 |                 continue
177 | 
178 |             # Filter and sort by Tavily score; key each kept document by its own
179 |             # cleaned URL (zipping the full URL list against the filtered, re-sorted
180 |             # evaluated_docs would misalign URLs and documents)
181 |             relevant_docs = {doc['url']: doc for doc in evaluated_docs}
182 |             sorted_items = sorted(relevant_docs.items(), key=lambda item: item[1]['evaluation']['overall_score'], reverse=True)
183 | 
184 |             # Limit to top 30 documents per category
185 |             if len(sorted_items) > 30:
186 |                 sorted_items = sorted_items[:30]
187 |             relevant_docs = dict(sorted_items)
188 | 
189 |             doc_counts[data_field] = {
190 |                 "initial": len(docs),
191 |                 "kept": len(relevant_docs)
192 |             }
193 | 
194 |             if relevant_docs:
195 |                 msg.append(f" ✓ Kept {len(relevant_docs)} relevant documents")
196 |                 logger.info(f"Kept {len(relevant_docs)} documents for {doc_type} with scores above threshold")
197 |             else:
198 |                 msg.append(" ⚠️ No documents met relevance threshold")
199 |                 logger.info(f"No documents met relevance threshold for {doc_type}")
200 | 
201 |             # Store curated documents in state
202 |             state[f'curated_{data_field}'] = relevant_docs
203 | 
204 |         # Process references using the references module
205 |         top_reference_urls, reference_titles, reference_info = process_references_from_search_results(state)
206 |         logger.info(f"Selected top {len(top_reference_urls)} references for the report")
207 | 
208 |         # Update state with references and their titles
209 |         messages = state.get('messages', [])
210 |         messages.append(AIMessage(content="\n".join(msg)))
211 |         state['messages'] = messages
212 |         state['references'] = top_reference_urls
213 |         state['reference_titles'] = reference_titles
214 |         state['reference_info'] = reference_info
215 | 
216 |         # Send final curation stats
217 |         if websocket_manager := state.get('websocket_manager'):
218 |             if job_id := state.get('job_id'):
219 |                 await websocket_manager.send_status_update(
220 |                     job_id=job_id,
221 |                     status="curation_complete",
222 |                     message="Document curation complete",
223 |                     result={
224 |                         "step": "Curation",
225 |                         "doc_counts": {
226 |                             "company": doc_counts.get('company_data', {"initial": 0, "kept": 0}),
227 |                             "industry": doc_counts.get('industry_data', {"initial": 0, "kept": 0}),
228 |                             "financial": doc_counts.get('financial_data', {"initial": 0, "kept": 0}),
229 |                             "news": doc_counts.get('news_data', {"initial": 0, "kept": 0})
230 |                         }
231 |                     }
232 |                 )
233 | 
234 |         return state
235 | 
236 |     async def run(self, state: ResearchState) -> ResearchState:
237 |         return await self.curate_data(state)
238 | 
--------------------------------------------------------------------------------
/backend/nodes/grounding.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from langchain_core.messages import AIMessage 5 | from tavily import AsyncTavilyClient 6 | 7 | from ..classes import InputState, ResearchState 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | class GroundingNode: 12 | """Gathers initial grounding data about the company.""" 13 | 14 | def __init__(self) -> None: 15 | self.tavily_client = AsyncTavilyClient(api_key=os.getenv("TAVILY_API_KEY")) 16 | 17 | async def initial_search(self, state: InputState) -> ResearchState: 18 | # Add debug logging at the start to check websocket manager 19 | if websocket_manager := state.get('websocket_manager'): 20 | logger.info("Websocket manager found in state") 21 | else: 22 | logger.warning("No websocket manager found in state") 23 | 24 | company = state.get('company', 'Unknown Company') 25 | msg = f"🎯 Initiating research for {company}...\n" 26 | 27 | if websocket_manager := state.get('websocket_manager'): 28 | if job_id := state.get('job_id'): 29 | await websocket_manager.send_status_update( 30 | job_id=job_id, 31 | status="processing", 32 | message=f"🎯 Initiating research for {company}", 33 | result={"step": "Initializing"} 34 | ) 35 | 36 | site_scrape = {} 37 | 38 | # Only attempt extraction if we have a URL 39 | if url := state.get('company_url'): 40 | msg += f"\n🌐 Analyzing company website: {url}" 41 | logger.info(f"Starting website analysis for {url}") 42 | 43 | # Send initial briefing status 44 | if websocket_manager := state.get('websocket_manager'): 45 | if job_id := state.get('job_id'): 46 | await websocket_manager.send_status_update( 47 | job_id=job_id, 48 | status="processing", 49 | message="Analyzing company website", 50 | result={"step": "Initial Site Scrape"} 51 | ) 52 | 53 | try: 54 | logger.info("Initiating Tavily extraction") 55 | site_extraction = await self.tavily_client.extract(url, extract_depth="basic") 56 | 57 | raw_contents = [] 58 | for item in site_extraction.get("results", []): 59 | if content := item.get("raw_content"): 60 | raw_contents.append(content) 61 | 62 | if raw_contents: 63 | site_scrape = { 64 | 'title': company, 65 | 'raw_content': "\n\n".join(raw_contents) 66 | } 67 | logger.info(f"Successfully extracted {len(raw_contents)} content sections") 68 | msg += "\n✅ Successfully extracted content from website" 69 | if websocket_manager := state.get('websocket_manager'): 70 | if job_id := state.get('job_id'): 71 | await websocket_manager.send_status_update( 72 | job_id=job_id, 73 | status="processing", 74 | message="Successfully extracted content from website", 75 | result={"step": "Initial Site Scrape"} 76 | ) 77 | else: 78 | logger.warning("No content found in extraction results") 79 | msg += "\n⚠️ No content found in website extraction" 80 | if websocket_manager := state.get('websocket_manager'): 81 | if job_id := state.get('job_id'): 82 | await websocket_manager.send_status_update( 83 | job_id=job_id, 84 | status="processing", 85 | message="⚠️ No content found in provided URL", 86 | result={"step": "Initial Site Scrape"} 87 | ) 88 | except Exception as e: 89 | error_str = str(e) 90 | logger.error(f"Website extraction error: {error_str}", exc_info=True) 91 | error_msg = f"⚠️ Error extracting website content: {error_str}" 92 | print(error_msg) 93 | msg += f"\n{error_msg}" 94 | if websocket_manager := state.get('websocket_manager'): 95 | if job_id := state.get('job_id'): 96 | await websocket_manager.send_status_update( 97 | job_id=job_id, 98 | 
status="website_error", 99 | message=error_msg, 100 | result={ 101 | "step": "Initial Site Scrape", 102 | "error": error_str, 103 | "continue_research": True # Continue with research even if website extraction fails 104 | } 105 | ) 106 | else: 107 | msg += "\n⏩ No company URL provided, proceeding directly to research phase" 108 | if websocket_manager := state.get('websocket_manager'): 109 | if job_id := state.get('job_id'): 110 | await websocket_manager.send_status_update( 111 | job_id=job_id, 112 | status="processing", 113 | message="No company URL provided, proceeding directly to research phase", 114 | result={"step": "Initializing"} 115 | ) 116 | # Add context about what information we have 117 | context_data = {} 118 | if hq := state.get('hq_location'): 119 | msg += f"\n📍 Company HQ: {hq}" 120 | context_data["hq_location"] = hq 121 | if industry := state.get('industry'): 122 | msg += f"\n🏭 Industry: {industry}" 123 | context_data["industry"] = industry 124 | 125 | # Initialize ResearchState with input information 126 | research_state = { 127 | # Copy input fields 128 | "company": state.get('company'), 129 | "company_url": state.get('company_url'), 130 | "hq_location": state.get('hq_location'), 131 | "industry": state.get('industry'), 132 | # Initialize research fields 133 | "messages": [AIMessage(content=msg)], 134 | "site_scrape": site_scrape, 135 | # Pass through websocket info 136 | "websocket_manager": state.get('websocket_manager'), 137 | "job_id": state.get('job_id') 138 | } 139 | 140 | # If there was an error in the initial extraction, store it in the state 141 | if "⚠️ Error extracting website content:" in msg: 142 | research_state["error"] = error_str 143 | 144 | return research_state 145 | 146 | async def run(self, state: InputState) -> ResearchState: 147 | return await self.initial_search(state) 148 | -------------------------------------------------------------------------------- /backend/nodes/researchers/__init__.py: -------------------------------------------------------------------------------- 1 | from .financial import FinancialAnalyst 2 | from .news import NewsScanner 3 | from .industry import IndustryAnalyzer 4 | from .company import CompanyAnalyzer 5 | 6 | __all__ = ["FinancialAnalyst", "NewsScanner", "IndustryAnalyzer", "CompanyAnalyzer"] -------------------------------------------------------------------------------- /backend/nodes/researchers/company.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from langchain_core.messages import AIMessage 4 | 5 | from ...classes import ResearchState 6 | from .base import BaseResearcher 7 | 8 | 9 | class CompanyAnalyzer(BaseResearcher): 10 | def __init__(self) -> None: 11 | super().__init__() 12 | self.analyst_type = "company_analyzer" 13 | 14 | async def analyze(self, state: ResearchState) -> Dict[str, Any]: 15 | company = state.get('company', 'Unknown Company') 16 | msg = [f"🏢 Company Analyzer analyzing {company}"] 17 | 18 | # Generate search queries using LLM 19 | queries = await self.generate_queries(state, """ 20 | Generate queries on the company fundamentals of {company} in the {industry} industry such as: 21 | - Core products and services 22 | - Company history and milestones 23 | - Leadership team 24 | - Business model and strategy 25 | """) 26 | 27 | # Add message to show subqueries with emojis 28 | subqueries_msg = "🔍 Subqueries for company analysis:\n" + "\n".join([f"• {query}" for query in queries]) 29 | messages = state.get('messages', 
[]) 30 | messages.append(AIMessage(content=subqueries_msg)) 31 | state['messages'] = messages 32 | 33 | # Send queries through WebSocket 34 | if websocket_manager := state.get('websocket_manager'): 35 | if job_id := state.get('job_id'): 36 | await websocket_manager.send_status_update( 37 | job_id=job_id, 38 | status="processing", 39 | message="Company analysis queries generated", 40 | result={ 41 | "step": "Company Analyst", 42 | "analyst_type": "Company Analyst", 43 | "queries": queries 44 | } 45 | ) 46 | 47 | company_data = {} 48 | 49 | # If we have site_scrape data, include it first 50 | if site_scrape := state.get('site_scrape'): 51 | msg.append("\n📊 Including site scrape data in company analysis...") 52 | company_url = state.get('company_url', 'company-website') 53 | company_data[company_url] = { 54 | 'title': state.get('company', 'Unknown Company'), 55 | 'raw_content': site_scrape, 56 | 'query': f'Company overview and information about {company}' # Add a default query for site scrape 57 | } 58 | 59 | # Perform additional research with comprehensive search 60 | try: 61 | # Store documents with their respective queries 62 | for query in queries: 63 | documents = await self.search_documents(state, [query]) 64 | if documents: # Only process if we got results 65 | for url, doc in documents.items(): 66 | doc['query'] = query # Associate each document with its query 67 | company_data[url] = doc 68 | 69 | msg.append(f"\n✓ Found {len(company_data)} documents") 70 | if websocket_manager := state.get('websocket_manager'): 71 | if job_id := state.get('job_id'): 72 | await websocket_manager.send_status_update( 73 | job_id=job_id, 74 | status="processing", 75 | message=f"Used Tavily Search to find {len(company_data)} documents", 76 | result={ 77 | "step": "Searching", 78 | "analyst_type": "Company Analyst", 79 | "queries": queries 80 | } 81 | ) 82 | except Exception as e: 83 | msg.append(f"\n⚠️ Error during research: {str(e)}") 84 | 85 | # Update state with our findings 86 | messages = state.get('messages', []) 87 | messages.append(AIMessage(content="\n".join(msg))) 88 | state['messages'] = messages 89 | state['company_data'] = company_data 90 | 91 | return { 92 | 'message': msg, 93 | 'company_data': company_data 94 | } 95 | 96 | async def run(self, state: ResearchState) -> Dict[str, Any]: 97 | return await self.analyze(state) -------------------------------------------------------------------------------- /backend/nodes/researchers/financial.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Dict 3 | 4 | from langchain_core.messages import AIMessage 5 | 6 | from ...classes import ResearchState 7 | from .base import BaseResearcher 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | class FinancialAnalyst(BaseResearcher): 12 | def __init__(self) -> None: 13 | super().__init__() 14 | self.analyst_type = "financial_analyzer" 15 | 16 | async def analyze(self, state: ResearchState) -> Dict[str, Any]: 17 | company = state.get('company', 'Unknown Company') 18 | websocket_manager = state.get('websocket_manager') 19 | job_id = state.get('job_id') 20 | 21 | try: 22 | # Generate search queries 23 | queries = await self.generate_queries( 24 | state, 25 | """ 26 | Generate queries on the financial analysis of {company} in the {industry} industry such as: 27 | - Fundraising history and valuation 28 | - Financial statements and key metrics 29 | - Revenue and profit sources 30 | """) 31 | 32 | # Add message to show subqueries with 
emojis 33 | subqueries_msg = "🔍 Subqueries for financial analysis:\n" + "\n".join([f"• {query}" for query in queries]) 34 | messages = state.get('messages', []) 35 | messages.append(AIMessage(content=subqueries_msg)) 36 | state['messages'] = messages 37 | 38 | # Send queries through WebSocket 39 | if websocket_manager: 40 | if job_id: 41 | await websocket_manager.send_status_update( 42 | job_id=job_id, 43 | status="processing", 44 | message="Financial analysis queries generated", 45 | result={ 46 | "step": "Financial Analyst", 47 | "analyst_type": "Financial Analyst", 48 | "queries": queries 49 | } 50 | ) 51 | 52 | # Process site scrape data 53 | financial_data = {} 54 | if site_scrape := state.get('site_scrape'): 55 | company_url = state.get('company_url', 'company-website') 56 | financial_data[company_url] = { 57 | 'title': state.get('company', 'Unknown Company'), 58 | 'raw_content': site_scrape, 59 | 'query': f'Financial information on {company}' 60 | } 61 | 62 | for query in queries: 63 | documents = await self.search_documents(state, [query]) 64 | for url, doc in documents.items(): 65 | doc['query'] = query 66 | financial_data[url] = doc 67 | 68 | # Final status update 69 | completion_msg = f"Completed analysis with {len(financial_data)} documents" 70 | 71 | if websocket_manager: 72 | if job_id: 73 | await websocket_manager.send_status_update( 74 | job_id=job_id, 75 | status="processing", 76 | message=f"Used Tavily Search to find {len(financial_data)} documents", 77 | result={ 78 | "step": "Searching", 79 | "analyst_type": "Financial Analyst", 80 | "queries": queries 81 | } 82 | ) 83 | 84 | # Update state 85 | messages.append(AIMessage(content=completion_msg)) 86 | state['messages'] = messages 87 | state['financial_data'] = financial_data 88 | 89 | # Send completion status with final queries 90 | if websocket_manager and job_id: 91 | await websocket_manager.send_status_update( 92 | job_id=job_id, 93 | status="processing", 94 | message=completion_msg, 95 | result={ 96 | "analyst_type": "Financial Analyst", 97 | "queries": queries, 98 | "documents_found": len(financial_data) 99 | } 100 | ) 101 | 102 | return { 103 | 'message': completion_msg, 104 | 'financial_data': financial_data, 105 | 'analyst_type': self.analyst_type, 106 | 'queries': queries 107 | } 108 | 109 | except Exception as e: 110 | error_msg = f"Financial analysis failed: {str(e)}" 111 | # Send error status 112 | if websocket_manager: 113 | if job_id: 114 | await websocket_manager.send_status_update( 115 | job_id=job_id, 116 | status="error", 117 | message=error_msg, 118 | result={ 119 | "analyst_type": "Financial Analyst", 120 | "error": str(e) 121 | } 122 | ) 123 | raise # Re-raise to maintain error flow 124 | 125 | async def run(self, state: ResearchState) -> Dict[str, Any]: 126 | return await self.analyze(state) -------------------------------------------------------------------------------- /backend/nodes/researchers/industry.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from langchain_core.messages import AIMessage 4 | 5 | from ...classes import ResearchState 6 | from .base import BaseResearcher 7 | 8 | 9 | class IndustryAnalyzer(BaseResearcher): 10 | def __init__(self) -> None: 11 | super().__init__() 12 | self.analyst_type = "industry_analyzer" 13 | 14 | async def analyze(self, state: ResearchState) -> Dict[str, Any]: 15 | company = state.get('company', 'Unknown Company') 16 | industry = state.get('industry', 'Unknown Industry') 17 | msg 
= [f"🏭 Industry Analyzer analyzing {company} in {industry}"] 18 | 19 | # Generate search queries using LLM 20 | queries = await self.generate_queries(state, """ 21 | Generate queries on the industry analysis of {company} in the {industry} industry such as: 22 | - Market position 23 | - Competitors 24 | - {industry} industry trends and challenges 25 | - Market size and growth 26 | """) 27 | 28 | subqueries_msg = "🔍 Subqueries for industry analysis:\n" + "\n".join([f"• {query}" for query in queries]) 29 | messages = state.get('messages', []) 30 | messages.append(AIMessage(content=subqueries_msg)) 31 | state['messages'] = messages 32 | 33 | # Send queries through WebSocket 34 | if websocket_manager := state.get('websocket_manager'): 35 | if job_id := state.get('job_id'): 36 | await websocket_manager.send_status_update( 37 | job_id=job_id, 38 | status="processing", 39 | message="Industry analysis queries generated", 40 | result={ 41 | "step": "Industry Analyst", 42 | "analyst_type": "Industry Analyst", 43 | "queries": queries 44 | } 45 | ) 46 | 47 | industry_data = {} 48 | 49 | # If we have site_scrape data, include it first 50 | if site_scrape := state.get('site_scrape'): 51 | msg.append("\n📊 Including site scrape data in company analysis...") 52 | company_url = state.get('company_url', 'company-website') 53 | industry_data[company_url] = { 54 | 'title': state.get('company', 'Unknown Company'), 55 | 'raw_content': site_scrape, 56 | 'query': f'Industry analysis on {company}' # Add a default query for site scrape 57 | } 58 | 59 | # Perform additional research with increased search depth 60 | try: 61 | # Store documents with their respective queries 62 | for query in queries: 63 | documents = await self.search_documents(state, [query]) 64 | if documents: # Only process if we got results 65 | for url, doc in documents.items(): 66 | doc['query'] = query # Associate each document with its query 67 | industry_data[url] = doc 68 | 69 | msg.append(f"\n✓ Found {len(industry_data)} documents") 70 | if websocket_manager := state.get('websocket_manager'): 71 | if job_id := state.get('job_id'): 72 | await websocket_manager.send_status_update( 73 | job_id=job_id, 74 | status="processing", 75 | message=f"Used Tavily Search to find {len(industry_data)} documents", 76 | result={ 77 | "step": "Searching", 78 | "analyst_type": "Industry Analyst", 79 | "queries": queries 80 | } 81 | ) 82 | except Exception as e: 83 | msg.append(f"\n⚠️ Error during research: {str(e)}") 84 | 85 | # Update state with our findings 86 | messages = state.get('messages', []) 87 | messages.append(AIMessage(content="\n".join(msg))) 88 | state['messages'] = messages 89 | state['industry_data'] = industry_data 90 | 91 | return { 92 | 'message': msg, 93 | 'industry_data': industry_data 94 | } 95 | 96 | async def run(self, state: ResearchState) -> Dict[str, Any]: 97 | return await self.analyze(state) -------------------------------------------------------------------------------- /backend/nodes/researchers/news.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from langchain_core.messages import AIMessage 4 | 5 | from ...classes import ResearchState 6 | from .base import BaseResearcher 7 | 8 | 9 | class NewsScanner(BaseResearcher): 10 | def __init__(self) -> None: 11 | super().__init__() 12 | self.analyst_type = "news_analyzer" 13 | 14 | async def analyze(self, state: ResearchState) -> Dict[str, Any]: 15 | company = state.get('company', 'Unknown Company') 16 | msg = 
[f"📰 News Scanner analyzing {company}"] 17 | 18 | # Generate search queries using LLM 19 | queries = await self.generate_queries(state, """ 20 | Generate queries on the recent news coverage of {company} such as: 21 | - Recent company announcements 22 | - Press releases 23 | - New partnerships 24 | """) 25 | 26 | subqueries_msg = "🔍 Subqueries for news analysis:\n" + "\n".join([f"• {query}" for query in queries]) 27 | messages = state.get('messages', []) 28 | messages.append(AIMessage(content=subqueries_msg)) 29 | state['messages'] = messages 30 | 31 | news_data = {} 32 | 33 | # If we have site_scrape data, include it first 34 | if site_scrape := state.get('site_scrape'): 35 | msg.append("\n📊 Including site scrape data in company analysis...") 36 | company_url = state.get('company_url', 'company-website') 37 | news_data[company_url] = { 38 | 'title': state.get('company', 'Unknown Company'), 39 | 'raw_content': site_scrape, 40 | 'query': f'News and announcements about {company}' # Add a default query for site scrape 41 | } 42 | 43 | # Perform additional research with recent time filter 44 | try: 45 | # Store documents with their respective queries 46 | for query in queries: 47 | documents = await self.search_documents(state, [query]) 48 | if documents: # Only process if we got results 49 | for url, doc in documents.items(): 50 | doc['query'] = query # Associate each document with its query 51 | news_data[url] = doc 52 | 53 | msg.append(f"\n✓ Found {len(news_data)} documents") 54 | if websocket_manager := state.get('websocket_manager'): 55 | if job_id := state.get('job_id'): 56 | await websocket_manager.send_status_update( 57 | job_id=job_id, 58 | status="processing", 59 | message=f"Used Tavily Search to find {len(news_data)} documents", 60 | result={ 61 | "step": "Searching", 62 | "analyst_type": "News Scanner", 63 | "queries": queries 64 | } 65 | ) 66 | except Exception as e: 67 | msg.append(f"\n⚠️ Error during research: {str(e)}") 68 | 69 | # Update state with our findings 70 | messages = state.get('messages', []) 71 | messages.append(AIMessage(content="\n".join(msg))) 72 | state['messages'] = messages 73 | state['news_data'] = news_data 74 | 75 | return { 76 | 'message': msg, 77 | 'news_data': news_data 78 | } 79 | 80 | async def run(self, state: ResearchState) -> Dict[str, Any]: 81 | return await self.analyze(state) -------------------------------------------------------------------------------- /backend/services/mongodb.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Any, Dict, Optional 3 | 4 | import certifi 5 | from pymongo import MongoClient 6 | 7 | 8 | class MongoDBService: 9 | def __init__(self, uri: str): 10 | # Use certifi for SSL certificate verification with updated options 11 | self.client = MongoClient( 12 | uri, 13 | tlsCAFile=certifi.where(), 14 | retryWrites=True, 15 | w='majority' 16 | ) 17 | self.db = self.client.get_database('tavily_research') 18 | self.jobs = self.db.jobs 19 | self.reports = self.db.reports 20 | 21 | def create_job(self, job_id: str, inputs: Dict[str, Any]) -> None: 22 | """Create a new research job record.""" 23 | self.jobs.insert_one({ 24 | "job_id": job_id, 25 | "inputs": inputs, 26 | "status": "pending", 27 | "created_at": datetime.utcnow(), 28 | "updated_at": datetime.utcnow() 29 | }) 30 | 31 | def update_job(self, job_id: str, 32 | status: str = None, 33 | result: Dict[str, Any] = None, 34 | error: str = None) -> None: 35 | """Update a research job with 
results or status.""" 36 | update_data = {"updated_at": datetime.utcnow()} 37 | if status: 38 | update_data["status"] = status 39 | if result: 40 | update_data["result"] = result 41 | if error: 42 | update_data["error"] = error 43 | 44 | self.jobs.update_one( 45 | {"job_id": job_id}, 46 | {"$set": update_data} 47 | ) 48 | 49 | def get_job(self, job_id: str) -> Optional[Dict[str, Any]]: 50 | """Retrieve a job by ID.""" 51 | return self.jobs.find_one({"job_id": job_id}) 52 | 53 | def store_report(self, job_id: str, report_data: Dict[str, Any]) -> None: 54 | """Store the finalized research report.""" 55 | self.reports.insert_one({ 56 | "job_id": job_id, 57 | "report_content": report_data.get("report", ""), 58 | "references": report_data.get("references", []), 59 | "sections": report_data.get("sections_completed", []), 60 | "analyst_queries": report_data.get("analyst_queries", {}), 61 | "created_at": datetime.utcnow() 62 | }) 63 | 64 | def get_report(self, job_id: str) -> Optional[Dict[str, Any]]: 65 | """Retrieve a report by job ID.""" 66 | return self.reports.find_one({"job_id": job_id}) -------------------------------------------------------------------------------- /backend/services/pdf_service.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | import os 4 | import re 5 | 6 | from backend.utils.utils import generate_pdf_from_md 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | class PDFService: 11 | def __init__(self, config): 12 | self.output_dir = config.get("pdf_output_dir", "pdfs") 13 | # Create output directory if it doesn't exist 14 | os.makedirs(self.output_dir, exist_ok=True) 15 | 16 | def _sanitize_company_name(self, company_name): 17 | """Sanitize company name for use in filenames.""" 18 | # Replace spaces with underscores and remove special characters 19 | sanitized = re.sub(r'[^\w\s-]', '', company_name).strip().replace(' ', '_') 20 | return sanitized.lower() 21 | 22 | def _generate_pdf_filename(self, company_name): 23 | """Generate a PDF filename based on the company name.""" 24 | sanitized_name = self._sanitize_company_name(company_name) 25 | return f"{sanitized_name}_report.pdf" 26 | 27 | def generate_pdf_stream(self, markdown_content, company_name=None): 28 | """ 29 | Generate a PDF from markdown content and return it as a stream. 
30 | 31 | Args: 32 | markdown_content (str): The markdown content to convert to PDF 33 | company_name (str, optional): The company name to use in the filename 34 | 35 | Returns: 36 | tuple: (success status, PDF stream or error message) 37 | """ 38 | try: 39 | # Extract company name from the first line if not provided 40 | if not company_name: 41 | first_line = markdown_content.split('\n')[0].strip() 42 | if first_line.startswith('# '): 43 | company_name = first_line[2:].strip() 44 | else: 45 | company_name = "Company Research" 46 | 47 | # Generate the output filename 48 | pdf_filename = self._generate_pdf_filename(company_name) 49 | 50 | # Create a BytesIO object to store the PDF 51 | pdf_buffer = io.BytesIO() 52 | 53 | # Generate the PDF directly to the buffer 54 | generate_pdf_from_md(markdown_content, pdf_buffer) 55 | 56 | # Reset buffer position to start 57 | pdf_buffer.seek(0) 58 | 59 | # Return success and the buffer 60 | return True, (pdf_buffer, pdf_filename) 61 | 62 | except Exception as e: 63 | error_msg = f"Error generating PDF: {str(e)}" 64 | logger.error(error_msg) 65 | return False, error_msg -------------------------------------------------------------------------------- /backend/services/websocket_manager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from datetime import datetime 4 | from typing import Dict, Set 5 | 6 | from fastapi import WebSocket 7 | 8 | # Set up logging 9 | logger = logging.getLogger(__name__) 10 | 11 | class WebSocketManager: 12 | def __init__(self): 13 | # Store active connections for each job 14 | self.active_connections: Dict[str, Set[WebSocket]] = {} 15 | 16 | async def connect(self, websocket: WebSocket, job_id: str): 17 | """Connect a new client to a specific job.""" 18 | if job_id not in self.active_connections: 19 | self.active_connections[job_id] = set() 20 | self.active_connections[job_id].add(websocket) 21 | logger.info(f"New WebSocket connection for job {job_id}") 22 | logger.info(f"Total connections for job: {len(self.active_connections[job_id])}") 23 | logger.info(f"All active jobs: {list(self.active_connections.keys())}") 24 | 25 | def disconnect(self, websocket: WebSocket, job_id: str): 26 | """Disconnect a client from a specific job.""" 27 | if job_id in self.active_connections: 28 | self.active_connections[job_id].discard(websocket) 29 | if not self.active_connections[job_id]: 30 | del self.active_connections[job_id] 31 | logger.info(f"WebSocket disconnected for job {job_id}") 32 | logger.info(f"Remaining connections for job: {len(self.active_connections.get(job_id, set()))}") 33 | logger.info(f"Remaining active jobs: {list(self.active_connections.keys())}") 34 | 35 | async def broadcast_to_job(self, job_id: str, message: dict): 36 | """Send a message to all clients connected to a specific job.""" 37 | if job_id not in self.active_connections: 38 | logger.warning(f"No active connections for job {job_id}") 39 | return 40 | 41 | # Add timestamp to message 42 | message["timestamp"] = datetime.now().isoformat() 43 | 44 | # Convert message to JSON string 45 | message_str = json.dumps(message) 46 | logger.info(f"Message content: {message_str}") 47 | 48 | # Send to all connected clients for this job 49 | success_count = 0 50 | disconnected = set() 51 | for connection in self.active_connections[job_id]: 52 | try: 53 | await connection.send_text(message_str) 54 | success_count += 1 55 | except Exception as e: 56 | logger.error(f"Error sending message to client: 
{str(e)}", exc_info=True) 57 | disconnected.add(connection) 58 | 59 | # Clean up disconnected clients 60 | for connection in disconnected: 61 | self.disconnect(connection, job_id) 62 | 63 | async def send_status_update(self, job_id: str, status: str, message: str = None, error: str = None, result: dict = None): 64 | """Helper method to send formatted status updates.""" 65 | update = { 66 | "type": "status_update", 67 | "data": { 68 | "status": status, 69 | "message": message, 70 | "error": error, 71 | "result": result 72 | } 73 | } 74 | #logger.info(f"Status: {status}, Message: {message}") 75 | await self.broadcast_to_job(job_id, update) -------------------------------------------------------------------------------- /backend/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import generate_pdf_from_md, clean_text 2 | from .references import ( 3 | extract_domain_name, 4 | extract_title_from_url_path, 5 | clean_title, 6 | normalize_url, 7 | extract_website_name_from_domain, 8 | process_references_from_search_results, 9 | format_reference_for_markdown, 10 | extract_link_info, 11 | format_references_section 12 | ) -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | backend: 3 | build: 4 | context: . 5 | dockerfile: Dockerfile 6 | ports: 7 | - "8000:8000" 8 | environment: 9 | - PYTHONUNBUFFERED=1 10 | - PORT=8000 11 | volumes: 12 | - ./reports:/app/reports 13 | - ./backend:/app/backend 14 | - ./application.py:/app/application.py 15 | env_file: 16 | - .env 17 | 18 | frontend: 19 | image: node:20-slim 20 | working_dir: /ui 21 | command: sh -c "npm install && npm run dev" 22 | ports: 23 | - "5174:5174" 24 | volumes: 25 | - ./ui:/ui 26 | environment: 27 | - NODE_ENV=development 28 | - VITE_API_URL=http://localhost:8000 29 | - VITE_WS_URL=ws://localhost:8000 -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "dockerfile_lines": [], 3 | "graphs": { 4 | "agent": "./langgraph_entry.py:graph" 5 | }, 6 | "env": ".env", 7 | "python_version": "3.11", 8 | "dependencies": [ 9 | "." 
10 | ] 11 | } -------------------------------------------------------------------------------- /langgraph_entry.py: -------------------------------------------------------------------------------- 1 | # langgraph_entry.py 2 | from backend.graph import Graph 3 | 4 | graph = Graph().compile() -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tavily-company-research", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": {} 6 | } 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2025.1.31 2 | fastapi==0.115.11 3 | langchain_core==0.3.41 4 | langgraph==0.3.5 5 | openai==1.65.4 6 | protobuf~=4.25.0 7 | pydantic==2.10.6 8 | pymongo==4.6.3 9 | reportlab==4.3.1 10 | tavily_python==0.5.1 11 | uvicorn[standard]==0.34.0 12 | websockets==12.0 13 | google-generativeai==0.8.4 -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Text styling 4 | BOLD='\033[1m' 5 | GREEN='\033[0;32m' 6 | BLUE='\033[0;34m' 7 | NC='\033[0m' # No Color 8 | 9 | # Version comparison function 10 | version_compare() { 11 | echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }' 12 | } 13 | 14 | echo -e "${BOLD}🚀 Welcome to the Agentic Company Researcher Setup!${NC}\n" 15 | 16 | # Check if Python 3.11+ is installed 17 | echo -e "${BLUE}Checking Python version...${NC}" 18 | if command -v python3 >/dev/null 2>&1; then 19 | python_version=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))') 20 | if [ "$(version_compare "$python_version")" -ge "$(version_compare "3.11")" ]; then 21 | echo -e "${GREEN}✓ Python $python_version is installed${NC}" 22 | else 23 | echo "❌ Python 3.11 or higher is required. Current version: $python_version" 24 | echo "Please install Python 3.11 or higher from https://www.python.org/downloads/" 25 | exit 1 26 | fi 27 | else 28 | echo "❌ Python 3 is not installed" 29 | echo "Please install Python 3.11 or higher from https://www.python.org/downloads/" 30 | exit 1 31 | fi 32 | 33 | # Check if Node.js 18+ is installed 34 | echo -e "\n${BLUE}Checking Node.js version...${NC}" 35 | if command -v node >/dev/null 2>&1; then 36 | node_version=$(node -v | cut -d'v' -f2) 37 | if [ "$(version_compare "$node_version")" -ge "$(version_compare "18.0.0")" ]; then 38 | echo -e "${GREEN}✓ Node.js $node_version is installed${NC}" 39 | else 40 | echo "❌ Node.js 18 or higher is required. Current version: $node_version" 41 | echo "Please install Node.js 18 or higher from https://nodejs.org/" 42 | exit 1 43 | fi 44 | else 45 | echo "❌ Node.js is not installed" 46 | echo "Please install Node.js 18 or higher from https://nodejs.org/" 47 | exit 1 48 | fi 49 | 50 | # Ask about virtual environment 51 | echo -e "\n${BLUE}Would you like to set up a Python virtual environment? 
(Recommended) [Y/n]${NC}" 52 | read -r use_venv 53 | use_venv=${use_venv:-Y} 54 | 55 | if [[ $use_venv =~ ^[Yy]$ ]]; then 56 | echo -e "\n${BLUE}Setting up Python virtual environment...${NC}" 57 | python3 -m venv .venv 58 | source .venv/bin/activate 59 | echo -e "${GREEN}✓ Virtual environment created and activated${NC}" 60 | 61 | # Install Python dependencies in venv 62 | echo -e "\n${BLUE}Installing Python dependencies in virtual environment...${NC}" 63 | pip install -r requirements.txt 64 | echo -e "${GREEN}✓ Python dependencies installed${NC}" 65 | else 66 | # Prompt for global installation 67 | echo -e "\n${BLUE}Would you like to install Python dependencies globally? This may affect other Python projects. [y/N]${NC}" 68 | read -r install_global 69 | install_global=${install_global:-N} 70 | 71 | if [[ $install_global =~ ^[Yy]$ ]]; then 72 | echo -e "\n${BLUE}Installing Python dependencies globally...${NC}" 73 | pip3 install -r requirements.txt 74 | echo -e "${GREEN}✓ Python dependencies installed${NC}" 75 | echo -e "${BLUE}Note: Dependencies have been installed in your global Python environment${NC}" 76 | else 77 | echo -e "${BLUE}Skipping Python dependency installation. You'll need to install them manually later.${NC}" 78 | echo -e "${BLUE}You can do this by running: pip install -r requirements.txt${NC}" 79 | fi 80 | fi 81 | 82 | # Install Node.js dependencies 83 | echo -e "\n${BLUE}Installing Node.js dependencies...${NC}" 84 | cd ui 85 | npm install 86 | # Create or overwrite .env.development for frontend dev environment 87 | cat > .env.development << EOL 88 | VITE_API_URL=http://localhost:8000 89 | VITE_WS_URL=ws://localhost:8000 90 | EOL 91 | cd .. 92 | echo -e "${GREEN}✓ Node.js dependencies installed${NC}" 93 | 94 | # Setup .env file 95 | echo -e "\n${BLUE}Setting up environment variables...${NC}" 96 | if [ -f ".env" ]; then 97 | echo "Found existing .env file. Would you like to overwrite it? (y/n)" 98 | read -r overwrite 99 | if [ "$overwrite" != "y" ]; then 100 | echo "Keeping existing .env file" 101 | else 102 | setup_env=true 103 | fi 104 | else 105 | setup_env=true 106 | fi 107 | 108 | if [ "$setup_env" = true ]; then 109 | echo -e "\nPlease enter your API keys:" 110 | echo -n "Tavily API Key: " 111 | read -r tavily_key 112 | echo -n "Google Gemini API Key: " 113 | read -r gemini_key 114 | echo -n "OpenAI API Key: " 115 | read -r openai_key 116 | echo -n "MongoDB URI (optional - press enter to skip): " 117 | read -r mongodb_uri 118 | 119 | # Create .env file 120 | cat > .env << EOL 121 | TAVILY_API_KEY=$tavily_key 122 | GEMINI_API_KEY=$gemini_key 123 | OPENAI_API_KEY=$openai_key 124 | EOL 125 | 126 | # Add MongoDB URI if provided 127 | if [ ! -z "$mongodb_uri" ]; then 128 | echo "MONGODB_URI=$mongodb_uri" >> .env 129 | fi 130 | 131 | echo -e "${GREEN}✓ Environment variables saved to .env${NC}" 132 | fi 133 | 134 | # Final instructions and server startup options 135 | echo -e "\n${BOLD}🎉 Setup complete!${NC}" 136 | 137 | if [[ $use_venv =~ ^[Yy]$ ]]; then 138 | echo -e "\n${BLUE}Virtual environment is now activated and ready to use${NC}" 139 | fi 140 | 141 | # Ask about starting servers 142 | echo -e "\n${BLUE}Would you like to start the application servers now? 
[Y/n]${NC}"
143 | read -r start_servers
144 | start_servers=${start_servers:-Y}
145 | 
146 | if [[ $start_servers =~ ^[Yy]$ ]]; then
147 |     echo -e "\n${BLUE}Choose backend server option:${NC}"
148 |     echo "1) python application.py"
149 |     echo "2) uvicorn application:app --reload --port 8000"
150 |     read -r backend_choice
151 | 
152 |     # Start backend server in background
153 |     if [ "$backend_choice" = "1" ]; then
154 |         echo -e "\n${GREEN}Starting backend server with python...${NC}"
155 |         python application.py &
156 |     else
157 |         echo -e "\n${GREEN}Starting backend server with uvicorn...${NC}"
158 |         uvicorn application:app --reload --port 8000 &
159 |     fi
160 | 
161 |     # Store backend PID
162 |     backend_pid=$!
163 | 
164 |     # Wait a moment for backend to start
165 |     sleep 2
166 | 
167 |     # Start frontend server
168 |     echo -e "\n${GREEN}Starting frontend server...${NC}"
169 |     cd ui
170 |     npm run dev &
171 |     frontend_pid=$!
172 |     cd ..
173 | 
174 |     echo -e "\n${GREEN}Servers are starting up! The application will be available at:${NC}"
175 |     echo -e "${BOLD}http://localhost:5173${NC}"
176 | 
177 |     # Add trap to handle script termination
178 |     trap 'kill $backend_pid $frontend_pid 2>/dev/null' EXIT
179 | 
180 |     # Keep script running until user stops it
181 |     echo -e "\n${BLUE}Press Ctrl+C to stop the servers${NC}"
182 |     wait
183 | else
184 |     echo -e "\n${BOLD}To start the application manually:${NC}"
185 |     echo -e "\n1. Start the backend server (choose one):"
186 |     echo "   Option 1: python application.py"
187 |     echo "   Option 2: uvicorn application:app --reload --port 8000"
188 |     echo -e "\n2. In a new terminal, start the frontend:"
189 |     echo "   cd ui"
190 |     echo "   npm run dev"
191 |     echo -e "\n3. Access the application at ${BOLD}http://localhost:5173${NC}"
192 | fi
193 | 
194 | echo -e "\n${BOLD}Need help?${NC}"
195 | echo "- Documentation: README.md"
196 | echo "- Issues: https://github.com/pogjester/tavily-company-research/issues"
197 | echo -e "\n${GREEN}Happy researching! 
🚀${NC}" -------------------------------------------------------------------------------- /static/agent-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/agent-flow.png -------------------------------------------------------------------------------- /static/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/demo.mp4 -------------------------------------------------------------------------------- /static/ui-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/ui-1.png -------------------------------------------------------------------------------- /static/ui-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/ui-2.png -------------------------------------------------------------------------------- /ui/.env.development.example: -------------------------------------------------------------------------------- 1 | VITE_API_URL=http://localhost:8000 2 | VITE_WS_URL=ws://localhost:8000 -------------------------------------------------------------------------------- /ui/.gitignore: -------------------------------------------------------------------------------- 1 | .vercel 2 | /dist 3 | /node_modules 4 | .env 5 | .env.local 6 | .env.development.local 7 | .env.test.local 8 | .env.production.local 9 | .env.development 10 | .env.production 11 | .env.test 12 | .vite -------------------------------------------------------------------------------- /ui/eslint.config.js: -------------------------------------------------------------------------------- 1 | import js from '@eslint/js'; 2 | import globals from 'globals'; 3 | import reactHooks from 'eslint-plugin-react-hooks'; 4 | import reactRefresh from 'eslint-plugin-react-refresh'; 5 | import tseslint from 'typescript-eslint'; 6 | 7 | export default tseslint.config( 8 | { ignores: ['dist'] }, 9 | { 10 | extends: [js.configs.recommended, ...tseslint.configs.recommended], 11 | files: ['**/*.{ts,tsx}'], 12 | languageOptions: { 13 | ecmaVersion: 2020, 14 | globals: globals.browser, 15 | }, 16 | plugins: { 17 | 'react-hooks': reactHooks, 18 | 'react-refresh': reactRefresh, 19 | }, 20 | rules: { 21 | ...reactHooks.configs.recommended.rules, 22 | 'react-refresh/only-export-components': [ 23 | 'warn', 24 | { allowConstantExport: true }, 25 | ], 26 | }, 27 | } 28 | ); 29 | -------------------------------------------------------------------------------- /ui/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Company Research 8 | 9 | 10 |
11 | 
12 | 
13 | 
14 | 
--------------------------------------------------------------------------------
/ui/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "tavily-company-research-ui",
3 |   "private": true,
4 |   "version": "0.0.0",
5 |   "type": "module",
6 |   "scripts": {
7 |     "dev": "vite",
8 |     "build": "tsc && vite build",
9 |     "start": "serve -s dist -l $PORT",
10 |     "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
11 |     "preview": "vite preview"
12 |   },
13 |   "dependencies": {
14 |     "framer-motion": "^12.4.10",
15 |     "lucide-react": "^0.344.0",
16 |     "react": "^18.2.0",
17 |     "react-dom": "^18.2.0",
18 |     "react-markdown": "^9.0.1",
19 |     "rehype-raw": "^7.0.0",
20 |     "remark-gfm": "^4.0.0",
21 |     "serve": "^14.2.1"
22 |   },
23 |   "devDependencies": {
24 |     "@types/google.maps": "^3.58.1",
25 |     "@types/node": "^22.13.9",
26 |     "@types/react": "^18.2.56",
27 |     "@types/react-dom": "^18.2.19",
28 |     "@typescript-eslint/eslint-plugin": "^7.0.2",
29 |     "@typescript-eslint/parser": "^7.0.2",
30 |     "@vitejs/plugin-react": "^4.2.1",
31 |     "autoprefixer": "^10.4.17",
32 |     "eslint": "^8.56.0",
33 |     "eslint-plugin-react-hooks": "^4.6.0",
34 |     "eslint-plugin-react-refresh": "^0.4.5",
35 |     "postcss": "^8.4.35",
36 |     "tailwindcss": "^3.4.1",
37 |     "typescript": "^5.2.2",
38 |     "vite": "^6.3.4"
39 |   },
40 |   "engines": {
41 |     "node": ">=14.x"
42 |   }
43 | }
44 | 
--------------------------------------------------------------------------------
/ui/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | };
7 | 
--------------------------------------------------------------------------------
/ui/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/ui/public/favicon.ico
--------------------------------------------------------------------------------
/ui/public/tavilylogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/ui/public/tavilylogo.png
--------------------------------------------------------------------------------
/ui/src/components/CurationExtraction.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { ChevronDown, ChevronUp, Loader2 } from 'lucide-react';
3 | 
4 | type EnrichmentCounts = {
5 |   company: { total: number; enriched: number };
6 |   industry: { total: number; enriched: number };
7 |   financial: { total: number; enriched: number };
8 |   news: { total: number; enriched: number };
9 | };
10 | 
11 | interface CurationExtractionProps {
12 |   enrichmentCounts: EnrichmentCounts | undefined;
13 |   isExpanded: boolean;
14 |   onToggleExpand: () => void;
15 |   isResetting: boolean;
16 |   loaderColor: string;
17 | }
18 | 
19 | const CurationExtraction: React.FC<CurationExtractionProps> = ({
20 |   enrichmentCounts,
21 |   isExpanded,
22 |   onToggleExpand,
23 |   isResetting,
24 |   loaderColor
25 | }) => {
26 |   const glassStyle = "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl";
27 |   const glassCardStyle = `${glassStyle} rounded-2xl p-6`;
28 | 
29 |   return (
30 | 
35 |
39 |

40 | Curation and Extraction 41 |

42 | 49 |
50 | 51 |
54 |
55 | {['company', 'industry', 'financial', 'news'].map((category) => { 56 | const counts = enrichmentCounts?.[category as keyof EnrichmentCounts]; 57 | return ( 58 |
59 |

{category}

60 |
61 |
62 | {counts ? ( 63 | 64 | {counts.enriched} 65 | 66 | ) : ( 67 | 68 | )} 69 |
70 |
71 | {counts ? ( 72 | `selected from ${counts.total}` 73 | ) : ( 74 | "waiting..." 75 | )} 76 |
77 |
78 |
79 | ); 80 | })} 81 |
82 |
83 | 84 | {!isExpanded && enrichmentCounts && ( 85 |
86 | {Object.values(enrichmentCounts).reduce((acc, curr) => acc + curr.enriched, 0)} documents enriched from {Object.values(enrichmentCounts).reduce((acc, curr) => acc + curr.total, 0)} total 87 |
88 | )} 89 |
90 | ); 91 | }; 92 | 93 | export default CurationExtraction; -------------------------------------------------------------------------------- /ui/src/components/ExamplePopup.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect, RefObject } from 'react'; 2 | import { ArrowRight, Sparkles } from 'lucide-react'; 3 | 4 | // Sample companies for examples 5 | export const EXAMPLE_COMPANIES = [ 6 | { 7 | name: "Stripe", 8 | url: "stripe.com", 9 | hq: "San Francisco, CA", 10 | industry: "Financial Technology" 11 | }, 12 | { 13 | name: "Shopify", 14 | url: "shopify.com", 15 | hq: "Ottawa, Canada", 16 | industry: "E-commerce" 17 | }, 18 | { 19 | name: "Notion", 20 | url: "notion.so", 21 | hq: "San Francisco, CA", 22 | industry: "Productivity Software" 23 | }, 24 | { 25 | name: "Tesla", 26 | url: "tesla.com", 27 | hq: "Austin, TX", 28 | industry: "Automotive & Energy" 29 | }, 30 | { 31 | name: "Airbnb", 32 | url: "airbnb.com", 33 | hq: "San Francisco, CA", 34 | industry: "Travel & Hospitality" 35 | }, 36 | { 37 | name: "Slack", 38 | url: "slack.com", 39 | hq: "San Francisco, CA", 40 | industry: "Business Communication" 41 | }, 42 | { 43 | name: "Spotify", 44 | url: "spotify.com", 45 | hq: "Stockholm, Sweden", 46 | industry: "Music Streaming" 47 | } 48 | ]; 49 | 50 | export type ExampleCompany = typeof EXAMPLE_COMPANIES[0]; 51 | 52 | export interface ExamplePopupProps { 53 | visible: boolean; 54 | onExampleSelect: (example: ExampleCompany) => void; 55 | glassStyle: { 56 | card: string; 57 | input: string; 58 | }; 59 | exampleRef: RefObject; 60 | } 61 | 62 | // Example Popup Component 63 | const ExamplePopup: React.FC = ({ 64 | visible, 65 | onExampleSelect, 66 | glassStyle, 67 | exampleRef 68 | }) => { 69 | const [selectedExample, setSelectedExample] = useState(0); 70 | const [isNameAnimating, setIsNameAnimating] = useState(false); 71 | 72 | // Cycle through examples periodically 73 | useEffect(() => { 74 | const interval = setInterval(() => { 75 | // Trigger name animation 76 | setIsNameAnimating(true); 77 | setTimeout(() => { 78 | setSelectedExample((prev) => (prev + 1) % EXAMPLE_COMPANIES.length); 79 | setTimeout(() => { 80 | setIsNameAnimating(false); 81 | }, 150); 82 | }, 150); 83 | }, 5000); 84 | return () => clearInterval(interval); 85 | }, []); 86 | 87 | if (!visible) return null; 88 | 89 | return ( 90 |
onExampleSelect(EXAMPLE_COMPANIES[selectedExample])} 96 | style={{ 97 | borderTopLeftRadius: '12px', 98 | borderTopRightRadius: '12px', 99 | borderBottomRightRadius: '12px', 100 | borderBottomLeftRadius: '4px', 101 | }} 102 | > 103 | 104 |
105 | Try an example: 106 | 114 | {EXAMPLE_COMPANIES[selectedExample].name} 115 | 116 |
117 | 118 |
119 | ); 120 | }; 121 | 122 | export default ExamplePopup; -------------------------------------------------------------------------------- /ui/src/components/Header.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Github } from 'lucide-react'; 3 | 4 | interface HeaderProps { 5 | glassStyle: string; 6 | } 7 | 8 | const Header: React.FC = ({ glassStyle }) => { 9 | const handleImageError = (e: React.SyntheticEvent) => { 10 | console.error('Failed to load Tavily logo'); 11 | console.log('Image path:', e.currentTarget.src); 12 | e.currentTarget.style.display = 'none'; 13 | }; 14 | 15 | return ( 16 |
17 |
18 |

19 | Company Research Agent 20 |

21 |

22 | Conduct in-depth company diligence powered by Tavily 23 |

24 |
25 | 65 |
66 | ); 67 | }; 68 | 69 | export default Header; -------------------------------------------------------------------------------- /ui/src/components/LocationInput.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useRef, useState, useCallback } from 'react'; 2 | import { MapPin } from 'lucide-react'; 3 | 4 | interface LocationInputProps { 5 | value: string; 6 | onChange: (value: string) => void; 7 | className?: string; 8 | } 9 | 10 | declare global { 11 | interface Window { 12 | google: any; 13 | initGoogleMapsCallback: () => void; 14 | } 15 | } 16 | 17 | // Create a global script loader to ensure we only load the script once 18 | let scriptPromise: Promise | null = null; 19 | 20 | const loadGoogleMapsScript = (): Promise => { 21 | if (scriptPromise) { 22 | return scriptPromise; 23 | } 24 | 25 | scriptPromise = new Promise((resolve) => { 26 | // If already loaded, resolve immediately 27 | if (window.google?.maps?.places) { 28 | resolve(); 29 | return; 30 | } 31 | 32 | // Define the callback function 33 | window.initGoogleMapsCallback = () => { 34 | resolve(); 35 | }; 36 | 37 | // Create script element 38 | const script = document.createElement('script'); 39 | // Use loading=async parameter as recommended by Google 40 | script.src = `https://maps.googleapis.com/maps/api/js?key=${import.meta.env.VITE_GOOGLE_MAPS_API_KEY}&libraries=places&loading=async&callback=initGoogleMapsCallback`; 41 | script.async = true; 42 | script.defer = true; 43 | 44 | // Handle errors 45 | script.onerror = (error) => { 46 | console.error('Error loading Google Maps script:', error); 47 | scriptPromise = null; 48 | }; 49 | 50 | // Append to document 51 | document.head.appendChild(script); 52 | }); 53 | 54 | return scriptPromise; 55 | }; 56 | 57 | const LocationInput: React.FC = ({ value, onChange, className }) => { 58 | const inputRef = useRef(null); 59 | const autocompleteRef = useRef(null); 60 | const [isApiLoaded, setIsApiLoaded] = useState(false); 61 | const onChangeRef = useRef(onChange); 62 | const isInitializedRef = useRef(false); 63 | 64 | // Update the ref when onChange changes 65 | useEffect(() => { 66 | onChangeRef.current = onChange; 67 | }, [onChange]); 68 | 69 | // Load the Google Maps API 70 | useEffect(() => { 71 | // Check if script is already in the document 72 | const existingScript = document.querySelector('script[src*="maps.googleapis.com/maps/api/js"]'); 73 | if (existingScript) { 74 | console.warn('Google Maps script is already loaded elsewhere in the application'); 75 | // If script exists but API not available yet, wait for it 76 | if (!window.google?.maps?.places) { 77 | const checkInterval = setInterval(() => { 78 | if (window.google?.maps?.places) { 79 | setIsApiLoaded(true); 80 | clearInterval(checkInterval); 81 | } 82 | }, 100); 83 | 84 | // Clear interval after 10 seconds to prevent infinite checking 85 | setTimeout(() => clearInterval(checkInterval), 10000); 86 | } else { 87 | setIsApiLoaded(true); 88 | } 89 | return; 90 | } 91 | 92 | const loadApi = async () => { 93 | try { 94 | await loadGoogleMapsScript(); 95 | setIsApiLoaded(true); 96 | } catch (error) { 97 | console.error('Failed to load Google Maps API:', error); 98 | } 99 | }; 100 | 101 | loadApi(); 102 | }, []); 103 | 104 | // Initialize autocomplete when API is loaded and input is available 105 | useEffect(() => { 106 | if (!isApiLoaded || !inputRef.current || !window.google?.maps?.places || isInitializedRef.current) { 107 | return; 108 | } 109 | 110 | try { 
111 | // Initialize autocomplete 112 | autocompleteRef.current = new window.google.maps.places.Autocomplete(inputRef.current, { 113 | types: ['(cities)'], 114 | }); 115 | 116 | // Style the autocomplete dropdown 117 | const style = document.createElement('style'); 118 | style.textContent = ` 119 | .pac-container { 120 | background-color: white !important; 121 | border: 1px solid rgba(70, 139, 255, 0.1) !important; 122 | border-radius: 0.75rem !important; 123 | margin-top: 0.5rem !important; 124 | font-family: "Noto Sans", sans-serif !important; 125 | overflow: hidden !important; 126 | box-shadow: none !important; 127 | } 128 | .pac-item { 129 | padding: 0.875rem 1.25rem !important; 130 | cursor: pointer !important; 131 | transition: all 0.2s ease-in-out !important; 132 | border-bottom: 1px solid rgba(70, 139, 255, 0.05) !important; 133 | } 134 | .pac-item:last-child { 135 | border-bottom: none !important; 136 | } 137 | .pac-item:hover { 138 | background-color: rgba(70, 139, 255, 0.03) !important; 139 | } 140 | .pac-item-selected { 141 | background-color: rgba(70, 139, 255, 0.05) !important; 142 | } 143 | .pac-item-query { 144 | color: #1a365d !important; 145 | font-size: 0.9375rem !important; 146 | font-weight: 500 !important; 147 | } 148 | .pac-matched { 149 | font-weight: 600 !important; 150 | } 151 | .pac-item span:not(.pac-item-query) { 152 | color: #64748b !important; 153 | font-size: 0.8125rem !important; 154 | margin-left: 0.5rem !important; 155 | } 156 | /* Hide the location icon */ 157 | .pac-icon { 158 | display: none !important; 159 | } 160 | `; 161 | document.head.appendChild(style); 162 | 163 | // Add place_changed listener 164 | const autocomplete = autocompleteRef.current; 165 | if (autocomplete) { 166 | autocomplete.addListener('place_changed', () => { 167 | const place = autocomplete.getPlace(); 168 | if (place?.formatted_address) { 169 | onChangeRef.current(place.formatted_address); 170 | } 171 | }); 172 | } 173 | 174 | isInitializedRef.current = true; 175 | } catch (error) { 176 | console.error('Error initializing Google Maps Autocomplete:', error); 177 | } 178 | 179 | // Cleanup 180 | return () => { 181 | if (autocompleteRef.current && window.google?.maps?.event) { 182 | window.google.maps.event.clearInstanceListeners(autocompleteRef.current); 183 | autocompleteRef.current = null; 184 | isInitializedRef.current = false; 185 | } 186 | }; 187 | }, [isApiLoaded]); // Removed onChange from dependencies 188 | 189 | // Handle manual input changes 190 | const handleInputChange = useCallback((e: React.ChangeEvent) => { 191 | onChange(e.target.value); 192 | }, [onChange]); 193 | 194 | return ( 195 |
196 |
197 | 198 | { 204 | if (e.key === 'Enter') { 205 | e.preventDefault(); 206 | } 207 | }} 208 | className={`${className} !font-['DM_Sans']`} 209 | placeholder="City, Country" 210 | /> 211 |
212 | ); 213 | }; 214 | 215 | export default LocationInput; -------------------------------------------------------------------------------- /ui/src/components/ResearchBriefings.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { ChevronDown, ChevronUp, CheckCircle2 } from 'lucide-react'; 3 | 4 | type BriefingStatus = { 5 | company: boolean; 6 | industry: boolean; 7 | financial: boolean; 8 | news: boolean; 9 | }; 10 | 11 | interface ResearchBriefingsProps { 12 | briefingStatus: BriefingStatus; 13 | isExpanded: boolean; 14 | onToggleExpand: () => void; 15 | isResetting: boolean; 16 | } 17 | 18 | const ResearchBriefings: React.FC = ({ 19 | briefingStatus, 20 | isExpanded, 21 | onToggleExpand, 22 | isResetting 23 | }) => { 24 | const glassStyle = "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl"; 25 | const cardGlassStyle = "backdrop-filter backdrop-blur-lg bg-white/80 shadow-sm"; 26 | 27 | return ( 28 |
33 |
37 |

38 | Research Briefings 39 |

40 | 47 |
48 | 49 |
52 |
53 | {['company', 'industry', 'financial', 'news'].map((category) => ( 54 |
62 | {/* Background decoration element (only visible when active) */} 63 |
69 | 70 |
71 |

{category}

76 | {briefingStatus[category as keyof BriefingStatus] ? ( 77 | 78 | ) : ( 79 |
80 | )} 81 |
82 |
83 | ))} 84 |
85 |
86 | 87 | {!isExpanded && ( 88 |
89 | {Object.values(briefingStatus).filter(Boolean).length} of {Object.keys(briefingStatus).length} briefings completed 90 |
91 | )} 92 |
93 | ); 94 | }; 95 | 96 | export default ResearchBriefings; -------------------------------------------------------------------------------- /ui/src/components/ResearchForm.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useRef, useEffect } from 'react'; 2 | import { Building2, Factory, Globe, Loader2, Search } from 'lucide-react'; 3 | import LocationInput from './LocationInput'; 4 | import ExamplePopup, { ExampleCompany } from './ExamplePopup'; 5 | 6 | interface FormData { 7 | companyName: string; 8 | companyUrl: string; 9 | companyHq: string; 10 | companyIndustry: string; 11 | } 12 | 13 | interface ResearchFormProps { 14 | onSubmit: (formData: FormData) => Promise; 15 | isResearching: boolean; 16 | glassStyle: { 17 | card: string; 18 | input: string; 19 | }; 20 | loaderColor: string; 21 | } 22 | 23 | const ResearchForm: React.FC = ({ 24 | onSubmit, 25 | isResearching, 26 | glassStyle, 27 | loaderColor 28 | }) => { 29 | const [formData, setFormData] = useState({ 30 | companyName: "", 31 | companyUrl: "", 32 | companyHq: "", 33 | companyIndustry: "", 34 | }); 35 | 36 | // Animation states 37 | const [showExampleSuggestion, setShowExampleSuggestion] = useState(true); 38 | const [isExampleAnimating, setIsExampleAnimating] = useState(false); 39 | const [wasResearching, setWasResearching] = useState(false); 40 | 41 | // Refs for form fields for animation 42 | const formRef = useRef(null); 43 | const exampleRef = useRef(null); 44 | 45 | // Hide example suggestion when form is filled 46 | useEffect(() => { 47 | if (formData.companyName) { 48 | setShowExampleSuggestion(false); 49 | } else if (!isExampleAnimating) { 50 | setShowExampleSuggestion(true); 51 | } 52 | }, [formData.companyName, isExampleAnimating]); 53 | 54 | // Track research state changes to show example popup when research completes 55 | useEffect(() => { 56 | // If we were researching and now we're not, research just completed 57 | if (wasResearching && !isResearching) { 58 | // Add a slight delay to let animations complete 59 | setTimeout(() => { 60 | // Reset form fields to empty values 61 | setFormData({ 62 | companyName: "", 63 | companyUrl: "", 64 | companyHq: "", 65 | companyIndustry: "", 66 | }); 67 | 68 | // Show the example suggestion again 69 | setShowExampleSuggestion(true); 70 | }, 1000); 71 | } 72 | 73 | // Update tracking state 74 | setWasResearching(isResearching); 75 | }, [isResearching, wasResearching]); 76 | 77 | const handleSubmit = async (e: React.FormEvent) => { 78 | e.preventDefault(); 79 | await onSubmit(formData); 80 | }; 81 | 82 | const fillExampleData = (example: ExampleCompany) => { 83 | // Start animation 84 | setIsExampleAnimating(true); 85 | 86 | // Animate the suggestion moving into the form 87 | if (exampleRef.current && formRef.current) { 88 | const exampleRect = exampleRef.current.getBoundingClientRect(); 89 | const formRect = formRef.current.getBoundingClientRect(); 90 | 91 | // Calculate the distance to move 92 | const moveX = formRect.left + 20 - exampleRect.left; 93 | const moveY = formRect.top + 20 - exampleRect.top; 94 | 95 | // Apply animation 96 | exampleRef.current.style.transform = `translate(${moveX}px, ${moveY}px) scale(0.6)`; 97 | exampleRef.current.style.opacity = '0'; 98 | } 99 | 100 | // Fill in form data after a short delay for animation 101 | setTimeout(() => { 102 | const newFormData = { 103 | companyName: example.name, 104 | companyUrl: example.url, 105 | companyHq: example.hq, 106 | companyIndustry: 
example.industry 107 | }; 108 | 109 | // Update form data 110 | setFormData(newFormData); 111 | 112 | // Start research automatically (only if not already researching) 113 | if (!isResearching) { 114 | onSubmit(newFormData); 115 | } 116 | 117 | setIsExampleAnimating(false); 118 | }, 500); 119 | }; 120 | 121 | return ( 122 |
123 | {/* Example Suggestion */} 124 | 130 | 131 | {/* Main Form */} 132 |
133 |
134 |
135 | {/* Company Name */} 136 |
137 | 143 |
144 |
145 | 146 | 152 | setFormData((prev) => ({ 153 | ...prev, 154 | companyName: e.target.value, 155 | })) 156 | } 157 | className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`} 158 | placeholder="Enter company name" 159 | /> 160 |
161 |
162 | 163 | {/* Company URL */} 164 |
165 | 171 |
172 |
173 | 174 | 179 | setFormData((prev) => ({ 180 | ...prev, 181 | companyUrl: e.target.value, 182 | })) 183 | } 184 | className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`} 185 | placeholder="example.com" 186 | /> 187 |
188 |
189 | 190 | {/* Company HQ */} 191 |
192 | 198 | 201 | setFormData((prev) => ({ 202 | ...prev, 203 | companyHq: value, 204 | })) 205 | } 206 | className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`} 207 | /> 208 |
209 | 210 | {/* Company Industry */} 211 |
212 | 218 |
219 |
220 | 221 | 226 | setFormData((prev) => ({ 227 | ...prev, 228 | companyIndustry: e.target.value, 229 | })) 230 | } 231 | className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`} 232 | placeholder="e.g. Technology, Healthcare" 233 | /> 234 |
235 |
236 |
237 | 238 | 258 |
259 |
260 |
261 | ); 262 | }; 263 | 264 | export default ResearchForm; -------------------------------------------------------------------------------- /ui/src/components/ResearchQueries.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { ChevronDown, ChevronUp } from 'lucide-react'; 3 | import { ResearchQueriesProps } from '../types'; 4 | 5 | const ResearchQueries: React.FC = ({ 6 | queries, 7 | streamingQueries, 8 | isExpanded, 9 | onToggleExpand, 10 | isResetting, 11 | glassStyle 12 | }) => { 13 | const glassCardStyle = `${glassStyle} rounded-2xl p-6`; 14 | const fadeInAnimation = "transition-all duration-300 ease-in-out"; 15 | 16 | return ( 17 |
20 |
24 |

25 | Generated Research Queries 26 |

27 | 34 |
35 | 36 |
39 |
40 | {['company', 'industry', 'financial', 'news'].map((category) => ( 41 |
42 |

43 | {category.charAt(0).toUpperCase() + category.slice(1)} Queries 44 |

45 |
46 | {/* Show streaming queries first */} 47 | {Object.entries(streamingQueries) 48 | .filter(([key]) => key.startsWith(category)) 49 | .map(([key, query]) => ( 50 |
51 | {query.text} 52 | | 53 |
54 | ))} 55 | {/* Then show completed queries */} 56 | {queries 57 | .filter((q) => q.category.startsWith(category)) 58 | .map((query, idx) => ( 59 |
60 | {query.text} 61 |
62 | ))} 63 |
64 |
65 | ))} 66 |
67 |
68 | 69 | {!isExpanded && ( 70 |
71 | {queries.length} queries generated across {['company', 'industry', 'financial', 'news'].length} categories 72 |
73 | )} 74 |
75 | ); 76 | }; 77 | 78 | export default ResearchQueries; -------------------------------------------------------------------------------- /ui/src/components/ResearchReport.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactMarkdown from "react-markdown"; 3 | import rehypeRaw from 'rehype-raw'; 4 | import remarkGfm from 'remark-gfm'; 5 | import { Check, Copy, Download, Loader2 } from 'lucide-react'; 6 | import { GlassStyle, AnimationStyle } from '../types'; 7 | 8 | interface ResearchReportProps { 9 | output: { 10 | summary: string; 11 | details: { 12 | report: string; 13 | }; 14 | } | null; 15 | isResetting: boolean; 16 | glassStyle: GlassStyle; 17 | fadeInAnimation: AnimationStyle; 18 | loaderColor: string; 19 | isGeneratingPdf: boolean; 20 | isCopied: boolean; 21 | onCopyToClipboard: () => void; 22 | onGeneratePdf: () => void; 23 | } 24 | 25 | const ResearchReport: React.FC = ({ 26 | output, 27 | isResetting, 28 | glassStyle, 29 | fadeInAnimation, 30 | loaderColor, 31 | isGeneratingPdf, 32 | isCopied, 33 | onCopyToClipboard, 34 | onGeneratePdf 35 | }) => { 36 | if (!output || !output.details) return null; 37 | 38 | return ( 39 |
42 |
43 | {output?.details?.report && ( 44 | <> 45 | 55 | 72 | 73 | )} 74 |
75 |
76 |
77 | ( 82 |
83 | ), 84 | h1: ({node, children, ...props}) => { 85 | const text = String(children); 86 | const isFirstH1 = text.includes("Research Report"); 87 | const isReferences = text.includes("References"); 88 | return ( 89 |
90 |

94 | {children} 95 |

96 | {isReferences && ( 97 |
98 | )} 99 |
100 | ); 101 | }, 102 | h2: ({node, ...props}) => ( 103 |

104 | ), 105 | h3: ({node, ...props}) => ( 106 |

107 | ), 108 | p: ({node, children, ...props}) => { 109 | const text = String(children); 110 | const isSubsectionHeader = ( 111 | text.includes('\n') === false && 112 | text.length < 50 && 113 | (text.endsWith(':') || /^[A-Z][A-Za-z\s\/]+$/.test(text)) 114 | ); 115 | 116 | if (isSubsectionHeader) { 117 | return ( 118 |

119 | {text.endsWith(':') ? text.slice(0, -1) : text} 120 |

121 | ); 122 | } 123 | 124 | const isBulletLabel = text.startsWith('•') && text.includes(':'); 125 | if (isBulletLabel) { 126 | const [label, content] = text.split(':'); 127 | return ( 128 |
129 | 130 | {label.replace('•', '').trim()}: 131 | 132 | {content} 133 |
134 | ); 135 | } 136 | 137 | const urlRegex = /(https?:\/\/[^\s<>"]+)/g; 138 | if (urlRegex.test(text)) { 139 | const parts = text.split(urlRegex); 140 | return ( 141 |

142 | {parts.map((part, i) => 143 | urlRegex.test(part) ? ( 144 | 151 | {part} 152 | 153 | ) : part 154 | )} 155 |

156 | ); 157 | } 158 | 159 | return

{children}

; 160 | }, 161 | ul: ({node, ...props}) => ( 162 |
181 |
182 |
183 | ); 184 | }; 185 | 186 | export default ResearchReport; -------------------------------------------------------------------------------- /ui/src/components/ResearchStatus.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Loader2, CheckCircle2, XCircle } from 'lucide-react'; 3 | import { ResearchStatusProps } from '../types'; 4 | 5 | const ResearchStatus: React.FC = ({ 6 | status, 7 | error, 8 | isComplete, 9 | currentPhase, 10 | isResetting, 11 | glassStyle, 12 | loaderColor, 13 | statusRef 14 | }) => { 15 | const glassCardStyle = `${glassStyle.base} rounded-2xl p-6`; 16 | const fadeInAnimation = "transition-all duration-300 ease-in-out"; 17 | 18 | if (!status) return null; 19 | 20 | return ( 21 |
--------------------------------------------------------------------------------
/ui/src/components/ResearchStatus.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import { Loader2, CheckCircle2, XCircle } from 'lucide-react';
3 | import { ResearchStatusProps } from '../types';
4 | 
5 | const ResearchStatus: React.FC<ResearchStatusProps> = ({
6 |   status,
7 |   error,
8 |   isComplete,
9 |   currentPhase,
10 |   isResetting,
11 |   glassStyle,
12 |   loaderColor,
13 |   statusRef
14 | }) => {
15 |   const glassCardStyle = `${glassStyle.base} rounded-2xl p-6`;
16 |   const fadeInAnimation = "transition-all duration-300 ease-in-out";
17 | 
18 |   if (!status) return null;
19 | 
20 |   // NOTE: tags and classes below are a minimal reconstruction; the original JSX markup was lost when this file was flattened.
21 |   return (
22 |     <div
23 |       ref={statusRef}
24 |       className={`${glassCardStyle} ${fadeInAnimation} ${isResetting ? 'opacity-0' : 'opacity-100'}`}
25 |     >
26 |       <div className="flex items-center gap-4">
27 |         <div className="flex-shrink-0">
28 |           {error ? (
29 |             <div>
30 |               <XCircle className="h-6 w-6 text-red-500" />
31 |             </div>
32 |           ) : status?.step === "Complete" || isComplete ? (
33 |             <div>
34 |               <CheckCircle2 className="h-6 w-6 text-green-500" />
35 |             </div>
36 |           ) : currentPhase === 'search' || currentPhase === 'enrichment' || (status?.step === "Processing" && status.message.includes("scraping")) ? (
37 |             <div>
38 |               <Loader2 className="h-6 w-6 animate-spin loader-icon" style={{ stroke: loaderColor }} />
39 |             </div>
40 |           ) : currentPhase === 'briefing' ? (
41 |             <div>
42 |               <Loader2 className="h-6 w-6 animate-spin loader-icon" style={{ stroke: loaderColor }} />
43 |             </div>
44 |           ) : (
45 |             <div>
46 |               <Loader2 className="h-6 w-6 animate-spin loader-icon" style={{ stroke: loaderColor }} />
47 |             </div>
48 |           )}
49 |         </div>
50 |         <div>
51 |           <h3 className="font-medium">
52 |             {status.step}
53 |           </h3>
54 |           <p className="text-sm text-gray-600">
55 |             {error || status.message}
56 |           </p>
57 |         </div>
58 |       </div>
59 |     </div>
60 |   );
61 | };
62 | 
63 | export default ResearchStatus;
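// Editor's sketch (not a repo file): ResearchStatus takes the ref from its parent
// so the parent can scroll the status card into view. Prop values and the
// StatusPane name are illustrative assumptions.
import { useRef } from 'react';
import ResearchStatus from './ResearchStatus';
import { glassStyle } from '../styles';

const StatusPane = () => {
  const statusRef = useRef<HTMLDivElement>(null);

  // A parent would typically call this once, when the first status event arrives.
  const scrollToStatus = () => statusRef.current?.scrollIntoView({ behavior: 'smooth' });
  void scrollToStatus;

  return (
    <ResearchStatus
      status={{ step: 'Processing', message: 'Scraping company pages...' }}
      error={null}
      isComplete={false}
      currentPhase="search"
      isResetting={false}
      glassStyle={glassStyle}
      loaderColor="#468BFF"
      statusRef={statusRef}
    />
  );
};

export default StatusPane;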
--------------------------------------------------------------------------------
/ui/src/components/index.ts:
--------------------------------------------------------------------------------
1 | // Export all components
2 | export { default as Header } from './Header';
3 | export { default as LocationInput } from './LocationInput';
4 | export { default as ResearchStatus } from './ResearchStatus';
5 | export { default as ResearchReport } from './ResearchReport';
6 | export { default as ResearchForm } from './ResearchForm';
--------------------------------------------------------------------------------
/ui/src/env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 
3 | interface ImportMetaEnv {
4 |   readonly VITE_API_URL: string;
5 |   readonly VITE_WS_URL: string;
6 |   readonly MODE: string;
7 |   readonly DEV: boolean;
8 |   readonly PROD: boolean;
9 | }
10 | 
11 | interface ImportMeta {
12 |   readonly env: ImportMetaEnv;
13 | }
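// Editor's note: the ImportMetaEnv augmentation above makes env access type-safe.
// A quick sketch; the example values are assumptions (Vite reads the real ones
// from ui/.env.development, per .env.development.example):
const apiBase: string = import.meta.env.VITE_API_URL; // e.g. "http://localhost:8000"
const wsBase: string = import.meta.env.VITE_WS_URL;   // e.g. "ws://localhost:8000"

if (import.meta.env.DEV) {
  // The built-in Vite flags (MODE/DEV/PROD) also type-check.
  console.debug('Backend endpoints:', apiBase, wsBase);
}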
--------------------------------------------------------------------------------
/ui/src/index.css:
--------------------------------------------------------------------------------
1 | @import url('https://fonts.googleapis.com/css2?family=DM+Sans:opsz,wght@9..40,400;9..40,500;9..40,600;9..40,700&display=swap');
2 | 
3 | @tailwind base;
4 | @tailwind components;
5 | @tailwind utilities;
6 | 
7 | @layer base {
8 |   body {
9 |     @apply bg-gray-900;
10 |     font-family: "DM Sans", sans-serif;
11 |     -webkit-font-smoothing: antialiased;
12 |     text-rendering: optimizeLegibility;
13 |     background-image: radial-gradient(circle at 1px 1px, rgba(255, 255, 255, 0.05) 1px, transparent 0);
14 |     background-size: 24px 24px;
15 |     background-position: center center;
16 |   }
17 | 
18 |   h1 {
19 |     font-family: "DM Sans", sans-serif;
20 |     font-size: 48px;
21 |     font-style: normal;
22 |     font-variation-settings: normal;
23 |     font-weight: 500;
24 |     letter-spacing: -1px;
25 |     text-rendering: optimizeLegibility;
26 |     unicode-bidi: isolate;
27 |     -webkit-font-smoothing: antialiased;
28 |   }
29 | 
30 |   h2, h3, h4, h5, h6 {
31 |     font-family: "DM Sans", sans-serif;
32 |     font-weight: 500;
33 |     letter-spacing: -0.5px;
34 |   }
35 | 
36 |   p, span, div, li, a {
37 |     font-family: "DM Sans", sans-serif;
38 |     font-weight: 400;
39 |   }
40 | 
41 |   input {
42 |     font-family: "DM Sans", sans-serif;
43 |   }
44 | 
45 |   button {
46 |     font-family: "DM Sans", sans-serif;
47 |   }
48 | 
49 |   select {
50 |     font-family: "DM Sans", sans-serif;
51 |   }
52 | }
53 | 
54 | @layer components {
55 |   .glass {
56 |     @apply bg-gray-900/40 backdrop-blur-md border border-gray-700/50;
57 |   }
58 | }
--------------------------------------------------------------------------------
/ui/src/main.tsx:
--------------------------------------------------------------------------------
1 | import { StrictMode } from 'react';
2 | import { createRoot } from 'react-dom/client';
3 | import App from './App.tsx';
4 | import './index.css';
5 | 
6 | createRoot(document.getElementById('root')!).render(
7 |   <StrictMode>
8 |     <App />
9 |   </StrictMode>
10 | );
--------------------------------------------------------------------------------
/ui/src/styles/index.ts:
--------------------------------------------------------------------------------
1 | export const colorAnimation = `
2 | @keyframes colorTransition {
3 |   0% { stroke: #468BFF; }
4 |   15% { stroke: #8FBCFA; }
5 |   30% { stroke: #468BFF; }
6 |   45% { stroke: #FE363B; }
7 |   60% { stroke: #FF9A9D; }
8 |   75% { stroke: #FDBB11; }
9 |   90% { stroke: #F6D785; }
10 |   100% { stroke: #468BFF; }
11 | }
12 | 
13 | .animate-colors {
14 |   animation: colorTransition 8s ease-in-out infinite;
15 |   animation-fill-mode: forwards;
16 | }
17 | 
18 | .animate-spin {
19 |   animation: spin 1s linear infinite;
20 | }
21 | 
22 | @keyframes spin {
23 |   from {
24 |     transform: rotate(0deg);
25 |   }
26 |   to {
27 |     transform: rotate(360deg);
28 |   }
29 | }
30 | 
31 | /* Add transition for smoother color changes */
32 | .loader-icon {
33 |   transition: stroke 1s ease-in-out;
34 | }
35 | `;
36 | 
37 | export const dmSansStyle = `
38 | @import url('https://fonts.googleapis.com/css2?family=DM+Sans:opsz,wght@9..40,400;9..40,500;9..40,600;9..40,700&display=swap');
39 | 
40 | /* Apply DM Sans globally */
41 | body {
42 |   font-family: 'DM Sans', sans-serif;
43 | }
44 | `;
45 | 
46 | export const glassStyle = {
47 |   base: "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl",
48 |   card: "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl rounded-2xl p-6",
49 |   input: "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl pl-10 w-full rounded-lg py-3 px-4 text-gray-900 focus:border-[#468BFF]/50 focus:outline-none focus:ring-1 focus:ring-[#468BFF]/50 placeholder-gray-400 bg-white/80 shadow-none"
50 | };
51 | 
52 | export const fadeInAnimation = {
53 |   fadeIn: "transition-all duration-300 ease-in-out",
54 |   writing: "animate-pulse",
55 |   colorTransition: colorAnimation
56 | };
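// Editor's sketch (not a repo file): colorAnimation and dmSansStyle are plain CSS
// strings, so a consumer has to inject them itself; an inline <style> element is
// one minimal way to do that. The GlobalStyles name is an assumption.
import { colorAnimation, dmSansStyle } from '../styles';

const GlobalStyles = () => <style>{colorAnimation + dmSansStyle}</style>;

export default GlobalStyles;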
--------------------------------------------------------------------------------
/ui/src/types/index.ts:
--------------------------------------------------------------------------------
1 | export type ResearchStatusType = {
2 |   step: string;
3 |   message: string;
4 | };
5 | 
6 | export type ResearchOutput = {
7 |   summary: string;
8 |   details: {
9 |     report: string;
10 |   };
11 | };
12 | 
13 | export type DocCount = {
14 |   initial: number;
15 |   kept: number;
16 | };
17 | 
18 | export type DocCounts = {
19 |   [key: string]: DocCount;
20 | };
21 | 
22 | export type EnrichmentCounts = {
23 |   company: { total: number; enriched: number };
24 |   industry: { total: number; enriched: number };
25 |   financial: { total: number; enriched: number };
26 |   news: { total: number; enriched: number };
27 | };
28 | 
29 | export type ResearchState = {
30 |   status: string;
31 |   message: string;
32 |   queries: Array<{
33 |     text: string;
34 |     number: number;
35 |     category: string;
36 |   }>;
37 |   streamingQueries: {
38 |     [key: string]: {
39 |       text: string;
40 |       number: number;
41 |       category: string;
42 |       isComplete: boolean;
43 |     };
44 |   };
45 |   briefingStatus: {
46 |     company: boolean;
47 |     industry: boolean;
48 |     financial: boolean;
49 |     news: boolean;
50 |   };
51 |   enrichmentCounts?: EnrichmentCounts;
52 |   docCounts?: DocCounts;
53 | };
54 | 
55 | export type GlassStyle = {
56 |   base: string;
57 |   card: string;
58 |   input: string;
59 | };
60 | 
61 | export type AnimationStyle = {
62 |   fadeIn: string;
63 |   writing: string;
64 |   colorTransition: string;
65 | };
66 | 
67 | export type ResearchStatusProps = {
68 |   status: ResearchStatusType | null;
69 |   error: string | null;
70 |   isComplete: boolean;
71 |   currentPhase: 'search' | 'enrichment' | 'briefing' | 'complete' | null;
72 |   isResetting: boolean;
73 |   glassStyle: GlassStyle;
74 |   loaderColor: string;
75 |   statusRef: React.RefObject<HTMLDivElement>;
76 | };
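// Editor's sketch: a valid initial ResearchState, mirroring exactly what
// resetResearch() in ui/src/utils/handlers.ts writes back on reset.
import { ResearchState } from '../types';

const initialResearchState: ResearchState = {
  status: 'idle',
  message: '',
  queries: [],
  streamingQueries: {},
  briefingStatus: { company: false, industry: false, financial: false, news: false },
  // enrichmentCounts and docCounts are optional and only appear once those phases report in.
};

export default initialResearchState;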
--------------------------------------------------------------------------------
/ui/src/utils/constants.ts:
--------------------------------------------------------------------------------
1 | // API and WebSocket URLs
2 | export const API_URL = import.meta.env.VITE_API_URL;
3 | export const WS_URL = import.meta.env.VITE_WS_URL;
4 | 
5 | // WebSocket Configuration
6 | export const MAX_RECONNECT_ATTEMPTS = 3;
7 | export const RECONNECT_DELAY = 2000; // 2 seconds
8 | 
9 | // Animation Styles
10 | export const writingAnimation = `
11 | @keyframes writing {
12 |   0% {
13 |     stroke-dashoffset: 1000;
14 |   }
15 |   100% {
16 |     stroke-dashoffset: 0;
17 |   }
18 | }
19 | 
20 | .animate-writing {
21 |   animation: writing 1.5s linear infinite;
22 | }
23 | `;
24 | 
25 | export const colorAnimation = `
26 | @keyframes colorTransition {
27 |   0% { stroke: #468BFF; }
28 |   15% { stroke: #8FBCFA; }
29 |   30% { stroke: #468BFF; }
30 |   45% { stroke: #FE363B; }
31 |   60% { stroke: #FF9A9D; }
32 |   75% { stroke: #FDBB11; }
33 |   90% { stroke: #F6D785; }
34 |   100% { stroke: #468BFF; }
35 | }
36 | 
37 | .animate-colors {
38 |   animation: colorTransition 8s ease-in-out infinite;
39 |   animation-fill-mode: forwards;
40 | }
41 | 
42 | .animate-spin {
43 |   animation: spin 1s linear infinite;
44 | }
45 | 
46 | @keyframes spin {
47 |   from {
48 |     transform: rotate(0deg);
49 |   }
50 |   to {
51 |     transform: rotate(360deg);
52 |   }
53 | }
54 | 
55 | /* Add transition for smoother color changes */
56 | .loader-icon {
57 |   transition: stroke 1s ease-in-out;
58 | }
59 | `;
60 | 
61 | export const dmSansStyle = `
62 | @import url('https://fonts.googleapis.com/css2?family=DM+Sans:opsz,wght@9..40,400;9..40,500;9..40,600;9..40,700&display=swap');
63 | 
64 | /* Apply DM Sans globally */
65 | body {
66 |   font-family: 'DM Sans', sans-serif;
67 | }
68 | `;
69 | 
70 | // Color Palette
71 | export const colors = {
72 |   primary: {
73 |     blue: "#468BFF",
74 |     lightBlue: "#8FBCFA",
75 |     red: "#FE363B",
76 |     lightRed: "#FF9A9D",
77 |     yellow: "#FDBB11",
78 |     lightYellow: "#F6D785"
79 |   }
80 | };
81 | 
82 | // Animation Durations
83 | export const ANIMATION_DURATIONS = {
84 |   reset: 300,
85 |   collapse: 1000,
86 |   briefingCollapse: 2000
87 | };
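// Editor's sketch (not a repo file): how MAX_RECONNECT_ATTEMPTS and
// RECONNECT_DELAY are typically consumed. The /research/ws/{jobId} path and the
// connectWithRetry name are assumptions for illustration only.
import { WS_URL, MAX_RECONNECT_ATTEMPTS, RECONNECT_DELAY } from './constants';

const connectWithRetry = (
  jobId: string,
  onMessage: (ev: MessageEvent) => void,
  attempt = 0
): WebSocket => {
  const ws = new WebSocket(`${WS_URL}/research/ws/${jobId}`);
  ws.onmessage = onMessage;
  ws.onclose = () => {
    // Retry with a fixed delay until the attempt budget is exhausted.
    if (attempt < MAX_RECONNECT_ATTEMPTS) {
      setTimeout(() => connectWithRetry(jobId, onMessage, attempt + 1), RECONNECT_DELAY);
    }
  };
  return ws;
};

export default connectWithRetry;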
--------------------------------------------------------------------------------
/ui/src/utils/handlers.ts:
--------------------------------------------------------------------------------
1 | import { API_URL } from './constants';
2 | import { ResearchOutput, ResearchState, ResearchStatusType } from '../types';
3 | 
4 | export const handleGeneratePdf = async (
5 |   output: ResearchOutput | null,
6 |   originalCompanyName: string,
7 |   setIsGeneratingPdf: (value: boolean) => void,
8 |   setError: (error: string | null) => void,
9 |   isGeneratingPdf: boolean
10 | ) => {
11 |   if (!output || isGeneratingPdf) return;
12 | 
13 |   setIsGeneratingPdf(true);
14 |   try {
15 |     console.log("Generating PDF with company name:", originalCompanyName);
16 |     const response = await fetch(`${API_URL}/generate-pdf`, {
17 |       method: 'POST',
18 |       headers: {
19 |         'Content-Type': 'application/json',
20 |       },
21 |       body: JSON.stringify({
22 |         report_content: output.details.report,
23 |         company_name: originalCompanyName || 'research_report'
24 |       }),
25 |     });
26 | 
27 |     if (!response.ok) {
28 |       throw new Error('Failed to generate PDF');
29 |     }
30 | 
31 |     // Get the blob from the response
32 |     const blob = await response.blob();
33 | 
34 |     // Create a URL for the blob
35 |     const url = window.URL.createObjectURL(blob);
36 | 
37 |     // Create a temporary link element
38 |     const link = document.createElement('a');
39 |     link.href = url;
40 |     link.download = `${originalCompanyName || 'research_report'}.pdf`;
41 | 
42 |     // Append to body, click, and remove
43 |     document.body.appendChild(link);
44 |     link.click();
45 |     document.body.removeChild(link);
46 | 
47 |     // Clean up the URL
48 |     window.URL.revokeObjectURL(url);
49 | 
50 |   } catch (error) {
51 |     console.error('Error generating PDF:', error);
52 |     setError(error instanceof Error ? error.message : 'Failed to generate PDF');
53 |   } finally {
54 |     setIsGeneratingPdf(false);
55 |   }
56 | };
57 | 
58 | export const handleCopyToClipboard = async (
59 |   output: ResearchOutput | null,
60 |   setIsCopied: (value: boolean) => void,
61 |   setError: (error: string | null) => void
62 | ) => {
63 |   if (!output?.details?.report) return;
64 | 
65 |   try {
66 |     await navigator.clipboard.writeText(output.details.report);
67 |     setIsCopied(true);
68 |     setTimeout(() => setIsCopied(false), 2000); // Reset after 2 seconds
69 |   } catch (err) {
70 |     console.error('Failed to copy text: ', err);
71 |     setError('Failed to copy to clipboard');
72 |   }
73 | };
74 | 
75 | export const checkForFinalReport = async (
76 |   jobId: string,
77 |   setOutput: (output: ResearchOutput | null) => void,
78 |   setStatus: (status: ResearchStatusType | null) => void,
79 |   setIsComplete: (value: boolean) => void,
80 |   setIsResearching: (value: boolean) => void,
81 |   setCurrentPhase: (phase: 'search' | 'enrichment' | 'briefing' | 'complete' | null) => void,
82 |   setHasFinalReport: (value: boolean) => void,
83 |   pollingIntervalRef: React.MutableRefObject<NodeJS.Timeout | null>
84 | ) => {
85 |   try {
86 |     const response = await fetch(`${API_URL}/research/status/${jobId}`);
87 |     if (!response.ok) throw new Error('Failed to fetch status');
88 | 
89 |     const data = await response.json();
90 | 
91 |     if (data.status === "completed" && data.result?.report) {
92 |       setOutput({
93 |         summary: "",
94 |         details: {
95 |           report: data.result.report,
96 |         },
97 |       });
98 |       setStatus({
99 |         step: "Complete",
100 |         message: "Research completed successfully"
101 |       });
102 |       setIsComplete(true);
103 |       setIsResearching(false);
104 |       setCurrentPhase('complete');
105 |       setHasFinalReport(true);
106 | 
107 |       // Clear polling interval
108 |       if (pollingIntervalRef.current) {
109 |         clearInterval(pollingIntervalRef.current);
110 |         pollingIntervalRef.current = null;
111 |       }
112 |     }
113 |   } catch (error) {
114 |     console.error('Error checking final report:', error);
115 |   }
116 | };
117 | 
118 | export const resetResearch = (
119 |   setStatus: (status: ResearchStatusType | null) => void,
120 |   setOutput: (output: ResearchOutput | null) => void,
121 |   setError: (error: string | null) => void,
122 |   setIsComplete: (value: boolean) => void,
123 |   setResearchState: (state: ResearchState) => void,
124 |   setPdfUrl: (url: string | null) => void,
125 |   setCurrentPhase: (phase: 'search' | 'enrichment' | 'briefing' | 'complete' | null) => void,
126 |   setIsSearchPhase: (value: boolean) => void,
127 |   setShouldShowQueries: (value: boolean) => void,
128 |   setIsQueriesExpanded: (value: boolean) => void,
129 |   setIsBriefingExpanded: (value: boolean) => void,
130 |   setIsEnrichmentExpanded: (value: boolean) => void,
131 |   setIsResetting: (value: boolean) => void,
132 |   setHasScrolledToStatus: (value: boolean) => void
133 | ) => {
134 |   setIsResetting(true);
135 | 
136 |   // Use setTimeout to create a smooth transition
137 |   setTimeout(() => {
138 |     setStatus(null);
139 |     setOutput(null);
140 |     setError(null);
141 |     setIsComplete(false);
142 |     setResearchState({
143 |       status: "idle",
144 |       message: "",
145 |       queries: [],
146 |       streamingQueries: {},
147 |       briefingStatus: {
148 |         company: false,
149 |         industry: false,
150 |         financial: false,
151 |         news: false
152 |       }
153 |     });
154 |     setPdfUrl(null);
155 |     setCurrentPhase(null);
156 |     setIsSearchPhase(false);
157 |     setShouldShowQueries(false);
158 |     setIsQueriesExpanded(true);
159 |     setIsBriefingExpanded(true);
160 |     setIsEnrichmentExpanded(true);
161 |     setIsResetting(false);
162 |     setHasScrolledToStatus(false); // Reset scroll flag when resetting research
163 |   }, 300); // Match this with CSS transition duration
164 | };
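// Editor's sketch (not a repo file): driving checkForFinalReport from a poll
// loop. The hook shape and the 5s cadence are assumptions (in the repo, App.tsx
// owns this state); NodeJS.Timeout matches the pollingIntervalRef parameter type
// above and assumes @types/node is available.
import { useRef, useState } from 'react';
import { checkForFinalReport } from './handlers';
import { ResearchOutput, ResearchStatusType } from '../types';

export const useFinalReportPolling = () => {
  const pollingIntervalRef = useRef<NodeJS.Timeout | null>(null);
  const [, setOutput] = useState<ResearchOutput | null>(null);
  const [, setStatus] = useState<ResearchStatusType | null>(null);
  const [, setIsComplete] = useState(false);
  const [, setIsResearching] = useState(false);
  const [, setCurrentPhase] = useState<'search' | 'enrichment' | 'briefing' | 'complete' | null>(null);
  const [, setHasFinalReport] = useState(false);

  return (jobId: string) => {
    if (pollingIntervalRef.current) clearInterval(pollingIntervalRef.current);
    // checkForFinalReport clears the interval itself once the report arrives.
    pollingIntervalRef.current = setInterval(() => {
      checkForFinalReport(
        jobId, setOutput, setStatus, setIsComplete, setIsResearching,
        setCurrentPhase, setHasFinalReport, pollingIntervalRef
      );
    }, 5000);
  };
};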
--------------------------------------------------------------------------------
/ui/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
--------------------------------------------------------------------------------
/ui/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | export default {
3 |   content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"],
4 |   theme: {
5 |     extend: {},
6 |   },
7 |   plugins: [],
8 | };
--------------------------------------------------------------------------------
/ui/tsconfig.app.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "ES2020",
4 |     "useDefineForClassFields": true,
5 |     "lib": ["ES2020", "DOM", "DOM.Iterable"],
6 |     "module": "ESNext",
7 |     "skipLibCheck": true,
8 | 
9 |     /* Bundler mode */
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "isolatedModules": true,
13 |     "moduleDetection": "force",
14 |     "noEmit": true,
15 |     "jsx": "react-jsx",
16 | 
17 |     /* Linting */
18 |     "strict": true,
19 |     "noUnusedLocals": true,
20 |     "noUnusedParameters": true,
21 |     "noFallthroughCasesInSwitch": true
22 |   },
23 |   "include": ["src"]
24 | }
--------------------------------------------------------------------------------
/ui/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "files": [],
3 |   "references": [
4 |     { "path": "./tsconfig.app.json" },
5 |     { "path": "./tsconfig.node.json" }
6 |   ]
7 | }
--------------------------------------------------------------------------------
/ui/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "composite": true,
4 |     "skipLibCheck": true,
5 |     "module": "ESNext",
6 |     "moduleResolution": "bundler",
7 |     "allowSyntheticDefaultImports": true
8 |   },
9 |   "include": ["vite.config.ts"]
10 | }
--------------------------------------------------------------------------------
/ui/vercel.json:
--------------------------------------------------------------------------------
1 | {
2 |   "rewrites": [
3 |     {
4 |       "source": "/(.*)",
5 |       "destination": "/index.html"
6 |     }
7 |   ]
8 | }
--------------------------------------------------------------------------------
/ui/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from "vite";
2 | import react from "@vitejs/plugin-react";
3 | 
4 | // https://vitejs.dev/config/
5 | export default defineConfig({
6 |   plugins: [react()],
7 |   optimizeDeps: {
8 |     exclude: ["lucide-react"],
9 |   },
10 |   build: {
11 |     outDir: "dist",
12 |     sourcemap: true,
13 |   },
14 |   server: {
15 |     port: 5174,
16 |     strictPort: true,
17 |     host: true,
18 |     proxy: {
19 |       '/api': {
20 |         target: 'http://tavily-company-research.eba-h6x8kkzc.us-east-1.elasticbeanstalk.com',
21 |         changeOrigin: true,
22 |         secure: false,
23 |         rewrite: (path) => path.replace(/^\/api/, '')
24 |       }
25 |     }
26 |   },
27 | });
--------------------------------------------------------------------------------
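// Editor's note: with the dev-server proxy above, a relative request from the UI
// such as this one is forwarded to the Elastic Beanstalk backend with the /api
// prefix stripped by rewrite(). The job id value is illustrative.
const jobId = 'example-job-id';
fetch(`/api/research/status/${jobId}`) // proxied to {target}/research/status/example-job-id
  .then((res) => res.json())
  .then((data) => console.log(data.status));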