├── .dockerignore
├── .github
    └── dependabot.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.es.md
├── README.fr.md
├── README.jp.md
├── README.kr.md
├── README.md
├── README.zh.md
├── application.py
├── backend
    ├── __init__.py
    ├── classes
    │   ├── __init__.py
    │   └── state.py
    ├── graph.py
    ├── nodes
    │   ├── __init__.py
    │   ├── briefing.py
    │   ├── collector.py
    │   ├── curator.py
    │   ├── editor.py
    │   ├── enricher.py
    │   ├── grounding.py
    │   └── researchers
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── company.py
    │   │   ├── financial.py
    │   │   ├── industry.py
    │   │   └── news.py
    ├── services
    │   ├── mongodb.py
    │   ├── pdf_service.py
    │   └── websocket_manager.py
    └── utils
    │   ├── __init__.py
    │   ├── references.py
    │   └── utils.py
├── docker-compose.yml
├── langgraph.json
├── langgraph_entry.py
├── package-lock.json
├── requirements.txt
├── setup.sh
├── static
    ├── agent-flow.png
    ├── demo.mp4
    ├── ui-1.png
    └── ui-2.png
└── ui
    ├── .env.development.example
    ├── .gitignore
    ├── eslint.config.js
    ├── index.html
    ├── package-lock.json
    ├── package.json
    ├── postcss.config.js
    ├── public
        ├── favicon.ico
        └── tavilylogo.png
    ├── src
        ├── App.tsx
        ├── components
        │   ├── CurationExtraction.tsx
        │   ├── ExamplePopup.tsx
        │   ├── Header.tsx
        │   ├── LocationInput.tsx
        │   ├── ResearchBriefings.tsx
        │   ├── ResearchForm.tsx
        │   ├── ResearchQueries.tsx
        │   ├── ResearchReport.tsx
        │   ├── ResearchStatus.tsx
        │   └── index.ts
        ├── env.d.ts
        ├── index.css
        ├── main.tsx
        ├── styles
        │   └── index.ts
        ├── types
        │   └── index.ts
        ├── utils
        │   ├── constants.ts
        │   └── handlers.ts
        └── vite-env.d.ts
    ├── tailwind.config.js
    ├── tsconfig.app.json
    ├── tsconfig.json
    ├── tsconfig.node.json
    ├── vercel.json
    └── vite.config.ts


/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Version control
 2 | .git
 3 | .gitignore
 4 | .gitattributes
 5 | 
 6 | # Environment files
 7 | .env
 8 | .env.*
 9 | *.env
10 | 
11 | # Python
12 | __pycache__/
13 | *.py[cod]
14 | *.so
15 | .Python
16 | *.egg
17 | *.egg-info/
18 | .eggs/
19 | *.pyc
20 | .pytest_cache/
21 | .coverage
22 | htmlcov/
23 | .tox/
24 | .venv/
25 | venv/
26 | 
27 | # Node.js
28 | ui/node_modules/
29 | ui/.npm
30 | ui/npm-debug.log*
31 | ui/yarn-debug.log*
32 | ui/yarn-error.log*
33 | ui/dist/
34 | 
35 | # IDE
36 | .idea/
37 | .vscode/
38 | *.swp
39 | *.swo
40 | 
41 | # OS
42 | .DS_Store
43 | Thumbs.db
44 | 
45 | # Project specific
46 | reports/*
47 | .langgraph/
48 | .elasticbeanstalk/
49 | README.md
50 | LICENSE
51 | *.md
52 | *.log 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "" # See documentation for possible values
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "weekly"
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Environment and secrets
 2 | .env
 3 | reports/*
 4 | 
 5 | # Python
 6 | __pycache__/
 7 | *.py[cod]
 8 | *.egg-info/
 9 | 
10 | # Virtual Environment
11 | .venv/
12 | 
13 | # IDE and OS
14 | .vscode/
15 | .DS_Store
16 | 
17 | # Frontend
18 | node_modules/
19 | 
20 | # LangGraph
21 | .langgraph/
22 | 
23 | # Elastic Beanstalk Files
24 | .elasticbeanstalk/*
25 | !.elasticbeanstalk/*.cfg.yml
26 | !.elasticbeanstalk/*.global.yml
27 | Procfile
28 | .vercel
29 | .ebextensions


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage 1: Build Frontend
 2 | FROM node:20-slim AS frontend-builder
 3 | WORKDIR /app/ui
 4 | COPY ui/package*.json ./
 5 | RUN npm install
 6 | COPY ui/ ./
 7 | RUN npm run build
 8 | 
 9 | # Stage 2: Build Backend
10 | FROM python:3.11-slim AS backend-builder
11 | WORKDIR /app
12 | COPY requirements.txt .
13 | RUN pip install --no-cache-dir -r requirements.txt
14 | 
15 | # Stage 3: Final Image
16 | FROM python:3.11-slim
17 | WORKDIR /app
18 | 
19 | # Install system dependencies
20 | RUN apt-get update && apt-get install -y --no-install-recommends \
21 |     curl \
22 |     && rm -rf /var/lib/apt/lists/*
23 | 
24 | # Copy backend
25 | COPY --from=backend-builder /usr/local/lib/python3.11/site-packages/ /usr/local/lib/python3.11/site-packages/
26 | COPY backend/ ./backend/
27 | COPY application.py .
28 | 
29 | # Copy frontend build
30 | COPY --from=frontend-builder /app/ui/dist/ ./ui/dist/
31 | 
32 | # Create reports directory
33 | RUN mkdir -p reports
34 | 
35 | # Set environment variables
36 | ENV PYTHONUNBUFFERED=1
37 | ENV PORT=8000
38 | 
39 | # Expose the port
40 | EXPOSE 8000
41 | 
42 | # Create a non-root user
43 | RUN useradd -m -u 1000 appuser
44 | RUN chown -R appuser:appuser /app
45 | USER appuser
46 | 
47 | # Start command
48 | CMD ["python", "-m", "uvicorn", "application:app", "--host", "0.0.0.0", "--port", "8000"] 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2025 Guy Hartstein
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.es.md:
--------------------------------------------------------------------------------
  1 |  [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md)
  2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md)
  3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md)
  4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md)
  5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md)
  6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md)
  7 | 
  8 | # Investigador de Empresas 🔍
  9 | 
 10 | ![interfaz web](<static/ui-1.png>)
 11 | 
 12 | Una herramienta multi-agente que genera informes de investigación exhaustivos sobre empresas. La plataforma utiliza un sistema de agentes de IA para recopilar, seleccionar y sintetizar información sobre cualquier empresa.
 13 | 
 14 | ✨¡Pruébalo en línea! https://companyresearcher.tavily.com ✨
 15 | 
 16 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9
 17 | 
 18 | ## Características
 19 | 
 20 | - **Investigación Multi-Fuente**: Recopila datos de diversas fuentes, incluyendo sitios web de empresas, artículos de noticias, informes financieros y análisis sectoriales
 21 | - **Filtrado de Contenido Impulsado por IA**: Utiliza la puntuación de relevancia de Tavily para la selección de contenido
 22 | - **Transmisión de Progreso en Tiempo Real**: Utiliza conexiones WebSocket para transmitir el progreso de la investigación y los resultados
 23 | - **Arquitectura de Modelo Dual**: 
 24 |   - Gemini 2.0 Flash para síntesis de investigación de alto contexto
 25 |   - GPT-4.1 para formato preciso y edición de informes
 26 | - **Frontend Moderno en React**: Interfaz de usuario receptiva con actualizaciones en tiempo real, seguimiento de progreso y opciones de descarga
 27 | - **Arquitectura Modular**: Construido utilizando un sistema de nodos de investigación y procesamiento especializados
 28 | 
 29 | ## Marco de Agentes
 30 | 
 31 | ### Sistema de Investigación
 32 | 
 33 | La plataforma sigue un marco basado en agentes con nodos especializados que procesan datos secuencialmente:
 34 | 
 35 | 1. **Nodos de Investigación**:
 36 |    - `CompanyAnalyzer`: Investiga información básica del negocio
 37 |    - `IndustryAnalyzer`: Analiza posición de mercado y tendencias
 38 |    - `FinancialAnalyst`: Recopila métricas financieras y datos de rendimiento
 39 |    - `NewsScanner`: Recopila noticias y desarrollos recientes
 40 | 
 41 | 2. **Nodos de Procesamiento**:
 42 |    - `Collector`: Agrega datos de investigación de todos los analizadores
 43 |    - `Curator`: Implementa filtrado de contenido y puntuación de relevancia
 44 |    - `Briefing`: Genera resúmenes específicos por categoría utilizando Gemini 2.0 Flash
 45 |    - `Editor`: Compila y formatea los resúmenes en un informe final utilizando GPT-4.1-mini
 46 | 
 47 |    ![interfaz web](<static/agent-flow.png>)
 48 | 
 49 | ### Arquitectura de Generación de Contenido
 50 | 
 51 | La plataforma aprovecha modelos separados para un rendimiento óptimo:
 52 | 
 53 | 1. **Gemini 2.0 Flash** (`briefing.py`):
 54 |    - Maneja tareas de síntesis de investigación de alto contexto
 55 |    - Sobresale en el procesamiento y resumen de grandes volúmenes de datos
 56 |    - Utilizado para generar resúmenes iniciales por categoría
 57 |    - Eficiente en mantener el contexto a través de múltiples documentos
 58 | 
 59 | 2. **GPT-4.1 mini** (`editor.py`):
 60 |    - Se especializa en tareas precisas de formato y edición
 61 |    - Maneja la estructura y consistencia en markdown
 62 |    - Superior en seguir instrucciones exactas de formato
 63 |    - Utilizado para:
 64 |      - Compilación final del informe
 65 |      - Eliminación de duplicados de contenido
 66 |      - Formateo en markdown
 67 |      - Transmisión de informes en tiempo real
 68 | 
 69 | Este enfoque combina la fortaleza de Gemini en el manejo de ventanas de contexto grandes con la precisión de GPT-4.1-mini en seguir instrucciones específicas de formato.
 70 | 
 71 | ### Sistema de Selección de Contenido
 72 | 
 73 | La plataforma utiliza un sistema de filtrado de contenido en `curator.py`:
 74 | 
 75 | 1. **Puntuación de Relevancia**:
 76 |    - Los documentos son puntuados por la búsqueda potenciada por IA de Tavily
 77 |    - Se requiere un umbral mínimo (predeterminado 0.4) para proceder
 78 |    - Las puntuaciones reflejan la relevancia para la consulta de investigación específica
 79 |    - Puntuaciones más altas indican mejores coincidencias con la intención de la investigación
 80 | 
 81 | 2. **Procesamiento de Documentos**:
 82 |    - El contenido se normaliza y limpia
 83 |    - Las URLs se desduplicaron y estandarizaron
 84 |    - Los documentos se ordenan por puntuaciones de relevancia
 85 |    - Las actualizaciones de progreso en tiempo real se envían a través de WebSocket
 86 | 
 87 | ### Sistema de Comunicación en Tiempo Real
 88 | 
 89 | La plataforma implementa un sistema de comunicación en tiempo real basado en WebSocket:
 90 | 
 91 | ![interfaz web](<static/ui-2.png>)
 92 | 
 93 | 1. **Implementación Backend**:
 94 |    - Utiliza el soporte de WebSocket de FastAPI
 95 |    - Mantiene conexiones persistentes por trabajo de investigación
 96 |    - Envía actualizaciones de estado estructuradas para varios eventos:
 97 |      ```python
 98 |      await websocket_manager.send_status_update(
 99 |          job_id=job_id,
100 |          status="processing",
101 |          message=f"Generating {category} briefing",
102 |          result={
103 |              "step": "Briefing",
104 |              "category": category,
105 |              "total_docs": len(docs)
106 |          }
107 |      )
108 |      ```
109 | 
110 | 2. **Integración Frontend**:
111 |    - Los componentes de React se suscriben a actualizaciones WebSocket
112 |    - Las actualizaciones se procesan y muestran en tiempo real
113 |    - Diferentes componentes de UI manejan tipos específicos de actualizaciones:
114 |      - Progreso de generación de consultas
115 |      - Estadísticas de selección de documentos
116 |      - Estado de finalización de resúmenes
117 |      - Progreso de generación de informes
118 | 
119 | 3. **Tipos de Estado**:
120 |    - `query_generating`: Actualizaciones en tiempo real de creación de consultas
121 |    - `document_kept`: Progreso de selección de documentos
122 |    - `briefing_start/complete`: Estado de generación de resúmenes
123 |    - `report_chunk`: Transmisión de generación de informes
124 |    - `curation_complete`: Estadísticas finales de documentos
125 | 
126 | ## Instalación
127 | 
128 | ### Instalación Rápida (Recomendada)
129 | 
130 | La forma más sencilla de comenzar es utilizando el script de instalación:
131 | 
132 | 1. Clonar el repositorio:
133 | ```bash
134 | git clone https://github.com/pogjester/tavily-company-research.git
135 | cd tavily-company-research
136 | ```
137 | 
138 | 2. Hacer que el script de instalación sea ejecutable y ejecutarlo:
139 | ```bash
140 | chmod +x setup.sh
141 | ./setup.sh
142 | ```
143 | 
144 | El script de instalación hará lo siguiente:
145 | - Verificar las versiones requeridas de Python y Node.js
146 | - Opcionalmente crear un entorno virtual de Python (recomendado)
147 | - Instalar todas las dependencias (Python y Node.js)
148 | - Guiarte a través de la configuración de tus variables de entorno
149 | - Opcionalmente iniciar los servidores de backend y frontend
150 | 
151 | Necesitarás tener listas las siguientes claves API:
152 | - Clave API de Tavily
153 | - Clave API de Google Gemini
154 | - Clave API de OpenAI
155 | - URI de MongoDB (opcional)
156 | 
157 | ### Instalación Manual
158 | 
159 | Si prefieres realizar la instalación manualmente, sigue estos pasos:
160 | 
161 | 1. Clonar el repositorio:
162 | ```bash
163 | git clone https://github.com/pogjester/tavily-company-research.git
164 | cd tavily-company-research
165 | ```
166 | 
167 | 2. Instalar dependencias de backend:
168 | ```bash
169 | # Opcional: Crear y activar entorno virtual
170 | python -m venv .venv
171 | source .venv/bin/activate
172 | 
173 | # Instalar dependencias de Python
174 | pip install -r requirements.txt
175 | ```
176 | 
177 | 3. Instalar dependencias de frontend:
178 | ```bash
179 | cd ui
180 | npm install
181 | ```
182 | 
183 | 4. Crear un archivo `.env` con tus claves API:
184 | ```env
185 | TAVILY_API_KEY=tu_clave_tavily
186 | GEMINI_API_KEY=tu_clave_gemini
187 | OPENAI_API_KEY=tu_clave_openai
188 | 
189 | # Opcional: Habilitar persistencia en MongoDB
190 | # MONGODB_URI=tu_cadena_de_conexion_mongodb
191 | ```
192 | 
193 | ### Instalación con Docker
194 | 
195 | La aplicación puede ejecutarse utilizando Docker y Docker Compose:
196 | 
197 | 1. Clonar el repositorio:
198 | ```bash
199 | git clone https://github.com/pogjester/tavily-company-research.git
200 | cd tavily-company-research
201 | ```
202 | 
203 | 2. Crear un archivo `.env` con tus claves API:
204 | ```env
205 | TAVILY_API_KEY=tu_clave_tavily
206 | GEMINI_API_KEY=tu_clave_gemini
207 | OPENAI_API_KEY=tu_clave_openai
208 | 
209 | # Opcional: Habilitar persistencia en MongoDB
210 | # MONGODB_URI=tu_cadena_de_conexion_mongodb
211 | ```
212 | 
213 | 3. Construir e iniciar los contenedores:
214 | ```bash
215 | docker compose up --build
216 | ```
217 | 
218 | Esto iniciará los servicios de backend y frontend:
219 | - La API de backend estará disponible en `http://localhost:8000`
220 | - El frontend estará disponible en `http://localhost:5174`
221 | 
222 | Para detener los servicios:
223 | ```bash
224 | docker compose down
225 | ```
226 | 
227 | Nota: Al actualizar las variables de entorno en `.env`, necesitarás reiniciar los contenedores:
228 | ```bash
229 | docker compose down && docker compose up
230 | ```
231 | 
232 | ### Ejecutando la Aplicación
233 | 
234 | 1. Iniciar el servidor de backend (elige una opción):
235 | ```bash
236 | # Opción 1: Módulo Python Directo
237 | python -m application.py
238 | 
239 | # Opción 2: FastAPI con Uvicorn
240 | uvicorn application:app --reload --port 8000
241 | ```
242 | 
243 | 2. En una nueva terminal, iniciar el frontend:
244 | ```bash
245 | cd ui
246 | npm run dev
247 | ```
248 | 
249 | 3. Acceder a la aplicación en `http://localhost:5173`
250 | 
251 | ## Uso
252 | 
253 | ### Desarrollo Local
254 | 
255 | 1. Iniciar el servidor de backend (elige una opción):
256 | 
257 |    **Opción 1: Módulo Python Directo**
258 |    ```bash
259 |    python -m application.py
260 |    ```
261 | 
262 |    **Opción 2: FastAPI con Uvicorn**
263 |    ```bash
264 |    # Instalar uvicorn si aún no está instalado
265 |    pip install uvicorn
266 | 
267 |    # Ejecutar la aplicación FastAPI con recarga automática
268 |    uvicorn application:app --reload --port 8000
269 |    ```
270 | 
271 |    El backend estará disponible en:
272 |    - Punto de conexión API: `http://localhost:8000`
273 |    - Punto de conexión WebSocket: `ws://localhost:8000/research/ws/{job_id}`
274 | 
275 | 2. Iniciar el servidor de desarrollo del frontend:
276 |    ```bash
277 |    cd ui
278 |    npm run dev
279 |    ```
280 | 
281 | 3. Acceder a la aplicación en `http://localhost:5173`
282 | 
283 | ### Opciones de Despliegue
284 | 
285 | La aplicación puede desplegarse en varias plataformas en la nube. Aquí hay algunas opciones comunes:
286 | 
287 | #### AWS Elastic Beanstalk
288 | 
289 | 1. Instalar el EB CLI:
290 |    ```bash
291 |    pip install awsebcli
292 |    ```
293 | 
294 | 2. Inicializar la aplicación EB:
295 |    ```bash
296 |    eb init -p python-3.11 tavily-research
297 |    ```
298 | 
299 | 3. Crear y desplegar:
300 |    ```bash
301 |    eb create tavily-research-prod
302 |    ```
303 | 
304 | #### Otras Opciones de Despliegue
305 | 
306 | - **Docker**: La aplicación incluye un Dockerfile para despliegue en contenedores
307 | - **Heroku**: Despliegue directamente desde GitHub con el buildpack de Python
308 | - **Google Cloud Run**: Adecuado para despliegue en contenedores con escalado automático
309 | 
310 | Elige la plataforma que mejor se adapte a tus necesidades. La aplicación es independiente de la plataforma y puede alojarse en cualquier lugar que admita aplicaciones web Python.
311 | 
312 | ## Contribuir
313 | 
314 | 1. Haz un fork del repositorio
315 | 2. Crea una rama de características (`git checkout -b feature/caracteristica-increible`)
316 | 3. Haz commit de tus cambios (`git commit -m 'Añadir característica increíble'`)
317 | 4. Haz push a la rama (`git push origin feature/caracteristica-increible`)
318 | 5. Abre un Pull Request
319 | 
320 | ## Licencia
321 | 
322 | Este proyecto está licenciado bajo la Licencia MIT - consulta el archivo [LICENSE](LICENSE) para más detalles.
323 | 
324 | ## Agradecimientos
325 | 
326 | - [Tavily](https://tavily.com/) por la API de investigación
327 | - Todas las demás bibliotecas de código abierto y sus contribuyentes
328 | 


--------------------------------------------------------------------------------
/README.fr.md:
--------------------------------------------------------------------------------
  1 |  [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md)
  2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md)
  3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md)
  4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md)
  5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md)
  6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md)
  7 | 
  8 | 
  9 | # Agent de Recherche d'Entreprise 🔍
 10 | 
 11 | ![web ui](<static/ui-1.png>)
 12 | 
 13 | Un outil multi-agents qui génère des rapports de recherche d'entreprise complets. La plateforme utilise un pipeline d'agents IA pour collecter, organiser et synthétiser des informations sur n'importe quelle entreprise.
 14 | 
 15 | ✨Essayez-le en ligne ! https://companyresearcher.tavily.com ✨
 16 | 
 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9
 18 | 
 19 | ## Fonctionnalités
 20 | 
 21 | - **Recherche Multi-Sources** : Récupère des données de diverses sources, y compris les sites web d'entreprise, articles de presse, rapports financiers et analyses sectorielles
 22 | - **Filtrage de contenu par IA** : Utilise le score de pertinence de Tavily pour la curation du contenu
 23 | - **Streaming en temps réel** : Utilise les WebSockets pour diffuser l'avancement et les résultats de la recherche en temps réel
 24 | - **Architecture à double modèle** :
 25 |   - Gemini 2.0 Flash pour la synthèse de recherche à large contexte
 26 |   - GPT-4.1 pour la mise en forme et l'édition précises du rapport
 27 | - **Frontend React moderne** : Interface réactive avec mises à jour en temps réel, suivi de progression et options de téléchargement
 28 | - **Architecture modulaire** : Construite autour d'un pipeline de nœuds spécialisés de recherche et de traitement
 29 | 
 30 | ## Cadre Agentique
 31 | 
 32 | ### Pipeline de Recherche
 33 | 
 34 | La plateforme suit un cadre agentique avec des nœuds spécialisés qui traitent les données de manière séquentielle :
 35 | 
 36 | 1. **Nœuds de Recherche** :
 37 |    - `CompanyAnalyzer` : Recherche les informations principales sur l'entreprise
 38 |    - `IndustryAnalyzer` : Analyse la position sur le marché et les tendances
 39 |    - `FinancialAnalyst` : Récupère les indicateurs financiers et les données de performance
 40 |    - `NewsScanner` : Collecte les actualités et développements récents
 41 | 
 42 | 2. **Nœuds de Traitement** :
 43 |    - `Collector` : Agrège les données de recherche de tous les analyseurs
 44 |    - `Curator` : Met en œuvre le filtrage de contenu et le scoring de pertinence
 45 |    - `Briefing` : Génère des synthèses par catégorie à l'aide de Gemini 2.0 Flash
 46 |    - `Editor` : Compile et met en forme les synthèses dans un rapport final avec GPT-4.1-mini
 47 | 
 48 |    ![web ui](<static/agent-flow.png>)
 49 | 
 50 | ### Architecture de Génération de Contenu
 51 | 
 52 | La plateforme exploite des modèles distincts pour des performances optimales :
 53 | 
 54 | 1. **Gemini 2.0 Flash** (`briefing.py`) :
 55 |    - Gère la synthèse de recherche à large contexte
 56 |    - Excelle dans le traitement et le résumé de grands volumes de données
 57 |    - Utilisé pour générer les synthèses initiales par catégorie
 58 |    - Efficace pour maintenir le contexte sur plusieurs documents
 59 | 
 60 | 2. **GPT-4.1 mini** (`editor.py`) :
 61 |    - Spécialisé dans la mise en forme et l'édition précises
 62 |    - Gère la structure markdown et la cohérence
 63 |    - Supérieur pour suivre des instructions de formatage exactes
 64 |    - Utilisé pour :
 65 |      - Compilation du rapport final
 66 |      - Déduplication du contenu
 67 |      - Mise en forme markdown
 68 |      - Streaming du rapport en temps réel
 69 | 
 70 | Cette approche combine la capacité de Gemini à gérer de larges fenêtres de contexte avec la précision de GPT-4.1-mini pour le respect des consignes de formatage.
 71 | 
 72 | ### Système de Curation de Contenu
 73 | 
 74 | La plateforme utilise un système de filtrage de contenu dans `curator.py` :
 75 | 
 76 | 1. **Scoring de Pertinence** :
 77 |    - Les documents sont scorés par la recherche IA de Tavily
 78 |    - Un seuil minimum (par défaut 0,4) est requis pour continuer
 79 |    - Les scores reflètent la pertinence par rapport à la requête de recherche
 80 |    - Un score élevé indique une meilleure correspondance avec l'intention de recherche
 81 | 
 82 | 2. **Traitement des Documents** :
 83 |    - Le contenu est normalisé et nettoyé
 84 |    - Les URLs sont dédupliquées et standardisées
 85 |    - Les documents sont triés par score de pertinence
 86 |    - Les mises à jour de progression sont envoyées en temps réel via WebSocket
 87 | 
 88 | ### Système de Communication en Temps Réel
 89 | 
 90 | La plateforme implémente un système de communication en temps réel basé sur WebSocket :
 91 | 
 92 | ![web ui](<static/ui-2.png>)
 93 | 
 94 | 1. **Implémentation Backend** :
 95 |    - Utilise le support WebSocket de FastAPI
 96 |    - Maintient des connexions persistantes par tâche de recherche
 97 |    - Envoie des mises à jour structurées pour divers événements :
 98 |      ```python
 99 |      await websocket_manager.send_status_update(
100 |          job_id=job_id,
101 |          status="processing",
102 |          message=f"Génération du briefing {category}",
103 |          result={
104 |              "step": "Briefing",
105 |              "category": category,
106 |              "total_docs": len(docs)
107 |          }
108 |      )
109 |      ```
110 | 
111 | 2. **Intégration Frontend** :
112 |    - Les composants React s'abonnent aux mises à jour WebSocket
113 |    - Les mises à jour sont traitées et affichées en temps réel
114 |    - Différents composants UI gèrent des types d'updates spécifiques :
115 |      - Progression de la génération de requête
116 |      - Statistiques de curation de documents
117 |      - Statut de complétion des briefings
118 |      - Progression de la génération du rapport
119 | 
120 | 3. **Types de Statut** :
121 |    - `query_generating` : Mises à jour de création de requête en temps réel
122 |    - `document_kept` : Progression de la curation de documents
123 |    - `briefing_start/complete` : Statut de génération des briefings
124 |    - `report_chunk` : Streaming de la génération du rapport
125 |    - `curation_complete` : Statistiques finales des documents
126 | 
127 | ## Configuration
128 | 
129 | ### Configuration Rapide (Recommandée)
130 | 
131 | La façon la plus simple de commencer est d'utiliser le script de configuration :
132 | 
133 | 1. Clonez le dépôt :
134 | ```bash
135 | git clone https://github.com/pogjester/tavily-company-research.git
136 | cd tavily-company-research
137 | ```
138 | 
139 | 2. Rendez le script de configuration exécutable et lancez-le :
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 | 
145 | Le script de configuration va :
146 | - Vérifier les versions requises de Python et Node.js
147 | - Créer éventuellement un environnement virtuel Python (recommandé)
148 | - Installer toutes les dépendances (Python et Node.js)
149 | - Vous guider dans la configuration de vos variables d'environnement
150 | - Démarrer éventuellement les serveurs backend et frontend
151 | 
152 | Vous aurez besoin des clés API suivantes :
153 | - Clé API Tavily
154 | - Clé API Google Gemini
155 | - Clé API OpenAI
156 | - URI MongoDB (optionnel)
157 | 
158 | ### Configuration Manuelle
159 | 
160 | Si vous préférez configurer manuellement, suivez ces étapes :
161 | 
162 | 1. Clonez le dépôt :
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 | 
168 | 2. Installez les dépendances backend :
169 | ```bash
170 | # Optionnel : Créez et activez un environnement virtuel
171 | python -m venv .venv
172 | source .venv/bin/activate
173 | 
174 | # Installez les dépendances Python
175 | pip install -r requirements.txt
176 | ```
177 | 
178 | 3. Installez les dépendances frontend :
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 | 
184 | 4. Créez un fichier `.env` avec vos clés API :
185 | ```env
186 | TAVILY_API_KEY=votre_clé_tavily
187 | GEMINI_API_KEY=votre_clé_gemini
188 | OPENAI_API_KEY=votre_clé_openai
189 | 
190 | # Optionnel : Activez la persistance MongoDB
191 | # MONGODB_URI=votre_chaîne_de_connexion_mongodb
192 | ```
193 | 
194 | ### Configuration Docker
195 | 
196 | L'application peut être exécutée à l'aide de Docker et Docker Compose :
197 | 
198 | 1. Clonez le dépôt :
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 | 
204 | 2. Créez un fichier `.env` avec vos clés API :
205 | ```env
206 | TAVILY_API_KEY=votre_clé_tavily
207 | GEMINI_API_KEY=votre_clé_gemini
208 | OPENAI_API_KEY=votre_clé_openai
209 | 
210 | # Optionnel : Activez la persistance MongoDB
211 | # MONGODB_URI=votre_chaîne_de_connexion_mongodb
212 | ```
213 | 
214 | 3. Construisez et démarrez les conteneurs :
215 | ```bash
216 | docker compose up --build
217 | ```
218 | 
219 | Cela démarrera les services backend et frontend :
220 | - L'API backend sera disponible sur `http://localhost:8000`
221 | - Le frontend sera disponible sur `http://localhost:5174`
222 | 
223 | Pour arrêter les services :
224 | ```bash
225 | docker compose down
226 | ```
227 | 
228 | Remarque : Lors de la mise à jour des variables d'environnement dans `.env`, vous devrez redémarrer les conteneurs :
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 | 
233 | ### Exécution de l'Application
234 | 
235 | 1. Démarrez le serveur backend (choisissez une option) :
236 | ```bash
237 | # Option 1 : Module Python Direct
238 | python -m application.py
239 | 
240 | # Option 2 : FastAPI avec Uvicorn
241 | uvicorn application:app --reload --port 8000
242 | ```
243 | 
244 | 2. Dans un nouveau terminal, démarrez le frontend :
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 | 
250 | 3. Accédez à l'application sur `http://localhost:5173`
251 | 
252 | ## Utilisation
253 | 
254 | ### Développement Local
255 | 
256 | 1. Démarrez le serveur backend (choisissez une option) :
257 | 
258 |    **Option 1 : Module Python Direct**
259 |    ```bash
260 |    python -m application.py
261 |    ```
262 | 
263 |    **Option 2 : FastAPI avec Uvicorn**
264 |    ```bash
265 |    # Installez uvicorn si ce n'est pas déjà fait
266 |    pip install uvicorn
267 | 
268 |    # Exécutez l'application FastAPI avec rechargement à chaud
269 |    uvicorn application:app --reload --port 8000
270 |    ```
271 | 
272 |    Le backend sera disponible sur :
273 |    - Point d'accès API : `http://localhost:8000`
274 |    - Point d'accès WebSocket : `ws://localhost:8000/research/ws/{job_id}`
275 | 
276 | 2. Démarrez le serveur de développement frontend :
277 |    ```bash
278 |    cd ui
279 |    npm run dev
280 |    ```
281 | 
282 | 3. Accédez à l'application sur `http://localhost:5173`
283 | 
284 | ### Options de Déploiement
285 | 
286 | L'application peut être déployée sur diverses plateformes cloud. Voici quelques options courantes :
287 | 
288 | #### AWS Elastic Beanstalk
289 | 
290 | 1. Installez l'EB CLI :
291 |    ```bash
292 |    pip install awsebcli
293 |    ```
294 | 
295 | 2. Initialisez l'application EB :
296 |    ```bash
297 |    eb init -p python-3.11 tavily-research
298 |    ```
299 | 
300 | 3. Créez et déployez :
301 |    ```bash
302 |    eb create tavily-research-prod
303 |    ```
304 | 
305 | #### Autres Options de Déploiement
306 | 
307 | - **Docker** : L'application inclut un Dockerfile pour le déploiement conteneurisé
308 | - **Heroku** : Déployez directement depuis GitHub avec le buildpack Python
309 | - **Google Cloud Run** : Adapté au déploiement conteneurisé avec mise à l'échelle automatique
310 | 
311 | Choisissez la plateforme qui convient le mieux à vos besoins. L'application est indépendante de la plateforme et peut être hébergée partout où les applications web Python sont prises en charge.
312 | 
313 | ## Contribution
314 | 
315 | 1. Forkez le dépôt
316 | 2. Créez une branche de fonctionnalité (`git checkout -b fonctionnalite/superbe-fonction`)
317 | 3. Validez vos modifications (`git commit -m 'Ajout d'une superbe fonction'`)
318 | 4. Poussez vers la branche (`git push origin fonctionnalite/superbe-fonction`)
319 | 5. Ouvrez une Pull Request
320 | 
321 | ## Licence
322 | 
323 | Ce projet est sous licence MIT - voir le fichier [LICENSE](LICENSE) pour plus de détails.
324 | 
325 | ## Remerciements
326 | 
327 | - [Tavily](https://tavily.com/) pour l'API de recherche
328 | - Toutes les autres bibliothèques open-source et leurs contributeurs
329 | 


--------------------------------------------------------------------------------
/README.jp.md:
--------------------------------------------------------------------------------
  1 |  [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md)
  2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md)
  3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md)
  4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md)
  5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md)
  6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md)
  7 | 
  8 | 
  9 | # 企業調査エージェント 🔍
 10 | 
 11 | ![web ui](<static/ui-1.png>)
 12 | 
 13 | 包括的な企業調査レポートを生成するマルチエージェントツール。このプラットフォームは、AIエージェントのパイプラインを使用して、あらゆる企業に関する情報を収集、整理、統合します。
 14 | 
 15 | ✨オンラインで試してみてください！ https://companyresearcher.tavily.com ✨
 16 | 
 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9
 18 | 
 19 | ## 機能
 20 | 
 21 | - **マルチソース調査**：企業ウェブサイト、ニュース記事、財務報告書、業界分析など、様々なソースからデータを取得
 22 | - **AIによるコンテンツフィルタリング**：Tavilyの関連性スコアを使用したコンテンツキュレーション
 23 | - **リアルタイムストリーミング**：WebSocketを使用して調査の進捗と結果をリアルタイムで配信
 24 | - **デュアルモデルアーキテクチャ**：
 25 |   - 大規模コンテキスト調査統合のためのGemini 2.0 Flash
 26 |   - 精密なレポート書式設定と編集のためのGPT-4.1
 27 | - **モダンなReactフロントエンド**：リアルタイム更新、進捗追跡、ダウンロードオプションを備えたレスポンシブインターフェース
 28 | - **モジュラーアーキテクチャ**：専門的な調査・処理ノードのパイプラインを中心に構築
 29 | 
 30 | ## エージェントフレームワーク
 31 | 
 32 | ### 調査パイプライン
 33 | 
 34 | このプラットフォームは、データを順次処理する専門ノードを持つエージェントフレームワークに従います：
 35 | 
 36 | 1. **調査ノード**：
 37 |    - `CompanyAnalyzer`：主要な企業情報を調査
 38 |    - `IndustryAnalyzer`：市場ポジションとトレンドを分析
 39 |    - `FinancialAnalyst`：財務指標とパフォーマンスデータを取得
 40 |    - `NewsScanner`：最新のニュースと動向を収集
 41 | 
 42 | 2. **処理ノード**：
 43 |    - `Collector`：すべてのアナライザーから調査データを集約
 44 |    - `Curator`：コンテンツフィルタリングと関連性スコアリングを実装
 45 |    - `Briefing`：Gemini 2.0 Flashを使用してカテゴリ別の要約を生成
 46 |    - `Editor`：GPT-4.1-miniで要約を最終レポートにコンパイル・書式設定
 47 | 
 48 |    ![web ui](<static/agent-flow.png>)
 49 | 
 50 | ### コンテンツ生成アーキテクチャ
 51 | 
 52 | このプラットフォームは最適なパフォーマンスのために異なるモデルを活用します：
 53 | 
 54 | 1. **Gemini 2.0 Flash** (`briefing.py`)：
 55 |    - 大規模コンテキスト調査統合を処理
 56 |    - 大量データの処理と要約に優れる
 57 |    - カテゴリ別の初期要約生成に使用
 58 |    - 複数文書にわたるコンテキスト維持に効率的
 59 | 
 60 | 2. **GPT-4.1 mini** (`editor.py`)：
 61 |    - 精密な書式設定と編集に特化
 62 |    - Markdown構造と一貫性を処理
 63 |    - 正確な書式設定指示の遵守に優れる
 64 |    - 以下に使用：
 65 |      - 最終レポートのコンパイル
 66 |      - コンテンツの重複除去
 67 |      - Markdown書式設定
 68 |      - リアルタイムレポートストリーミング
 69 | 
 70 | このアプローチは、Geminiの大規模コンテキストウィンドウ処理能力とGPT-4.1-miniの書式設定指示精度を組み合わせます。
 71 | 
 72 | ### コンテンツキュレーションシステム
 73 | 
 74 | このプラットフォームは`curator.py`でコンテンツフィルタリングシステムを使用します：
 75 | 
 76 | 1. **関連性スコアリング**：
 77 |    - 文書はTavilyのAI検索によってスコア付けされます
 78 |    - 継続するには最小閾値（デフォルト0.4）が必要
 79 |    - スコアは検索クエリとの関連性を反映
 80 |    - 高スコアは検索意図とのより良い一致を示す
 81 | 
 82 | 2. **文書処理**：
 83 |    - コンテンツは正規化・クリーニングされます
 84 |    - URLは重複除去・標準化されます
 85 |    - 文書は関連性スコアでソートされます
 86 |    - 進捗更新はWebSocket経由でリアルタイム送信されます
 87 | 
 88 | ### リアルタイム通信システム
 89 | 
 90 | このプラットフォームはWebSocketベースのリアルタイム通信システムを実装します：
 91 | 
 92 | ![web ui](<static/ui-2.png>)
 93 | 
 94 | 1. **バックエンド実装**：
 95 |    - FastAPIのWebSocketサポートを使用
 96 |    - 調査タスクごとに永続的な接続を維持
 97 |    - 様々なイベントに対して構造化された更新を送信：
 98 |      ```python
 99 |      await websocket_manager.send_status_update(
100 |          job_id=job_id,
101 |          status="processing",
102 |          message=f"{category}ブリーフィング生成中",
103 |          result={
104 |              "step": "Briefing",
105 |              "category": category,
106 |              "total_docs": len(docs)
107 |          }
108 |      )
109 |      ```
110 | 
111 | 2. **フロントエンド統合**：
112 |    - ReactコンポーネントがWebSocket更新を購読
113 |    - 更新はリアルタイムで処理・表示されます
114 |    - 異なるUIコンポーネントが特定の更新タイプを処理：
115 |      - クエリ生成進捗
116 |      - 文書キュレーション統計
117 |      - ブリーフィング完了ステータス
118 |      - レポート生成進捗
119 | 
120 | 3. **ステータスタイプ**：
121 |    - `query_generating`：リアルタイムクエリ作成更新
122 |    - `document_kept`：文書キュレーション進捗
123 |    - `briefing_start/complete`：ブリーフィング生成ステータス
124 |    - `report_chunk`：レポート生成ストリーミング
125 |    - `curation_complete`：最終文書統計
126 | 
127 | ## セットアップ
128 | 
129 | ### クイックセットアップ（推奨）
130 | 
131 | 最も簡単な開始方法はセットアップスクリプトを使用することです：
132 | 
133 | 1. リポジトリをクローン：
134 | ```bash
135 | git clone https://github.com/pogjester/tavily-company-research.git
136 | cd tavily-company-research
137 | ```
138 | 
139 | 2. セットアップスクリプトを実行可能にして実行：
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 | 
145 | セットアップスクリプトは以下を行います：
146 | - 必要なPythonとNode.jsのバージョンを確認
147 | - Python仮想環境を作成（推奨）
148 | - すべての依存関係をインストール（PythonとNode.js）
149 | - 環境変数の設定をガイド
150 | - バックエンドとフロントエンドサーバーを起動（オプション）
151 | 
152 | 以下のAPIキーが必要です：
153 | - Tavily APIキー
154 | - Google Gemini APIキー
155 | - OpenAI APIキー
156 | - MongoDB URI（オプション）
157 | 
158 | ### 手動セットアップ
159 | 
160 | 手動でセットアップしたい場合は、以下の手順に従ってください：
161 | 
162 | 1. リポジトリをクローン：
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 | 
168 | 2. バックエンド依存関係をインストール：
169 | ```bash
170 | # オプション：仮想環境を作成・アクティベート
171 | python -m venv .venv
172 | source .venv/bin/activate
173 | 
174 | # Python依存関係をインストール
175 | pip install -r requirements.txt
176 | ```
177 | 
178 | 3. フロントエンド依存関係をインストール：
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 | 
184 | 4. APIキーを含む`.env`ファイルを作成：
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 | 
190 | # オプション：MongoDB永続化を有効化
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 | 
194 | ### Dockerセットアップ
195 | 
196 | アプリケーションはDockerとDocker Composeを使用して実行できます：
197 | 
198 | 1. リポジトリをクローン：
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 | 
204 | 2. APIキーを含む`.env`ファイルを作成：
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 | 
210 | # オプション：MongoDB永続化を有効化
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 | 
214 | 3. コンテナをビルド・起動：
215 | ```bash
216 | docker compose up --build
217 | ```
218 | 
219 | これによりバックエンドとフロントエンドサービスが起動します：
220 | - バックエンドAPIは`http://localhost:8000`で利用可能
221 | - フロントエンドは`http://localhost:5174`で利用可能
222 | 
223 | サービスを停止するには：
224 | ```bash
225 | docker compose down
226 | ```
227 | 
228 | 注意：`.env`の環境変数を更新する際は、コンテナを再起動する必要があります：
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 | 
233 | ### アプリケーションの実行
234 | 
235 | 1. バックエンドサーバーを起動（オプションを選択）：
236 | ```bash
237 | # オプション1：直接Pythonモジュール
238 | python -m application.py
239 | 
240 | # オプション2：UvicornでFastAPI
241 | uvicorn application:app --reload --port 8000
242 | ```
243 | 
244 | 2. 新しいターミナルでフロントエンドを起動：
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 | 
250 | 3. `http://localhost:5173`でアプリケーションにアクセス
251 | 
252 | ## 使用方法
253 | 
254 | ### ローカル開発
255 | 
256 | 1. バックエンドサーバーを起動（オプションを選択）：
257 | 
258 |    **オプション1：直接Pythonモジュール**
259 |    ```bash
260 |    python -m application.py
261 |    ```
262 | 
263 |    **オプション2：UvicornでFastAPI**
264 |    ```bash
265 |    # uvicornがインストールされていない場合はインストール
266 |    pip install uvicorn
267 | 
268 |    # ホットリロード付きでFastAPIアプリケーションを実行
269 |    uvicorn application:app --reload --port 8000
270 |    ```
271 | 
272 |    バックエンドは以下で利用可能：
273 |    - APIエンドポイント：`http://localhost:8000`
274 |    - WebSocketエンドポイント：`ws://localhost:8000/research/ws/{job_id}`
275 | 
276 | 2. フロントエンド開発サーバーを起動：
277 |    ```bash
278 |    cd ui
279 |    npm run dev
280 |    ```
281 | 
282 | 3. `http://localhost:5173`でアプリケーションにアクセス
283 | 
284 | ### デプロイメントオプション
285 | 
286 | アプリケーションは様々なクラウドプラットフォームにデプロイできます。一般的なオプションをいくつか紹介します：
287 | 
288 | #### AWS Elastic Beanstalk
289 | 
290 | 1. EB CLIをインストール：
291 |    ```bash
292 |    pip install awsebcli
293 |    ```
294 | 
295 | 2. EBアプリケーションを初期化：
296 |    ```bash
297 |    eb init -p python-3.11 tavily-research
298 |    ```
299 | 
300 | 3. 作成・デプロイ：
301 |    ```bash
302 |    eb create tavily-research-prod
303 |    ```
304 | 
305 | #### その他のデプロイメントオプション
306 | 
307 | - **Docker**：アプリケーションにはコンテナ化デプロイメント用のDockerfileが含まれています
308 | - **Heroku**：PythonビルドパックでGitHubから直接デプロイ
309 | - **Google Cloud Run**：自動スケーリング付きコンテナ化デプロイメントに適しています
310 | 
311 | ニーズに最も適したプラットフォームを選択してください。アプリケーションはプラットフォーム非依存で、Pythonウェブアプリケーションがサポートされているどこでもホストできます。
312 | 
313 | ## 貢献
314 | 
315 | 1. リポジトリをフォーク
316 | 2. 機能ブランチを作成（`git checkout -b feature/amazing-feature`）
317 | 3. 変更をコミット（`git commit -m 'Add some amazing feature'`）
318 | 4. ブランチにプッシュ（`git push origin feature/amazing-feature`）
319 | 5. プルリクエストを開く
320 | 
321 | ## ライセンス
322 | 
323 | このプロジェクトはMITライセンスの下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルを参照してください。
324 | 
325 | ## 謝辞
326 | 
327 | - 検索APIを提供する[Tavily](https://tavily.com/)
328 | - その他すべてのオープンソースライブラリとその貢献者


--------------------------------------------------------------------------------
/README.kr.md:
--------------------------------------------------------------------------------
  1 |  [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md)
  2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md)
  3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md)
  4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md)
  5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md)
  6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md)
  7 | 
  8 | 
  9 | # 기업 조사 에이전트 🔍
 10 | 
 11 | ![web ui](<static/ui-1.png>)
 12 | 
 13 | 포괄적인 기업 조사 보고서를 생성하는 멀티 에이전트 도구입니다. 이 플랫폼은 AI 에이전트 파이프라인을 사용하여 모든 기업에 대한 정보를 수집, 정리 및 종합합니다.
 14 | 
 15 | ✨온라인에서 체험해보세요! https://companyresearcher.tavily.com ✨
 16 | 
 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9
 18 | 
 19 | ## 기능
 20 | 
 21 | - **멀티소스 조사**: 기업 웹사이트, 뉴스 기사, 재무 보고서, 업계 분석 등 다양한 소스에서 데이터 수집
 22 | - **AI 콘텐츠 필터링**: Tavily의 관련성 점수를 사용한 콘텐츠 큐레이션
 23 | - **실시간 스트리밍**: WebSocket을 사용하여 조사 진행 상황과 결과를 실시간으로 스트리밍
 24 | - **듀얼 모델 아키텍처**:
 25 |   - 대규모 컨텍스트 조사 종합을 위한 Gemini 2.0 Flash
 26 |   - 정밀한 보고서 형식 지정 및 편집을 위한 GPT-4.1
 27 | - **모던 React 프론트엔드**: 실시간 업데이트, 진행 상황 추적, 다운로드 옵션을 갖춘 반응형 인터페이스
 28 | - **모듈러 아키텍처**: 전문화된 조사 및 처리 노드 파이프라인을 중심으로 구축
 29 | 
 30 | ## 에이전트 프레임워크
 31 | 
 32 | ### 조사 파이프라인
 33 | 
 34 | 이 플랫폼은 데이터를 순차적으로 처리하는 전문화된 노드를 가진 에이전트 프레임워크를 따릅니다:
 35 | 
 36 | 1. **조사 노드**:
 37 |    - `CompanyAnalyzer`: 핵심 기업 정보 조사
 38 |    - `IndustryAnalyzer`: 시장 위치 및 트렌드 분석
 39 |    - `FinancialAnalyst`: 재무 지표 및 성과 데이터 수집
 40 |    - `NewsScanner`: 최신 뉴스 및 개발 사항 수집
 41 | 
 42 | 2. **처리 노드**:
 43 |    - `Collector`: 모든 분석기에서 조사 데이터 집계
 44 |    - `Curator`: 콘텐츠 필터링 및 관련성 점수 매기기 구현
 45 |    - `Briefing`: Gemini 2.0 Flash를 사용하여 카테고리별 요약 생성
 46 |    - `Editor`: GPT-4.1-mini로 요약을 최종 보고서로 컴파일 및 형식 지정
 47 | 
 48 |    ![web ui](<static/agent-flow.png>)
 49 | 
 50 | ### 콘텐츠 생성 아키텍처
 51 | 
 52 | 이 플랫폼은 최적의 성능을 위해 서로 다른 모델을 활용합니다:
 53 | 
 54 | 1. **Gemini 2.0 Flash** (`briefing.py`):
 55 |    - 대규모 컨텍스트 조사 종합 처리
 56 |    - 대량의 데이터 처리 및 요약에 뛰어남
 57 |    - 카테고리별 초기 요약 생성에 사용
 58 |    - 여러 문서에 걸친 컨텍스트 유지에 효율적
 59 | 
 60 | 2. **GPT-4.1 mini** (`editor.py`):
 61 |    - 정밀한 형식 지정 및 편집에 특화
 62 |    - Markdown 구조 및 일관성 처리
 63 |    - 정확한 형식 지정 지침 준수에 우수
 64 |    - 다음 용도로 사용:
 65 |      - 최종 보고서 컴파일
 66 |      - 콘텐츠 중복 제거
 67 |      - Markdown 형식 지정
 68 |      - 실시간 보고서 스트리밍
 69 | 
 70 | 이 접근 방식은 Gemini의 대규모 컨텍스트 윈도우 처리 능력과 GPT-4.1-mini의 형식 지정 지침 정밀도를 결합합니다.
 71 | 
 72 | ### 콘텐츠 큐레이션 시스템
 73 | 
 74 | 이 플랫폼은 `curator.py`에서 콘텐츠 필터링 시스템을 사용합니다:
 75 | 
 76 | 1. **관련성 점수 매기기**:
 77 |    - 문서는 Tavily의 AI 검색으로 점수가 매겨집니다
 78 |    - 계속 진행하려면 최소 임계값(기본값 0.4)이 필요합니다
 79 |    - 점수는 검색 쿼리와의 관련성을 반영합니다
 80 |    - 높은 점수는 검색 의도와의 더 나은 일치를 나타냅니다
 81 | 
 82 | 2. **문서 처리**:
 83 |    - 콘텐츠가 정규화되고 정리됩니다
 84 |    - URL이 중복 제거되고 표준화됩니다
 85 |    - 문서가 관련성 점수로 정렬됩니다
 86 |    - 진행 상황 업데이트가 WebSocket을 통해 실시간으로 전송됩니다
 87 | 
 88 | ### 실시간 통신 시스템
 89 | 
 90 | 이 플랫폼은 WebSocket 기반 실시간 통신 시스템을 구현합니다:
 91 | 
 92 | ![web ui](<static/ui-2.png>)
 93 | 
 94 | 1. **백엔드 구현**:
 95 |    - FastAPI의 WebSocket 지원 사용
 96 |    - 조사 작업당 지속적인 연결 유지
 97 |    - 다양한 이벤트에 대한 구조화된 업데이트 전송:
 98 |      ```python
 99 |      await websocket_manager.send_status_update(
100 |          job_id=job_id,
101 |          status="processing",
102 |          message=f"{category} 브리핑 생성 중",
103 |          result={
104 |              "step": "Briefing",
105 |              "category": category,
106 |              "total_docs": len(docs)
107 |          }
108 |      )
109 |      ```
110 | 
111 | 2. **프론트엔드 통합**:
112 |    - React 컴포넌트가 WebSocket 업데이트를 구독
113 |    - 업데이트가 실시간으로 처리되고 표시됩니다
114 |    - 다양한 UI 컴포넌트가 특정 업데이트 유형을 처리:
115 |      - 쿼리 생성 진행 상황
116 |      - 문서 큐레이션 통계
117 |      - 브리핑 완료 상태
118 |      - 보고서 생성 진행 상황
119 | 
120 | 3. **상태 유형**:
121 |    - `query_generating`: 실시간 쿼리 생성 업데이트
122 |    - `document_kept`: 문서 큐레이션 진행 상황
123 |    - `briefing_start/complete`: 브리핑 생성 상태
124 |    - `report_chunk`: 보고서 생성 스트리밍
125 |    - `curation_complete`: 최종 문서 통계
126 | 
127 | ## 설정
128 | 
129 | ### 빠른 설정 (권장)
130 | 
131 | 시작하는 가장 쉬운 방법은 설정 스크립트를 사용하는 것입니다:
132 | 
133 | 1. 저장소 클론:
134 | ```bash
135 | git clone https://github.com/pogjester/tavily-company-research.git
136 | cd tavily-company-research
137 | ```
138 | 
139 | 2. 설정 스크립트를 실행 가능하게 만들고 실행:
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 | 
145 | 설정 스크립트는 다음을 수행합니다:
146 | - 필요한 Python 및 Node.js 버전 확인
147 | - Python 가상 환경 생성 (권장)
148 | - 모든 종속성 설치 (Python 및 Node.js)
149 | - 환경 변수 설정 안내
150 | - 백엔드 및 프론트엔드 서버 시작 (선택사항)
151 | 
152 | 다음 API 키가 필요합니다:
153 | - Tavily API 키
154 | - Google Gemini API 키
155 | - OpenAI API 키
156 | - MongoDB URI (선택사항)
157 | 
158 | ### 수동 설정
159 | 
160 | 수동으로 설정하려면 다음 단계를 따르세요:
161 | 
162 | 1. 저장소 클론:
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 | 
168 | 2. 백엔드 종속성 설치:
169 | ```bash
170 | # 선택사항: 가상 환경 생성 및 활성화
171 | python -m venv .venv
172 | source .venv/bin/activate
173 | 
174 | # Python 종속성 설치
175 | pip install -r requirements.txt
176 | ```
177 | 
178 | 3. 프론트엔드 종속성 설치:
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 | 
184 | 4. API 키가 포함된 `.env` 파일 생성:
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 | 
190 | # 선택사항: MongoDB 지속성 활성화
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 | 
194 | ### Docker 설정
195 | 
196 | 애플리케이션은 Docker 및 Docker Compose를 사용하여 실행할 수 있습니다:
197 | 
198 | 1. 저장소 클론:
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 | 
204 | 2. API 키가 포함된 `.env` 파일 생성:
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 | 
210 | # 선택사항: MongoDB 지속성 활성화
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 | 
214 | 3. 컨테이너 빌드 및 시작:
215 | ```bash
216 | docker compose up --build
217 | ```
218 | 
219 | 이렇게 하면 백엔드 및 프론트엔드 서비스가 시작됩니다:
220 | - 백엔드 API는 `http://localhost:8000`에서 사용 가능
221 | - 프론트엔드는 `http://localhost:5174`에서 사용 가능
222 | 
223 | 서비스를 중지하려면:
224 | ```bash
225 | docker compose down
226 | ```
227 | 
228 | 참고: `.env`의 환경 변수를 업데이트할 때 컨테이너를 다시 시작해야 합니다:
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 | 
233 | ### 애플리케이션 실행
234 | 
235 | 1. 백엔드 서버 시작 (옵션 선택):
236 | ```bash
237 | # 옵션 1: 직접 Python 모듈
238 | python -m application.py
239 | 
240 | # 옵션 2: Uvicorn으로 FastAPI
241 | uvicorn application:app --reload --port 8000
242 | ```
243 | 
244 | 2. 새 터미널에서 프론트엔드 시작:
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 | 
250 | 3. `http://localhost:5173`에서 애플리케이션에 액세스
251 | 
252 | ## 사용법
253 | 
254 | ### 로컬 개발
255 | 
256 | 1. 백엔드 서버 시작 (옵션 선택):
257 | 
258 |    **옵션 1: 직접 Python 모듈**
259 |    ```bash
260 |    python -m application.py
261 |    ```
262 | 
263 |    **옵션 2: Uvicorn으로 FastAPI**
264 |    ```bash
265 |    # uvicorn이 설치되지 않은 경우 설치
266 |    pip install uvicorn
267 | 
268 |    # 핫 리로드로 FastAPI 애플리케이션 실행
269 |    uvicorn application:app --reload --port 8000
270 |    ```
271 | 
272 |    백엔드는 다음에서 사용 가능합니다:
273 |    - API 엔드포인트: `http://localhost:8000`
274 |    - WebSocket 엔드포인트: `ws://localhost:8000/research/ws/{job_id}`
275 | 
276 | 2. 프론트엔드 개발 서버 시작:
277 |    ```bash
278 |    cd ui
279 |    npm run dev
280 |    ```
281 | 
282 | 3. `http://localhost:5173`에서 애플리케이션에 액세스
283 | 
284 | ### 배포 옵션
285 | 
286 | 애플리케이션은 다양한 클라우드 플랫폼에 배포할 수 있습니다. 몇 가지 일반적인 옵션은 다음과 같습니다:
287 | 
288 | #### AWS Elastic Beanstalk
289 | 
290 | 1. EB CLI 설치:
291 |    ```bash
292 |    pip install awsebcli
293 |    ```
294 | 
295 | 2. EB 애플리케이션 초기화:
296 |    ```bash
297 |    eb init -p python-3.11 tavily-research
298 |    ```
299 | 
300 | 3. 생성 및 배포:
301 |    ```bash
302 |    eb create tavily-research-prod
303 |    ```
304 | 
305 | #### 기타 배포 옵션
306 | 
307 | - **Docker**: 애플리케이션에는 컨테이너화된 배포를 위한 Dockerfile이 포함되어 있습니다
308 | - **Heroku**: Python 빌드팩으로 GitHub에서 직접 배포
309 | - **Google Cloud Run**: 자동 스케일링을 통한 컨테이너화된 배포에 적합
310 | 
311 | 귀하의 요구 사항에 가장 적합한 플랫폼을 선택하세요. 애플리케이션은 플랫폼에 구애받지 않으며 Python 웹 애플리케이션이 지원되는 곳이면 어디든 호스팅할 수 있습니다.
312 | 
313 | ## 기여
314 | 
315 | 1. 저장소 포크
316 | 2. 기능 브랜치 생성 (`git checkout -b feature/amazing-feature`)
317 | 3. 변경 사항 커밋 (`git commit -m 'Add some amazing feature'`)
318 | 4. 브랜치에 푸시 (`git push origin feature/amazing-feature`)
319 | 5. Pull Request 열기
320 | 
321 | ## 라이선스
322 | 
323 | 이 프로젝트는 MIT 라이선스 하에 라이선스가 부여됩니다 - 자세한 내용은 [LICENSE](LICENSE) 파일을 참조하세요.
324 | 
325 | ## 감사의 말
326 | 
327 | - 검색 API를 제공하는 [Tavily](https://tavily.com/)
328 | - 모든 다른 오픈 소스 라이브러리와 그 기여자들


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 |  [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md)
  2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md)
  3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md)
  4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md)
  5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md)
  6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md)
  7 | 
  8 | 
  9 | # Agentic Company Researcher 🔍
 10 | 
 11 | ![web ui](<static/ui-1.png>)
 12 | 
 13 | A multi-agent tool that generates comprehensive company research reports. The platform uses a pipeline of AI agents to gather, curate, and synthesize information about any company.
 14 | 
 15 | ✨Check it out online! https://companyresearcher.tavily.com ✨
 16 | 
 17 | https://github.com/user-attachments/assets/0e373146-26a7-4391-b973-224ded3182a9
 18 | 
 19 | ## Features
 20 | 
 21 | - **Multi-Source Research**: Gathers data from various sources including company websites, news articles, financial reports, and industry analyses
 22 | - **AI-Powered Content Filtering**: Uses Tavily's relevance scoring for content curation
 23 | - **Real-Time Progress Streaming**: Uses WebSocket connections to stream research progress and results
 24 | - **Dual Model Architecture**: 
 25 |   - Gemini 2.0 Flash for high-context research synthesis
 26 |   - GPT-4.1 for precise report formatting and editing
 27 | - **Modern React Frontend**: Responsive UI with real-time updates, progress tracking, and download options
 28 | - **Modular Architecture**: Built using a pipeline of specialized research and processing nodes
 29 | 
 30 | ## Agent Framework
 31 | 
 32 | ### Research Pipeline
 33 | 
 34 | The platform follows an agentic framework with specialized nodes that process data sequentially:
 35 | 
 36 | 1. **Research Nodes**:
 37 |    - `CompanyAnalyzer`: Researches core business information
 38 |    - `IndustryAnalyzer`: Analyzes market position and trends
 39 |    - `FinancialAnalyst`: Gathers financial metrics and performance data
 40 |    - `NewsScanner`: Collects recent news and developments
 41 | 
 42 | 2. **Processing Nodes**:
 43 |    - `Collector`: Aggregates research data from all analyzers
 44 |    - `Curator`: Implements content filtering and relevance scoring
 45 |    - `Briefing`: Generates category-specific summaries using Gemini 2.0 Flash
 46 |    - `Editor`: Compiles and formats the briefings into a final report using GPT-4.1-mini
 47 | 
 48 |    ![web ui](<static/agent-flow.png>)
 49 | 
 50 | ### Content Generation Architecture
 51 | 
 52 | The platform leverages separate models for optimal performance:
 53 | 
 54 | 1. **Gemini 2.0 Flash** (`briefing.py`):
 55 |    - Handles high-context research synthesis tasks
 56 |    - Excels at processing and summarizing large volumes of data
 57 |    - Used for generating initial category briefings
 58 |    - Efficient at maintaining context across multiple documents
 59 | 
 60 | 2. **GPT-4.1 mini** (`editor.py`):
 61 |    - Specializes in precise formatting and editing tasks
 62 |    - Handles markdown structure and consistency
 63 |    - Superior at following exact formatting instructions
 64 |    - Used for:
 65 |      - Final report compilation
 66 |      - Content deduplication
 67 |      - Markdown formatting
 68 |      - Real-time report streaming
 69 | 
 70 | This approach combines Gemini's strength in handling large context windows with GPT-4.1-mini's precision in following specific formatting instructions.
 71 | 
 72 | ### Content Curation System
 73 | 
 74 | The platform uses a content filtering system in `curator.py`:
 75 | 
 76 | 1. **Relevance Scoring**:
 77 |    - Documents are scored by Tavily's AI-powered search
 78 |    - A minimum threshold (default 0.4) is required to proceed
 79 |    - Scores reflect relevance to the specific research query
 80 |    - Higher scores indicate better matches to the research intent
 81 | 
 82 | 2. **Document Processing**:
 83 |    - Content is normalized and cleaned
 84 |    - URLs are deduplicated and standardized
 85 |    - Documents are sorted by relevance scores
 86 |    - Real-time progress updates are sent via WebSocket
 87 | 
 88 | ### Real-Time Communication System
 89 | 
 90 | The platform implements a WebSocket-based real-time communication system:
 91 | 
 92 | ![web ui](<static/ui-2.png>)
 93 | 
 94 | 1. **Backend Implementation**:
 95 |    - Uses FastAPI's WebSocket support
 96 |    - Maintains persistent connections per research job
 97 |    - Sends structured status updates for various events:
 98 |      ```python
 99 |      await websocket_manager.send_status_update(
100 |          job_id=job_id,
101 |          status="processing",
102 |          message=f"Generating {category} briefing",
103 |          result={
104 |              "step": "Briefing",
105 |              "category": category,
106 |              "total_docs": len(docs)
107 |          }
108 |      )
109 |      ```
110 | 
111 | 2. **Frontend Integration**:
112 |    - React components subscribe to WebSocket updates
113 |    - Updates are processed and displayed in real-time
114 |    - Different UI components handle specific update types:
115 |      - Query generation progress
116 |      - Document curation statistics
117 |      - Briefing completion status
118 |      - Report generation progress
119 | 
120 | 3. **Status Types**:
121 |    - `query_generating`: Real-time query creation updates
122 |    - `document_kept`: Document curation progress
123 |    - `briefing_start/complete`: Briefing generation status
124 |    - `report_chunk`: Streaming report generation
125 |    - `curation_complete`: Final document statistics
126 | 
127 | ## Setup
128 | 
129 | ### Quick Setup (Recommended)
130 | 
131 | The easiest way to get started is using the setup script:
132 | 
133 | 1. Clone the repository:
134 | ```bash
135 | git clone https://github.com/pogjester/tavily-company-research.git
136 | cd tavily-company-research
137 | ```
138 | 
139 | 2. Make the setup script executable and run it:
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 | 
145 | The setup script will:
146 | - Check for required Python and Node.js versions
147 | - Optionally create a Python virtual environment (recommended)
148 | - Install all dependencies (Python and Node.js)
149 | - Guide you through setting up your environment variables
150 | - Optionally start both backend and frontend servers
151 | 
152 | You'll need the following API keys ready:
153 | - Tavily API Key
154 | - Google Gemini API Key
155 | - OpenAI API Key
156 | - MongoDB URI (optional)
157 | 
158 | ### Manual Setup
159 | 
160 | If you prefer to set up manually, follow these steps:
161 | 
162 | 1. Clone the repository:
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 | 
168 | 2. Install backend dependencies:
169 | ```bash
170 | # Optional: Create and activate virtual environment
171 | python -m venv .venv
172 | source .venv/bin/activate
173 | 
174 | # Install Python dependencies
175 | pip install -r requirements.txt
176 | ```
177 | 
178 | 3. Install frontend dependencies:
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 | 
184 | 4. Create a `.env` file with your API keys:
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 | 
190 | # Optional: Enable MongoDB persistence
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 | 
194 | ### Docker Setup
195 | 
196 | The application can be run using Docker and Docker Compose:
197 | 
198 | 1. Clone the repository:
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 | 
204 | 2. Create a `.env` file with your API keys:
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 | 
210 | # Optional: Enable MongoDB persistence
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 | 
214 | 3. Build and start the containers:
215 | ```bash
216 | docker compose up --build
217 | ```
218 | 
219 | This will start both the backend and frontend services:
220 | - Backend API will be available at `http://localhost:8000`
221 | - Frontend will be available at `http://localhost:5174`
222 | 
223 | To stop the services:
224 | ```bash
225 | docker compose down
226 | ```
227 | 
228 | Note: When updating environment variables in `.env`, you'll need to restart the containers:
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 | 
233 | ### Running the Application
234 | 
235 | 1. Start the backend server (choose one):
236 | ```bash
237 | # Option 1: Direct Python Module
238 | python -m application.py
239 | 
240 | # Option 2: FastAPI with Uvicorn
241 | uvicorn application:app --reload --port 8000
242 | ```
243 | 
244 | 2. In a new terminal, start the frontend:
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 | 
250 | 3. Access the application at `http://localhost:5173`
251 | 
252 | ## Usage
253 | 
254 | ### Local Development
255 | 
256 | 1. Start the backend server (choose one option):
257 | 
258 |    **Option 1: Direct Python Module**
259 |    ```bash
260 |    python -m application.py
261 |    ```
262 | 
263 |    **Option 2: FastAPI with Uvicorn**
264 |    ```bash
265 |    # Install uvicorn if not already installed
266 |    pip install uvicorn
267 | 
268 |    # Run the FastAPI application with hot reload
269 |    uvicorn application:app --reload --port 8000
270 |    ```
271 | 
272 |    The backend will be available at:
273 |    - API Endpoint: `http://localhost:8000`
274 |    - WebSocket Endpoint: `ws://localhost:8000/research/ws/{job_id}`
275 | 
276 | 2. Start the frontend development server:
277 |    ```bash
278 |    cd ui
279 |    npm run dev
280 |    ```
281 | 
282 | 3. Access the application at `http://localhost:5173`
283 | 
284 | ### Deployment Options
285 | 
286 | The application can be deployed to various cloud platforms. Here are some common options:
287 | 
288 | #### AWS Elastic Beanstalk
289 | 
290 | 1. Install the EB CLI:
291 |    ```bash
292 |    pip install awsebcli
293 |    ```
294 | 
295 | 2. Initialize EB application:
296 |    ```bash
297 |    eb init -p python-3.11 tavily-research
298 |    ```
299 | 
300 | 3. Create and deploy:
301 |    ```bash
302 |    eb create tavily-research-prod
303 |    ```
304 | 
305 | #### Other Deployment Options
306 | 
307 | - **Docker**: The application includes a Dockerfile for containerized deployment
308 | - **Heroku**: Deploy directly from GitHub with the Python buildpack
309 | - **Google Cloud Run**: Suitable for containerized deployment with automatic scaling
310 | 
311 | Choose the platform that best suits your needs. The application is platform-agnostic and can be hosted anywhere that supports Python web applications.
312 | 
313 | ## Contributing
314 | 
315 | 1. Fork the repository
316 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
317 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
318 | 4. Push to the branch (`git push origin feature/amazing-feature`)
319 | 5. Open a Pull Request
320 | 
321 | ## License
322 | 
323 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
324 | 
325 | ## Acknowledgments
326 | 
327 | - [Tavily](https://tavily.com/) for the research API
328 | - All other open-source libraries and their contributors
329 | 


--------------------------------------------------------------------------------
/README.zh.md:
--------------------------------------------------------------------------------
  1 |  [![en](https://img.shields.io/badge/lang-en-red.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.md)
  2 | [![zh](https://img.shields.io/badge/lang-zh-green.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.zh.md)
  3 | [![fr](https://img.shields.io/badge/lang-fr-blue.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.fr.md)
  4 | [![es](https://img.shields.io/badge/lang-es-yellow.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.es.md)
  5 | [![jp](https://img.shields.io/badge/lang-jp-orange.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.jp.md)
  6 | [![kr](https://img.shields.io/badge/lang-ko-purple.svg)](https://github.com/pogjester/company-research-agent/blob/main/README.kr.md)
  7 | 
  8 | 
  9 | # 智能公司研究助手 🔍
 10 | 
 11 | ![web ui](<static/ui-1.png>)
 12 | 
 13 | 一个多智能体工具，用于生成全面的公司研究报告。该平台使用一系列AI智能体来收集、整理和综合任何公司的信息。
 14 | 
 15 | ✨快来看看吧！[https://companyresearcher.tavily.com](https://companyresearcher.tavily.com) ✨
 16 | 
 17 | https://github.com/user-attachments/assets/071aa491-009b-4d76-a698-88863149e71c
 18 | 
 19 | ## 功能特点
 20 | 
 21 | - **多源研究**：从公司网站、新闻文章、财务报告和行业分析等多个来源收集数据
 22 | - **AI驱动的内容过滤**：使用Tavily的相关性评分进行内容筛选
 23 | - **实时进度流**：使用WebSocket连接流式传输研究进度和结果
 24 | - **双模型架构**：
 25 |   - Gemini 2.0 Flash用于高上下文研究综合
 26 |   - GPT-4.1用于精确的报告格式化和编辑
 27 | - **现代React前端**：具有实时更新、进度跟踪和下载选项的响应式UI
 28 | - **模块化架构**：使用专业研究和处理节点构建的管道
 29 | 
 30 | ## 智能体框架
 31 | 
 32 | ### 研究管道
 33 | 
 34 | 该平台遵循智能体框架，使用专门的节点按顺序处理数据：
 35 | 
 36 | 1. **研究节点**：
 37 |    - `CompanyAnalyzer`：研究核心业务信息
 38 |    - `IndustryAnalyzer`：分析市场定位和趋势
 39 |    - `FinancialAnalyst`：收集财务指标和业绩数据
 40 |    - `NewsScanner`：收集最新新闻和发展动态
 41 | 
 42 | 2. **处理节点**：
 43 |    - `Collector`：汇总所有分析器的研究数据
 44 |    - `Curator`：实现内容过滤和相关性评分
 45 |    - `Briefing`：使用Gemini 2.0 Flash生成特定类别的摘要
 46 |    - `Editor`：使用GPT-4.1-mini将简报编译和格式化为最终报告
 47 | 
 48 |    ![web ui](<static/agent-flow.png>)
 49 | 
 50 | ### 内容生成架构
 51 | 
 52 | 该平台利用不同的模型以获得最佳性能：
 53 | 
 54 | 1. **Gemini 2.0 Flash**（`briefing.py`）：
 55 |    - 处理高上下文研究综合任务
 56 |    - 擅长处理和总结大量数据
 57 |    - 用于生成初始类别简报
 58 |    - 在多个文档之间高效维护上下文
 59 | 
 60 | 2. **GPT-4.1 mini**（`editor.py`）：
 61 |    - 专注于精确的格式化和编辑任务
 62 |    - 处理markdown结构和一致性
 63 |    - 在遵循精确格式说明方面表现出色
 64 |    - 用于：
 65 |      - 最终报告编译
 66 |      - 内容去重
 67 |      - Markdown格式化
 68 |      - 实时报告流式传输
 69 | 
 70 | 这种方法结合了Gemini处理大上下文窗口的优势和GPT-4.1-mini在遵循特定格式说明方面的精确性。
 71 | 
 72 | ### 内容筛选系统
 73 | 
 74 | 该平台在`curator.py`中使用内容过滤系统：
 75 | 
 76 | 1. **相关性评分**：
 77 |    - 文档由Tavily的AI驱动搜索进行评分
 78 |    - 需要达到最低阈值（默认0.4）才能继续
 79 |    - 分数反映与特定研究查询的相关性
 80 |    - 更高的分数表示与研究意图更好的匹配
 81 | 
 82 | 2. **文档处理**：
 83 |    - 内容被标准化和清理
 84 |    - URL被去重和标准化
 85 |    - 文档按相关性分数排序
 86 |    - 通过WebSocket发送实时进度更新
 87 | 
 88 | ### 实时通信系统
 89 | 
 90 | 该平台实现了基于WebSocket的实时通信系统：
 91 | 
 92 | ![web ui](<static/ui-2.png>)
 93 | 
 94 | 1. **后端实现**：
 95 |    - 使用FastAPI的WebSocket支持
 96 |    - 为每个研究任务维护持久连接
 97 |    - 发送各种事件的结构化状态更新：
 98 |      ```python
 99 |      await websocket_manager.send_status_update(
100 |          job_id=job_id,
101 |          status="processing",
102 |          message=f"Generating {category} briefing",
103 |          result={
104 |              "step": "Briefing",
105 |              "category": category,
106 |              "total_docs": len(docs)
107 |          }
108 |      )
109 |      ```
110 | 
111 | 2. **前端集成**：
112 |    - React组件订阅WebSocket更新
113 |    - 实时处理和显示更新
114 |    - 不同的UI组件处理特定类型的更新：
115 |      - 查询生成进度
116 |      - 文档筛选统计
117 |      - 简报完成状态
118 |      - 报告生成进度
119 | 
120 | 3. **状态类型**：
121 |    - `query_generating`：实时查询创建更新
122 |    - `document_kept`：文档筛选进度
123 |    - `briefing_start/complete`：简报生成状态
124 |    - `report_chunk`：流式报告生成
125 |    - `curation_complete`：最终文档统计
126 | 
127 | ## 安装设置
128 | 
129 | ### 快速安装（推荐）
130 | 
131 | 最简单的方法是使用安装脚本：
132 | 
133 | 1. 克隆仓库：
134 | ```bash
135 | git clone https://github.com/pogjester/tavily-company-research.git
136 | cd tavily-company-research
137 | ```
138 | 
139 | 2. 使安装脚本可执行并运行：
140 | ```bash
141 | chmod +x setup.sh
142 | ./setup.sh
143 | ```
144 | 
145 | 安装脚本将：
146 | - 检查所需的Python和Node.js版本
147 | - 可选创建Python虚拟环境（推荐）
148 | - 安装所有依赖（Python和Node.js）
149 | - 指导您设置环境变量
150 | - 可选启动后端和前端服务器
151 | 
152 | 您需要准备以下API密钥：
153 | - Tavily API密钥
154 | - Google Gemini API密钥
155 | - OpenAI API密钥
156 | - MongoDB URI（可选）
157 | 
158 | ### 手动安装
159 | 
160 | 如果您更喜欢手动安装，请按照以下步骤操作：
161 | 
162 | 1. 克隆仓库：
163 | ```bash
164 | git clone https://github.com/pogjester/tavily-company-research.git
165 | cd tavily-company-research
166 | ```
167 | 
168 | 2. 安装后端依赖：
169 | ```bash
170 | # 可选：创建并激活虚拟环境
171 | python -m venv .venv
172 | source .venv/bin/activate
173 | 
174 | # 安装Python依赖
175 | pip install -r requirements.txt
176 | ```
177 | 
178 | 3. 安装前端依赖：
179 | ```bash
180 | cd ui
181 | npm install
182 | ```
183 | 
184 | 4. 创建包含API密钥的`.env`文件：
185 | ```env
186 | TAVILY_API_KEY=your_tavily_key
187 | GEMINI_API_KEY=your_gemini_key
188 | OPENAI_API_KEY=your_openai_key
189 | 
190 | # 可选：启用MongoDB持久化
191 | # MONGODB_URI=your_mongodb_connection_string
192 | ```
193 | 
194 | ### Docker安装
195 | 
196 | 可以使用Docker和Docker Compose运行应用程序：
197 | 
198 | 1. 克隆仓库：
199 | ```bash
200 | git clone https://github.com/pogjester/tavily-company-research.git
201 | cd tavily-company-research
202 | ```
203 | 
204 | 2. 创建包含API密钥的`.env`文件：
205 | ```env
206 | TAVILY_API_KEY=your_tavily_key
207 | GEMINI_API_KEY=your_gemini_key
208 | OPENAI_API_KEY=your_openai_key
209 | 
210 | # 可选：启用MongoDB持久化
211 | # MONGODB_URI=your_mongodb_connection_string
212 | ```
213 | 
214 | 3. 构建并启动容器：
215 | ```bash
216 | docker compose up --build
217 | ```
218 | 
219 | 这将启动后端和前端服务：
220 | - 后端API将在`http://localhost:8000`可用
221 | - 前端将在`http://localhost:5174`可用
222 | 
223 | 停止服务：
224 | ```bash
225 | docker compose down
226 | ```
227 | 
228 | 注意：更新`.env`中的环境变量时，需要重启容器：
229 | ```bash
230 | docker compose down && docker compose up
231 | ```
232 | 
233 | ### 运行应用程序
234 | 
235 | 1. 启动后端服务器（选择一种方式）：
236 | ```bash
237 | # 选项1：直接Python模块
238 | python -m application.py
239 | 
240 | # 选项2：使用Uvicorn的FastAPI
241 | uvicorn application:app --reload --port 8000
242 | ```
243 | 
244 | 2. 在新终端中启动前端：
245 | ```bash
246 | cd ui
247 | npm run dev
248 | ```
249 | 
250 | 3. 在`http://localhost:5173`访问应用程序
251 | 
252 | ## 使用方法
253 | 
254 | ### 本地开发
255 | 
256 | 1. 启动后端服务器（选择一个选项）：
257 | 
258 |    **选项1：直接Python模块**
259 |    ```bash
260 |    python -m application.py
261 |    ```
262 | 
263 |    **选项2：使用Uvicorn的FastAPI**
264 |    ```bash
265 |    # 如果尚未安装，安装uvicorn
266 |    pip install uvicorn
267 | 
268 |    # 使用热重载运行FastAPI应用
269 |    uvicorn application:app --reload --port 8000
270 |    ```
271 | 
272 |    后端将在以下位置可用：
273 |    - API端点：`http://localhost:8000`
274 |    - WebSocket端点：`ws://localhost:8000/research/ws/{job_id}`
275 | 
276 | 2. 启动前端开发服务器：
277 |    ```bash
278 |    cd ui
279 |    npm run dev
280 |    ```
281 | 
282 | 3. 在`http://localhost:5173`访问应用程序
283 | 
284 | ### 部署选项
285 | 
286 | 该应用程序可以部署到各种云平台。以下是一些常见选项：
287 | 
288 | #### AWS Elastic Beanstalk
289 | 
290 | 1. 安装EB CLI：
291 |    ```bash
292 |    pip install awsebcli
293 |    ```
294 | 
295 | 2. 初始化EB应用：
296 |    ```bash
297 |    eb init -p python-3.11 tavily-research
298 |    ```
299 | 
300 | 3. 创建并部署：
301 |    ```bash
302 |    eb create tavily-research-prod
303 |    ```
304 | 
305 | #### 其他部署选项
306 | 
307 | - **Docker**：应用程序包含用于容器化部署的Dockerfile
308 | - **Heroku**：使用Python构建包直接从GitHub部署
309 | - **Google Cloud Run**：适用于具有自动扩展功能的容器化部署
310 | 
311 | 选择最适合您需求的平台。该应用程序是平台无关的，可以托管在任何支持Python Web应用程序的地方。
312 | 
313 | ## 贡献
314 | 
315 | 1. Fork仓库
316 | 2. 创建特性分支（`git checkout -b feature/amazing-feature`）
317 | 3. 提交更改（`git commit -m 'Add amazing feature'`）
318 | 4. 推送到分支（`git push origin feature/amazing-feature`）
319 | 5. 打开Pull Request
320 | 
321 | ## 许可证
322 | 
323 | 本项目采用MIT许可证 - 详情请参阅[LICENSE](LICENSE)文件。
324 | 
325 | ## 致谢
326 | 
327 | - [Tavily](https://tavily.com/)提供研究API
328 | - 所有其他开源库及其贡献者 
329 | 


--------------------------------------------------------------------------------
/application.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import logging
  3 | import os
  4 | import uuid
  5 | from collections import defaultdict
  6 | from datetime import datetime
  7 | from pathlib import Path
  8 | 
  9 | import uvicorn
 10 | from dotenv import load_dotenv
 11 | from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect
 12 | from fastapi.middleware.cors import CORSMiddleware
 13 | from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
 14 | from pydantic import BaseModel
 15 | 
 16 | from backend.graph import Graph
 17 | from backend.services.mongodb import MongoDBService
 18 | from backend.services.pdf_service import PDFService
 19 | from backend.services.websocket_manager import WebSocketManager
 20 | 
 21 | # Load environment variables from .env file at startup
 22 | env_path = Path(__file__).parent / '.env'
 23 | if env_path.exists():
 24 |     load_dotenv(dotenv_path=env_path, override=True)
 25 | 
 26 | # Configure logging
 27 | logger = logging.getLogger()
 28 | logger.setLevel(logging.INFO)
 29 | console_handler = logging.StreamHandler()
 30 | logger.addHandler(console_handler)
 31 | 
 32 | app = FastAPI(title="Tavily Company Research API")
 33 | 
 34 | app.add_middleware(
 35 |     CORSMiddleware,
 36 |     allow_origins=["*"],
 37 |     allow_credentials=True,
 38 |     allow_methods=["GET", "POST", "OPTIONS"],
 39 |     allow_headers=["*"],
 40 | )
 41 | 
 42 | manager = WebSocketManager()
 43 | pdf_service = PDFService({"pdf_output_dir": "pdfs"})
 44 | 
 45 | job_status = defaultdict(lambda: {
 46 |     "status": "pending",
 47 |     "result": None,
 48 |     "error": None,
 49 |     "debug_info": [],
 50 |     "company": None,
 51 |     "report": None,
 52 |     "last_update": datetime.now().isoformat()
 53 | })
 54 | 
 55 | mongodb = None
 56 | if mongo_uri := os.getenv("MONGODB_URI"):
 57 |     try:
 58 |         mongodb = MongoDBService(mongo_uri)
 59 |         logger.info("MongoDB integration enabled")
 60 |     except Exception as e:
 61 |         logger.warning(f"Failed to initialize MongoDB: {e}. Continuing without persistence.")
 62 | 
 63 | class ResearchRequest(BaseModel):
 64 |     company: str
 65 |     company_url: str | None = None
 66 |     industry: str | None = None
 67 |     hq_location: str | None = None
 68 | 
 69 | class PDFGenerationRequest(BaseModel):
 70 |     report_content: str
 71 |     company_name: str | None = None
 72 | 
 73 | @app.options("/research")
 74 | async def preflight():
 75 |     response = JSONResponse(content=None, status_code=200)
 76 |     response.headers["Access-Control-Allow-Origin"] = "*"
 77 |     response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS"
 78 |     response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
 79 |     return response
 80 | 
 81 | @app.post("/research")
 82 | async def research(data: ResearchRequest):
 83 |     try:
 84 |         logger.info(f"Received research request for {data.company}")
 85 |         job_id = str(uuid.uuid4())
 86 |         asyncio.create_task(process_research(job_id, data))
 87 | 
 88 |         response = JSONResponse(content={
 89 |             "status": "accepted",
 90 |             "job_id": job_id,
 91 |             "message": "Research started. Connect to WebSocket for updates.",
 92 |             "websocket_url": f"/research/ws/{job_id}"
 93 |         })
 94 |         response.headers["Access-Control-Allow-Origin"] = "*"
 95 |         response.headers["Access-Control-Allow-Methods"] = "POST, OPTIONS"
 96 |         response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
 97 |         return response
 98 | 
 99 |     except Exception as e:
100 |         logger.error(f"Error initiating research: {str(e)}", exc_info=True)
101 |         raise HTTPException(status_code=500, detail=str(e))
102 | 
103 | async def process_research(job_id: str, data: ResearchRequest):
104 |     try:
105 |         if mongodb:
106 |             mongodb.create_job(job_id, data.dict())
107 |         await asyncio.sleep(1)  # Allow WebSocket connection
108 | 
109 |         await manager.send_status_update(job_id, status="processing", message="Starting research")
110 | 
111 |         graph = Graph(
112 |             company=data.company,
113 |             url=data.company_url,
114 |             industry=data.industry,
115 |             hq_location=data.hq_location,
116 |             websocket_manager=manager,
117 |             job_id=job_id
118 |         )
119 | 
120 |         state = {}
121 |         async for s in graph.run(thread={}):
122 |             state.update(s)
123 |         
124 |         # Look for the compiled report in either location.
125 |         report_content = state.get('report') or (state.get('editor') or {}).get('report')
126 |         if report_content:
127 |             logger.info(f"Found report in final state (length: {len(report_content)})")
128 |             job_status[job_id].update({
129 |                 "status": "completed",
130 |                 "report": report_content,
131 |                 "company": data.company,
132 |                 "last_update": datetime.now().isoformat()
133 |             })
134 |             if mongodb:
135 |                 mongodb.update_job(job_id=job_id, status="completed")
136 |                 mongodb.store_report(job_id=job_id, report_data={"report": report_content})
137 |             await manager.send_status_update(
138 |                 job_id=job_id,
139 |                 status="completed",
140 |                 message="Research completed successfully",
141 |                 result={
142 |                     "report": report_content,
143 |                     "company": data.company
144 |                 }
145 |             )
146 |         else:
147 |             logger.error(f"Research completed without finding report. State keys: {list(state.keys())}")
148 |             logger.error(f"Editor state: {state.get('editor', {})}")
149 |             
150 |             # Check if there was a specific error in the state
151 |             error_message = "No report found"
152 |             if error := state.get('error'):
153 |                 error_message = f"Error: {error}"
154 |             
155 |             await manager.send_status_update(
156 |                 job_id=job_id,
157 |                 status="failed",
158 |                 message="Research completed but no report was generated",
159 |                 error=error_message
160 |             )
161 | 
162 |     except Exception as e:
163 |         logger.error(f"Research failed: {str(e)}")
164 |         await manager.send_status_update(
165 |             job_id=job_id,
166 |             status="failed",
167 |             message=f"Research failed: {str(e)}",
168 |             error=str(e)
169 |         )
170 |         if mongodb:
171 |             mongodb.update_job(job_id=job_id, status="failed", error=str(e))
172 | @app.get("/")
173 | async def ping():
174 |     return {"message": "Alive"}
175 | 
176 | @app.get("/research/pdf/{filename}")
177 | async def get_pdf(filename: str):
178 |     pdf_path = os.path.join("pdfs", filename)
179 |     if not os.path.exists(pdf_path):
180 |         raise HTTPException(status_code=404, detail="PDF not found")
181 |     return FileResponse(pdf_path, media_type='application/pdf', filename=filename)
182 | 
183 | @app.websocket("/research/ws/{job_id}")
184 | async def websocket_endpoint(websocket: WebSocket, job_id: str):
185 |     try:
186 |         await websocket.accept()
187 |         await manager.connect(websocket, job_id)
188 | 
189 |         if job_id in job_status:
190 |             status = job_status[job_id]
191 |             await manager.send_status_update(
192 |                 job_id,
193 |                 status=status["status"],
194 |                 message="Connected to status stream",
195 |                 error=status["error"],
196 |                 result=status["result"]
197 |             )
198 | 
199 |         while True:
200 |             try:
201 |                 await websocket.receive_text()
202 |             except WebSocketDisconnect:
203 |                 manager.disconnect(websocket, job_id)
204 |                 break
205 | 
206 |     except Exception as e:
207 |         logger.error(f"WebSocket error for job {job_id}: {str(e)}", exc_info=True)
208 |         manager.disconnect(websocket, job_id)
209 | 
210 | @app.get("/research/{job_id}")
211 | async def get_research(job_id: str):
212 |     if not mongodb:
213 |         raise HTTPException(status_code=501, detail="Database persistence not configured")
214 |     job = mongodb.get_job(job_id)
215 |     if not job:
216 |         raise HTTPException(status_code=404, detail="Research job not found")
217 |     return job
218 | 
219 | @app.get("/research/{job_id}/report")
220 | async def get_research_report(job_id: str):
221 |     if not mongodb:
222 |         if job_id in job_status:
223 |             result = job_status[job_id]
224 |             if report := result.get("report"):
225 |                 return {"report": report}
226 |         raise HTTPException(status_code=404, detail="Report not found")
227 |     
228 |     report = mongodb.get_report(job_id)
229 |     if not report:
230 |         raise HTTPException(status_code=404, detail="Research report not found")
231 |     return report
232 | 
233 | @app.post("/generate-pdf")
234 | async def generate_pdf(data: PDFGenerationRequest):
235 |     """Generate a PDF from markdown content and stream it to the client."""
236 |     try:
237 |         success, result = pdf_service.generate_pdf_stream(data.report_content, data.company_name)
238 |         if success:
239 |             pdf_buffer, filename = result
240 |             return StreamingResponse(
241 |                 pdf_buffer,
242 |                 media_type='application/pdf',
243 |                 headers={
244 |                     'Content-Disposition': f'attachment; filename="{filename}"'
245 |                 }
246 |             )
247 |         else:
248 |             raise HTTPException(status_code=500, detail=result)
249 |     except Exception as e:
250 |         raise HTTPException(status_code=500, detail=str(e))
251 | 
252 | if __name__ == "__main__":
253 |     uvicorn.run(app, host="0.0.0.0", port=8000)


--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
 1 | """Backend package for tavily-company-research."""
 2 | 
 3 | import os
 4 | import sys
 5 | from pathlib import Path
 6 | import logging
 7 | from dotenv import load_dotenv
 8 | 
 9 | # Set up logging
10 | logger = logging.getLogger(__name__)
11 | 
12 | # Load environment variables from .env file
13 | env_path = Path(__file__).parent.parent / '.env'
14 | if env_path.exists():
15 |     logger.info(f"Loading environment variables from {env_path}")
16 |     load_dotenv(dotenv_path=env_path, override=True)
17 | else:
18 |     logger.warning(f".env file not found at {env_path}. Using system environment variables.")
19 | 
20 | # Check for critical environment variables
21 | if not os.getenv("TAVILY_API_KEY"):
22 |     logger.warning("TAVILY_API_KEY environment variable is not set.")
23 | 
24 | if not os.getenv("OPENAI_API_KEY"):
25 |     logger.warning("OPENAI_API_KEY environment variable is not set.")
26 | 
27 | if not os.getenv("GEMINI_API_KEY"):
28 |     logger.warning("GEMINI_API_KEY environment variable is not set.")
29 | 
30 | from .graph import Graph
31 | 
32 | __all__ = ["Graph"]
33 | 


--------------------------------------------------------------------------------
/backend/classes/__init__.py:
--------------------------------------------------------------------------------
1 | from .state import InputState, ResearchState
2 | 
3 | __all__ = ["InputState", "ResearchState"] 


--------------------------------------------------------------------------------
/backend/classes/state.py:
--------------------------------------------------------------------------------
 1 | from typing import TypedDict, NotRequired, Required, Dict, List, Any
 2 | from backend.services.websocket_manager import WebSocketManager
 3 | 
 4 | #Define the input state
 5 | class InputState(TypedDict, total=False):
 6 |     company: Required[str]
 7 |     company_url: NotRequired[str]
 8 |     hq_location: NotRequired[str]
 9 |     industry: NotRequired[str]
10 |     websocket_manager: NotRequired[WebSocketManager]
11 |     job_id: NotRequired[str]
12 | 
13 | class ResearchState(InputState):
14 |     site_scrape: Dict[str, Any]
15 |     messages: List[Any]
16 |     financial_data: Dict[str, Any]
17 |     news_data: Dict[str, Any]
18 |     industry_data: Dict[str, Any]
19 |     company_data: Dict[str, Any]
20 |     curated_financial_data: Dict[str, Any]
21 |     curated_news_data: Dict[str, Any]
22 |     curated_industry_data: Dict[str, Any]
23 |     curated_company_data: Dict[str, Any]
24 |     financial_briefing: str
25 |     news_briefing: str
26 |     industry_briefing: str
27 |     company_briefing: str
28 |     references: List[str]
29 |     briefings: Dict[str, Any]
30 |     report: str


--------------------------------------------------------------------------------
/backend/graph.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import Any, AsyncIterator, Dict
  3 | 
  4 | from langchain_core.messages import SystemMessage
  5 | from langgraph.graph import StateGraph
  6 | 
  7 | from .classes.state import InputState
  8 | from .nodes import GroundingNode
  9 | from .nodes.briefing import Briefing
 10 | from .nodes.collector import Collector
 11 | from .nodes.curator import Curator
 12 | from .nodes.editor import Editor
 13 | from .nodes.enricher import Enricher
 14 | from .nodes.researchers import (
 15 |     CompanyAnalyzer,
 16 |     FinancialAnalyst,
 17 |     IndustryAnalyzer,
 18 |     NewsScanner,
 19 | )
 20 | 
 21 | logger = logging.getLogger(__name__)
 22 | 
 23 | class Graph:
 24 |     def __init__(self, company=None, url=None, hq_location=None, industry=None,
 25 |                  websocket_manager=None, job_id=None):
 26 |         self.websocket_manager = websocket_manager
 27 |         self.job_id = job_id
 28 |         
 29 |         # Initialize InputState
 30 |         self.input_state = InputState(
 31 |             company=company,
 32 |             company_url=url,
 33 |             hq_location=hq_location,
 34 |             industry=industry,
 35 |             websocket_manager=websocket_manager,
 36 |             job_id=job_id,
 37 |             messages=[
 38 |                 SystemMessage(content="Expert researcher starting investigation")
 39 |             ]
 40 |         )
 41 | 
 42 |         # Initialize nodes with WebSocket manager and job ID
 43 |         self._init_nodes()
 44 |         self._build_workflow()
 45 | 
 46 |     def _init_nodes(self):
 47 |         """Initialize all workflow nodes"""
 48 |         self.ground = GroundingNode()
 49 |         self.financial_analyst = FinancialAnalyst()
 50 |         self.news_scanner = NewsScanner()
 51 |         self.industry_analyst = IndustryAnalyzer()
 52 |         self.company_analyst = CompanyAnalyzer()
 53 |         self.collector = Collector()
 54 |         self.curator = Curator()
 55 |         self.enricher = Enricher()
 56 |         self.briefing = Briefing()
 57 |         self.editor = Editor()
 58 | 
 59 |     def _build_workflow(self):
 60 |         """Configure the state graph workflow"""
 61 |         self.workflow = StateGraph(InputState)
 62 |         
 63 |         # Add nodes with their respective processing functions
 64 |         self.workflow.add_node("grounding", self.ground.run)
 65 |         self.workflow.add_node("financial_analyst", self.financial_analyst.run)
 66 |         self.workflow.add_node("news_scanner", self.news_scanner.run)
 67 |         self.workflow.add_node("industry_analyst", self.industry_analyst.run)
 68 |         self.workflow.add_node("company_analyst", self.company_analyst.run)
 69 |         self.workflow.add_node("collector", self.collector.run)
 70 |         self.workflow.add_node("curator", self.curator.run)
 71 |         self.workflow.add_node("enricher", self.enricher.run)
 72 |         self.workflow.add_node("briefing", self.briefing.run)
 73 |         self.workflow.add_node("editor", self.editor.run)
 74 | 
 75 |         # Configure workflow edges
 76 |         self.workflow.set_entry_point("grounding")
 77 |         self.workflow.set_finish_point("editor")
 78 |         
 79 |         research_nodes = [
 80 |             "financial_analyst", 
 81 |             "news_scanner",
 82 |             "industry_analyst", 
 83 |             "company_analyst"
 84 |         ]
 85 | 
 86 |         # Connect grounding to all research nodes
 87 |         for node in research_nodes:
 88 |             self.workflow.add_edge("grounding", node)
 89 |             self.workflow.add_edge(node, "collector")
 90 | 
 91 |         # Connect remaining nodes
 92 |         self.workflow.add_edge("collector", "curator")
 93 |         self.workflow.add_edge("curator", "enricher")
 94 |         self.workflow.add_edge("enricher", "briefing")
 95 |         self.workflow.add_edge("briefing", "editor")
 96 | 
 97 |     async def run(self, thread: Dict[str, Any]) -> AsyncIterator[Dict[str, Any]]:
 98 |         """Execute the research workflow"""
 99 |         compiled_graph = self.workflow.compile()
100 |         
101 |         async for state in compiled_graph.astream(
102 |             self.input_state,
103 |             thread
104 |         ):
105 |             if self.websocket_manager and self.job_id:
106 |                 await self._handle_ws_update(state)
107 |             yield state
108 | 
109 |     async def _handle_ws_update(self, state: Dict[str, Any]):
110 |         """Handle WebSocket updates based on state changes"""
111 |         update = {
112 |             "type": "state_update",
113 |             "data": {
114 |                 "current_node": state.get("current_node", "unknown"),
115 |                 "progress": state.get("progress", 0),
116 |                 "keys": list(state.keys())
117 |             }
118 |         }
119 |         await self.websocket_manager.broadcast_to_job(
120 |             self.job_id,
121 |             update
122 |         )
123 |     
124 |     def compile(self):
125 |         graph = self.workflow.compile()
126 |         return graph


--------------------------------------------------------------------------------
/backend/nodes/__init__.py:
--------------------------------------------------------------------------------
1 | from .grounding import GroundingNode
2 | 
3 | __all__ = ["GroundingNode"] 


--------------------------------------------------------------------------------
/backend/nodes/collector.py:
--------------------------------------------------------------------------------
 1 | from langchain_core.messages import AIMessage
 2 | 
 3 | from ..classes import ResearchState
 4 | 
 5 | 
 6 | class Collector:
 7 |     """Collects and organizes all research data before curation."""
 8 | 
 9 |     async def collect(self, state: ResearchState) -> ResearchState:
10 |         """Collect and verify all research data is present."""
11 |         company = state.get('company', 'Unknown Company')
12 |         msg = [f"📦 Collecting research data for {company}:"]
13 | 
14 |         if websocket_manager := state.get('websocket_manager'):
15 |             if job_id := state.get('job_id'):
16 |                 await websocket_manager.send_status_update(
17 |                     job_id=job_id,
18 |                     status="processing",
19 |                     message=f"Collecting research data for {company}",
20 |                     result={"step": "Collecting"}
21 |                 )
22 |         
23 |         # Check each type of research data
24 |         research_types = {
25 |             'financial_data': '💰 Financial',
26 |             'news_data': '📰 News',
27 |             'industry_data': '🏭 Industry',
28 |             'company_data': '🏢 Company'
29 |         }
30 |         
31 |         for data_field, label in research_types.items():
32 |             data = state.get(data_field, {})
33 |             if data:
34 |                 msg.append(f"• {label}: {len(data)} documents collected")
35 |             else:
36 |                 msg.append(f"• {label}: No data found")
37 |         
38 |         # Update state with collection message
39 |         messages = state.get('messages', [])
40 |         messages.append(AIMessage(content="\n".join(msg)))
41 |         state['messages'] = messages
42 |         
43 |         return state
44 | 
45 |     async def run(self, state: ResearchState) -> ResearchState:
46 |         return await self.collect(state)


--------------------------------------------------------------------------------
/backend/nodes/curator.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import Dict
  3 | from urllib.parse import urljoin, urlparse
  4 | 
  5 | from langchain_core.messages import AIMessage
  6 | 
  7 | from ..classes import ResearchState
  8 | from ..utils.references import process_references_from_search_results
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | class Curator:
 13 |     def __init__(self) -> None:
 14 |         self.relevance_threshold = 0.4  # Fixed initialization of class attribute
 15 |         logger.info("Curator initialized with relevance threshold: {relevance_threshhold}")
 16 | 
 17 |     async def evaluate_documents(self, state: ResearchState, docs: list, context: Dict[str, str]) -> list:
 18 |         """Evaluate documents based on Tavily's scoring."""
 19 |         if websocket_manager := state.get('websocket_manager'):
 20 |             if job_id := state.get('job_id'):
 21 |                 logger.info(f"Sending initial curation status update for job {job_id}")
 22 |                 await websocket_manager.send_status_update(
 23 |                     job_id=job_id,
 24 |                     status="processing",
 25 |                     message="Evaluating documents",
 26 |                     result={
 27 |                         "step": "Curation",
 28 |                     }
 29 |                 )
 30 |         
 31 |         if not docs:
 32 |             return []
 33 | 
 34 |         logger.info(f"Evaluating {len(docs)} documents")
 35 |         
 36 |         evaluated_docs = []
 37 |         try:
 38 |             # Evaluate each document using Tavily's score
 39 |             for doc in docs:
 40 |                 try:
 41 |                     # Ensure score is a valid float
 42 |                     tavily_score = float(doc.get('score', 0))  # Default to 0 if no score
 43 |                     
 44 |                     # Keep documents with good Tavily score
 45 |                     if tavily_score >= self.relevance_threshold:
 46 |                         logger.info(f"Document passed threshold with score {tavily_score:.4f} for '{doc.get('title', 'No title')}'")
 47 |                         
 48 |                         evaluated_doc = {
 49 |                             **doc,
 50 |                             "evaluation": {
 51 |                                 "overall_score": tavily_score,  # Store as float
 52 |                                 "query": doc.get('query', '')
 53 |                             }
 54 |                         }
 55 |                         evaluated_docs.append(evaluated_doc)
 56 |                         
 57 |                         # Send incremental update for kept document
 58 |                         if websocket_manager := state.get('websocket_manager'):
 59 |                             if job_id := state.get('job_id'):
 60 |                                 await websocket_manager.send_status_update(
 61 |                                     job_id=job_id,
 62 |                                     status="document_kept",
 63 |                                     message=f"Kept document: {doc.get('title', 'No title')}",
 64 |                                     result={
 65 |                                         "step": "Curation",
 66 |                                         "doc_type": doc.get('doc_type', 'unknown'),
 67 |                                         "title": doc.get('title', 'No title'),
 68 |                                         "score": tavily_score
 69 |                                     }
 70 |                                 )
 71 |                     else:
 72 |                         logger.info(f"Document below threshold with score {tavily_score:.4f} for '{doc.get('title', 'No title')}'")
 73 |                 except (ValueError, TypeError) as e:
 74 |                     logger.warning(f"Error processing score for document: {e}")
 75 |                     continue
 76 |                     
 77 |         except Exception as e:
 78 |             logger.error(f"Error during document evaluation: {e}")
 79 |             return []
 80 | 
 81 |         # Sort evaluated docs by score before returning
 82 |         evaluated_docs.sort(key=lambda x: float(x['evaluation']['overall_score']), reverse=True)
 83 |         logger.info(f"Returning {len(evaluated_docs)} evaluated documents")
 84 |         
 85 |         return evaluated_docs
 86 | 
 87 |     async def curate_data(self, state: ResearchState) -> ResearchState:
 88 |         """Curate all collected data based on Tavily scores."""
 89 |         company = state.get('company', 'Unknown Company')
 90 |         logger.info(f"Starting curation for company: {company}")
 91 |         
 92 |         # Send initial status update through WebSocket
 93 |         if websocket_manager := state.get('websocket_manager'):
 94 |             if job_id := state.get('job_id'):
 95 |                 logger.info(f"Sending initial curation status update for job {job_id}")
 96 |                 await websocket_manager.send_status_update(
 97 |                     job_id=job_id,
 98 |                     status="processing",
 99 |                     message=f"Starting document curation for {company}",
100 |                     result={
101 |                         "step": "Curation",
102 |                         "doc_counts": {
103 |                             "company": {"initial": 0, "kept": 0},
104 |                             "industry": {"initial": 0, "kept": 0},
105 |                             "financial": {"initial": 0, "kept": 0},
106 |                             "news": {"initial": 0, "kept": 0}
107 |                         }
108 |                     }
109 |                 )
110 | 
111 |         industry = state.get('industry', 'Unknown')
112 |         context = {
113 |             "company": company,
114 |             "industry": industry,
115 |             "hq_location": state.get('hq_location', 'Unknown')
116 |         }
117 | 
118 |         msg = [f"🔍 Curating research data for {company}"]
119 |         
120 |         data_types = {
121 |             'financial_data': ('💰 Financial', 'financial'),
122 |             'news_data': ('📰 News', 'news'),
123 |             'industry_data': ('🏭 Industry', 'industry'),
124 |             'company_data': ('🏢 Company', 'company')
125 |         }
126 | 
127 |         # Create all evaluation tasks upfront
128 |         curation_tasks = []
129 |         for data_field, (emoji, doc_type) in data_types.items():
130 |             data = state.get(data_field, {})
131 |             if not data:
132 |                 continue
133 | 
134 |             # Filter and normalize URLs
135 |             unique_docs = {}
136 |             for url, doc in data.items():
137 |                 try:
138 |                     parsed = urlparse(url)
139 |                     if not parsed.scheme:
140 |                         url = urljoin('https://', url)
141 |                     clean_url = parsed._replace(query='', fragment='').geturl()
142 |                     if clean_url not in unique_docs:
143 |                         doc['url'] = clean_url
144 |                         doc['doc_type'] = doc_type
145 |                         unique_docs[clean_url] = doc
146 |                 except Exception:
147 |                     continue
148 | 
149 |             docs = list(unique_docs.values())
150 |             curation_tasks.append((data_field, emoji, doc_type, unique_docs.keys(), docs))
151 | 
152 |         # Track document counts for each type
153 |         doc_counts = {}
154 | 
155 |         for data_field, emoji, doc_type, urls, docs in curation_tasks:
156 |             msg.append(f"\n{emoji}: Found {len(docs)} documents")
157 | 
158 |             if websocket_manager := state.get('websocket_manager'):
159 |                 if job_id := state.get('job_id'):
160 |                     await websocket_manager.send_status_update(
161 |                         job_id=job_id,
162 |                         status="category_start",
163 |                         message=f"Processing {doc_type} documents",
164 |                         result={
165 |                             "step": "Curation",
166 |                             "doc_type": doc_type,
167 |                             "initial_count": len(docs)
168 |                         }
169 |                     )
170 | 
171 |             evaluated_docs = await self.evaluate_documents(state, docs, context)
172 | 
173 |             if not evaluated_docs:
174 |                 msg.append("  ⚠️ No relevant documents found")
175 |                 doc_counts[data_field] = {"initial": len(docs), "kept": 0}
176 |                 continue
177 | 
178 |             # Filter and sort by Tavily score
179 |             relevant_docs = {url: doc for url, doc in zip(urls, evaluated_docs)}
180 |             sorted_items = sorted(relevant_docs.items(), key=lambda item: item[1]['evaluation']['overall_score'], reverse=True)
181 |             
182 |             # Limit to top 30 documents per category
183 |             if len(sorted_items) > 30:
184 |                 sorted_items = sorted_items[:30]
185 |             relevant_docs = dict(sorted_items)
186 | 
187 |             doc_counts[data_field] = {
188 |                 "initial": len(docs),
189 |                 "kept": len(relevant_docs)
190 |             }
191 | 
192 |             if relevant_docs:
193 |                 msg.append(f"  ✓ Kept {len(relevant_docs)} relevant documents")
194 |                 logger.info(f"Kept {len(relevant_docs)} documents for {doc_type} with scores above threshold")
195 |             else:
196 |                 msg.append("  ⚠️ No documents met relevance threshold")
197 |                 logger.info(f"No documents met relevance threshold for {doc_type}")
198 | 
199 |             # Store curated documents in state
200 |             state[f'curated_{data_field}'] = relevant_docs
201 |             
202 |         # Process references using the references module
203 |         top_reference_urls, reference_titles, reference_info = process_references_from_search_results(state)
204 |         logger.info(f"Selected top {len(top_reference_urls)} references for the report")
205 |         
206 |         # Update state with references and their titles
207 |         messages = state.get('messages', [])
208 |         messages.append(AIMessage(content="\n".join(msg)))
209 |         state['messages'] = messages
210 |         state['references'] = top_reference_urls
211 |         state['reference_titles'] = reference_titles
212 |         state['reference_info'] = reference_info
213 | 
214 |         # Send final curation stats
215 |         if websocket_manager := state.get('websocket_manager'):
216 |             if job_id := state.get('job_id'):
217 |                 await websocket_manager.send_status_update(
218 |                     job_id=job_id,
219 |                     status="curation_complete",
220 |                     message="Document curation complete",
221 |                     result={
222 |                         "step": "Curation",
223 |                         "doc_counts": {
224 |                             "company": doc_counts.get('company_data', {"initial": 0, "kept": 0}),
225 |                             "industry": doc_counts.get('industry_data', {"initial": 0, "kept": 0}),
226 |                             "financial": doc_counts.get('financial_data', {"initial": 0, "kept": 0}),
227 |                             "news": doc_counts.get('news_data', {"initial": 0, "kept": 0})
228 |                         }
229 |                     }
230 |                 )
231 | 
232 |         return state
233 | 
234 |     async def run(self, state: ResearchState) -> ResearchState:
235 |         return await self.curate_data(state)
236 | 


--------------------------------------------------------------------------------
/backend/nodes/grounding.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | 
  4 | from langchain_core.messages import AIMessage
  5 | from tavily import AsyncTavilyClient
  6 | 
  7 | from ..classes import InputState, ResearchState
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | class GroundingNode:
 12 |     """Gathers initial grounding data about the company."""
 13 |     
 14 |     def __init__(self) -> None:
 15 |         self.tavily_client = AsyncTavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
 16 | 
 17 |     async def initial_search(self, state: InputState) -> ResearchState:
 18 |         # Add debug logging at the start to check websocket manager
 19 |         if websocket_manager := state.get('websocket_manager'):
 20 |             logger.info("Websocket manager found in state")
 21 |         else:
 22 |             logger.warning("No websocket manager found in state")
 23 |         
 24 |         company = state.get('company', 'Unknown Company')
 25 |         msg = f"🎯 Initiating research for {company}...\n"
 26 |         
 27 |         if websocket_manager := state.get('websocket_manager'):
 28 |             if job_id := state.get('job_id'):
 29 |                 await websocket_manager.send_status_update(
 30 |                     job_id=job_id,
 31 |                     status="processing",
 32 |                     message=f"🎯 Initiating research for {company}",
 33 |                     result={"step": "Initializing"}
 34 |                 )
 35 | 
 36 |         site_scrape = {}
 37 | 
 38 |         # Only attempt extraction if we have a URL
 39 |         if url := state.get('company_url'):
 40 |             msg += f"\n🌐 Analyzing company website: {url}"
 41 |             logger.info(f"Starting website analysis for {url}")
 42 |             
 43 |             # Send initial briefing status
 44 |             if websocket_manager := state.get('websocket_manager'):
 45 |                 if job_id := state.get('job_id'):
 46 |                     await websocket_manager.send_status_update(
 47 |                         job_id=job_id,
 48 |                         status="processing",
 49 |                         message="Analyzing company website",
 50 |                         result={"step": "Initial Site Scrape"}
 51 |                     )
 52 | 
 53 |             try:
 54 |                 logger.info("Initiating Tavily extraction")
 55 |                 site_extraction = await self.tavily_client.extract(url, extract_depth="basic")
 56 |                 
 57 |                 raw_contents = []
 58 |                 for item in site_extraction.get("results", []):
 59 |                     if content := item.get("raw_content"):
 60 |                         raw_contents.append(content)
 61 |                 
 62 |                 if raw_contents:
 63 |                     site_scrape = {
 64 |                         'title': company,
 65 |                         'raw_content': "\n\n".join(raw_contents)
 66 |                     }
 67 |                     logger.info(f"Successfully extracted {len(raw_contents)} content sections")
 68 |                     msg += "\n✅ Successfully extracted content from website"
 69 |                     if websocket_manager := state.get('websocket_manager'):
 70 |                         if job_id := state.get('job_id'):
 71 |                             await websocket_manager.send_status_update(
 72 |                                 job_id=job_id,
 73 |                                 status="processing",
 74 |                                 message="Successfully extracted content from website",
 75 |                                 result={"step": "Initial Site Scrape"}
 76 |                             )
 77 |                 else:
 78 |                     logger.warning("No content found in extraction results")
 79 |                     msg += "\n⚠️ No content found in website extraction"
 80 |                     if websocket_manager := state.get('websocket_manager'):
 81 |                         if job_id := state.get('job_id'):
 82 |                             await websocket_manager.send_status_update(
 83 |                                 job_id=job_id,
 84 |                                 status="processing",
 85 |                                 message="⚠️ No content found in provided URL",
 86 |                                 result={"step": "Initial Site Scrape"}
 87 |                             )
 88 |             except Exception as e:
 89 |                 error_str = str(e)
 90 |                 logger.error(f"Website extraction error: {error_str}", exc_info=True)
 91 |                 error_msg = f"⚠️ Error extracting website content: {error_str}"
 92 |                 print(error_msg)
 93 |                 msg += f"\n{error_msg}"
 94 |                 if websocket_manager := state.get('websocket_manager'):
 95 |                     if job_id := state.get('job_id'):
 96 |                         await websocket_manager.send_status_update(
 97 |                             job_id=job_id,
 98 |                             status="website_error",
 99 |                             message=error_msg,
100 |                             result={
101 |                                 "step": "Initial Site Scrape", 
102 |                                 "error": error_str,
103 |                                 "continue_research": True  # Continue with research even if website extraction fails
104 |                             }
105 |                         )
106 |         else:
107 |             msg += "\n⏩ No company URL provided, proceeding directly to research phase"
108 |             if websocket_manager := state.get('websocket_manager'):
109 |                 if job_id := state.get('job_id'):
110 |                     await websocket_manager.send_status_update(
111 |                         job_id=job_id,
112 |                         status="processing",
113 |                         message="No company URL provided, proceeding directly to research phase",
114 |                         result={"step": "Initializing"}
115 |                     )
116 |         # Add context about what information we have
117 |         context_data = {}
118 |         if hq := state.get('hq_location'):
119 |             msg += f"\n📍 Company HQ: {hq}"
120 |             context_data["hq_location"] = hq
121 |         if industry := state.get('industry'):
122 |             msg += f"\n🏭 Industry: {industry}"
123 |             context_data["industry"] = industry
124 |         
125 |         # Initialize ResearchState with input information
126 |         research_state = {
127 |             # Copy input fields
128 |             "company": state.get('company'),
129 |             "company_url": state.get('company_url'),
130 |             "hq_location": state.get('hq_location'),
131 |             "industry": state.get('industry'),
132 |             # Initialize research fields
133 |             "messages": [AIMessage(content=msg)],
134 |             "site_scrape": site_scrape,
135 |             # Pass through websocket info
136 |             "websocket_manager": state.get('websocket_manager'),
137 |             "job_id": state.get('job_id')
138 |         }
139 | 
140 |         # If there was an error in the initial extraction, store it in the state
141 |         if "⚠️ Error extracting website content:" in msg:
142 |             research_state["error"] = error_str
143 | 
144 |         return research_state
145 | 
146 |     async def run(self, state: InputState) -> ResearchState:
147 |         return await self.initial_search(state)
148 | 


--------------------------------------------------------------------------------
/backend/nodes/researchers/__init__.py:
--------------------------------------------------------------------------------
1 | from .financial import FinancialAnalyst
2 | from .news import NewsScanner
3 | from .industry import IndustryAnalyzer
4 | from .company import CompanyAnalyzer
5 | 
6 | __all__ = ["FinancialAnalyst", "NewsScanner", "IndustryAnalyzer", "CompanyAnalyzer"] 


--------------------------------------------------------------------------------
/backend/nodes/researchers/company.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | 
 3 | from langchain_core.messages import AIMessage
 4 | 
 5 | from ...classes import ResearchState
 6 | from .base import BaseResearcher
 7 | 
 8 | 
 9 | class CompanyAnalyzer(BaseResearcher):
10 |     def __init__(self) -> None:
11 |         super().__init__()
12 |         self.analyst_type = "company_analyzer"
13 | 
14 |     async def analyze(self, state: ResearchState) -> Dict[str, Any]:
15 |         company = state.get('company', 'Unknown Company')
16 |         msg = [f"🏢 Company Analyzer analyzing {company}"]
17 |         
18 |         # Generate search queries using LLM
19 |         queries = await self.generate_queries(state, """
20 |         Generate queries on the company fundamentals of {company} in the {industry} industry such as:
21 |         - Core products and services
22 |         - Company history and milestones
23 |         - Leadership team
24 |         - Business model and strategy
25 |         """)
26 | 
27 |         # Add message to show subqueries with emojis
28 |         subqueries_msg = "🔍 Subqueries for company analysis:\n" + "\n".join([f"• {query}" for query in queries])
29 |         messages = state.get('messages', [])
30 |         messages.append(AIMessage(content=subqueries_msg))
31 |         state['messages'] = messages
32 | 
33 |     # Send queries through WebSocket
34 |         if websocket_manager := state.get('websocket_manager'):
35 |             if job_id := state.get('job_id'):
36 |                 await websocket_manager.send_status_update(
37 |                     job_id=job_id,
38 |                     status="processing",
39 |                     message="Company analysis queries generated",
40 |                     result={
41 |                         "step": "Company Analyst",
42 |                         "analyst_type": "Company Analyst",
43 |                         "queries": queries
44 |                     }
45 |                 )
46 |         
47 |         company_data = {}
48 |         
49 |         # If we have site_scrape data, include it first
50 |         if site_scrape := state.get('site_scrape'):
51 |             msg.append("\n📊 Including site scrape data in company analysis...")
52 |             company_url = state.get('company_url', 'company-website')
53 |             company_data[company_url] = {
54 |                 'title': state.get('company', 'Unknown Company'),
55 |                 'raw_content': site_scrape,
56 |                 'query': f'Company overview and information about {company}'  # Add a default query for site scrape
57 |             }
58 |         
59 |         # Perform additional research with comprehensive search
60 |         try:
61 |             # Store documents with their respective queries
62 |             for query in queries:
63 |                 documents = await self.search_documents(state, [query])
64 |                 if documents:  # Only process if we got results
65 |                     for url, doc in documents.items():
66 |                         doc['query'] = query  # Associate each document with its query
67 |                         company_data[url] = doc
68 |             
69 |             msg.append(f"\n✓ Found {len(company_data)} documents")
70 |             if websocket_manager := state.get('websocket_manager'):
71 |                 if job_id := state.get('job_id'):
72 |                     await websocket_manager.send_status_update(
73 |                         job_id=job_id,
74 |                         status="processing",
75 |                         message=f"Used Tavily Search to find {len(company_data)} documents",
76 |                         result={
77 |                             "step": "Searching",
78 |                             "analyst_type": "Company Analyst",
79 |                             "queries": queries
80 |                         }
81 |                     )
82 |         except Exception as e:
83 |             msg.append(f"\n⚠️ Error during research: {str(e)}")
84 |         
85 |         # Update state with our findings
86 |         messages = state.get('messages', [])
87 |         messages.append(AIMessage(content="\n".join(msg)))
88 |         state['messages'] = messages
89 |         state['company_data'] = company_data
90 |         
91 |         return {
92 |             'message': msg,
93 |             'company_data': company_data
94 |         }
95 | 
96 |     async def run(self, state: ResearchState) -> Dict[str, Any]:
97 |         return await self.analyze(state) 


--------------------------------------------------------------------------------
/backend/nodes/researchers/financial.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import Any, Dict
  3 | 
  4 | from langchain_core.messages import AIMessage
  5 | 
  6 | from ...classes import ResearchState
  7 | from .base import BaseResearcher
  8 | 
  9 | logger = logging.getLogger(__name__)
 10 | 
 11 | class FinancialAnalyst(BaseResearcher):
 12 |     def __init__(self) -> None:
 13 |         super().__init__()
 14 |         self.analyst_type = "financial_analyzer"
 15 | 
 16 |     async def analyze(self, state: ResearchState) -> Dict[str, Any]:
 17 |         company = state.get('company', 'Unknown Company')
 18 |         websocket_manager = state.get('websocket_manager')
 19 |         job_id = state.get('job_id')
 20 |         
 21 |         try:
 22 |             # Generate search queries
 23 |             queries = await self.generate_queries(
 24 |                 state,
 25 |                 """
 26 |                  Generate queries on the financial analysis of {company} in the {industry} industry such as:
 27 |         - Fundraising history and valuation
 28 |         - Financial statements and key metrics
 29 |         - Revenue and profit sources
 30 |         """)
 31 |             
 32 |             # Add message to show subqueries with emojis
 33 |             subqueries_msg = "🔍 Subqueries for financial analysis:\n" + "\n".join([f"• {query}" for query in queries])
 34 |             messages = state.get('messages', [])
 35 |             messages.append(AIMessage(content=subqueries_msg))
 36 |             state['messages'] = messages
 37 | 
 38 |             # Send queries through WebSocket
 39 |             if websocket_manager:
 40 |                 if job_id:
 41 |                     await websocket_manager.send_status_update(
 42 |                         job_id=job_id,
 43 |                         status="processing",
 44 |                         message="Financial analysis queries generated",
 45 |                         result={
 46 |                             "step": "Financial Analyst",
 47 |                             "analyst_type": "Financial Analyst",
 48 |                             "queries": queries
 49 |                         }
 50 |                     )
 51 |             
 52 |             # Process site scrape data
 53 |             financial_data = {}
 54 |             if site_scrape := state.get('site_scrape'):
 55 |                 company_url = state.get('company_url', 'company-website')
 56 |                 financial_data[company_url] = {
 57 |                     'title': state.get('company', 'Unknown Company'),
 58 |                     'raw_content': site_scrape,
 59 |                     'query': f'Financial information on {company}'
 60 |                 }
 61 | 
 62 |             for query in queries:
 63 |                 documents = await self.search_documents(state, [query])
 64 |                 for url, doc in documents.items():
 65 |                     doc['query'] = query
 66 |                     financial_data[url] = doc
 67 | 
 68 |             # Final status update
 69 |             completion_msg = f"Completed analysis with {len(financial_data)} documents"
 70 |             
 71 |             if websocket_manager:
 72 |                 if job_id:
 73 |                     await websocket_manager.send_status_update(
 74 |                         job_id=job_id,
 75 |                         status="processing",
 76 |                         message=f"Used Tavily Search to find {len(financial_data)} documents",
 77 |                         result={
 78 |                             "step": "Searching",
 79 |                             "analyst_type": "Financial Analyst",
 80 |                             "queries": queries
 81 |                         }
 82 |                     )
 83 |             
 84 |             # Update state
 85 |             messages.append(AIMessage(content=completion_msg))
 86 |             state['messages'] = messages
 87 |             state['financial_data'] = financial_data
 88 | 
 89 |             # Send completion status with final queries
 90 |             if websocket_manager and job_id:
 91 |                 await websocket_manager.send_status_update(
 92 |                     job_id=job_id,
 93 |                     status="processing",
 94 |                     message=completion_msg,
 95 |                     result={
 96 |                         "analyst_type": "Financial Analyst",
 97 |                         "queries": queries,
 98 |                         "documents_found": len(financial_data)
 99 |                     }
100 |                 )
101 | 
102 |             return {
103 |                 'message': completion_msg,
104 |                 'financial_data': financial_data,
105 |                 'analyst_type': self.analyst_type,
106 |                 'queries': queries
107 |             }
108 | 
109 |         except Exception as e:
110 |             error_msg = f"Financial analysis failed: {str(e)}"
111 |             # Send error status
112 |             if websocket_manager:
113 |                 if job_id:
114 |                     await websocket_manager.send_status_update(
115 |                         job_id=job_id,
116 |                         status="error",
117 |                         message=error_msg,
118 |                         result={
119 |                             "analyst_type": "Financial Analyst",
120 |                             "error": str(e)
121 |                         }
122 |                     )
123 |             raise  # Re-raise to maintain error flow
124 | 
125 |     async def run(self, state: ResearchState) -> Dict[str, Any]:
126 |         return await self.analyze(state)


--------------------------------------------------------------------------------
/backend/nodes/researchers/industry.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | 
 3 | from langchain_core.messages import AIMessage
 4 | 
 5 | from ...classes import ResearchState
 6 | from .base import BaseResearcher
 7 | 
 8 | 
 9 | class IndustryAnalyzer(BaseResearcher):
10 |     def __init__(self) -> None:
11 |         super().__init__()
12 |         self.analyst_type = "industry_analyzer"
13 | 
14 |     async def analyze(self, state: ResearchState) -> Dict[str, Any]:
15 |         company = state.get('company', 'Unknown Company')
16 |         industry = state.get('industry', 'Unknown Industry')
17 |         msg = [f"🏭 Industry Analyzer analyzing {company} in {industry}"]
18 |         
19 |         # Generate search queries using LLM
20 |         queries = await self.generate_queries(state, """
21 |         Generate queries on the industry analysis of {company} in the {industry} industry such as:
22 |         - Market position
23 |         - Competitors
24 |         - {industry} industry trends and challenges
25 |         - Market size and growth
26 |         """)
27 | 
28 |         subqueries_msg = "🔍 Subqueries for industry analysis:\n" + "\n".join([f"• {query}" for query in queries])
29 |         messages = state.get('messages', [])
30 |         messages.append(AIMessage(content=subqueries_msg))
31 |         state['messages'] = messages
32 | 
33 |         # Send queries through WebSocket
34 |         if websocket_manager := state.get('websocket_manager'):
35 |             if job_id := state.get('job_id'):
36 |                 await websocket_manager.send_status_update(
37 |                     job_id=job_id,
38 |                     status="processing",
39 |                     message="Industry analysis queries generated",
40 |                     result={
41 |                         "step": "Industry Analyst",
42 |                         "analyst_type": "Industry Analyst",
43 |                         "queries": queries
44 |                     }
45 |                 )
46 |         
47 |         industry_data = {}
48 |         
49 |         # If we have site_scrape data, include it first
50 |         if site_scrape := state.get('site_scrape'):
51 |             msg.append("\n📊 Including site scrape data in company analysis...")
52 |             company_url = state.get('company_url', 'company-website')
53 |             industry_data[company_url] = {
54 |                 'title': state.get('company', 'Unknown Company'),
55 |                 'raw_content': site_scrape,
56 |                 'query': f'Industry analysis on {company}'  # Add a default query for site scrape
57 |             }
58 |         
59 |         # Perform additional research with increased search depth
60 |         try:
61 |             # Store documents with their respective queries
62 |             for query in queries:
63 |                 documents = await self.search_documents(state, [query])
64 |                 if documents:  # Only process if we got results
65 |                     for url, doc in documents.items():
66 |                         doc['query'] = query  # Associate each document with its query
67 |                         industry_data[url] = doc
68 |             
69 |             msg.append(f"\n✓ Found {len(industry_data)} documents")
70 |             if websocket_manager := state.get('websocket_manager'):
71 |                 if job_id := state.get('job_id'):
72 |                     await websocket_manager.send_status_update(
73 |                         job_id=job_id,
74 |                         status="processing",
75 |                         message=f"Used Tavily Search to find {len(industry_data)} documents",
76 |                         result={
77 |                             "step": "Searching",
78 |                             "analyst_type": "Industry Analyst",
79 |                             "queries": queries
80 |                         }
81 |                     )
82 |         except Exception as e:
83 |             msg.append(f"\n⚠️ Error during research: {str(e)}")
84 |         
85 |         # Update state with our findings
86 |         messages = state.get('messages', [])
87 |         messages.append(AIMessage(content="\n".join(msg)))
88 |         state['messages'] = messages
89 |         state['industry_data'] = industry_data
90 |         
91 |         return {
92 |             'message': msg,
93 |             'industry_data': industry_data
94 |         }
95 | 
96 |     async def run(self, state: ResearchState) -> Dict[str, Any]:
97 |         return await self.analyze(state) 


--------------------------------------------------------------------------------
/backend/nodes/researchers/news.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | 
 3 | from langchain_core.messages import AIMessage
 4 | 
 5 | from ...classes import ResearchState
 6 | from .base import BaseResearcher
 7 | 
 8 | 
 9 | class NewsScanner(BaseResearcher):
10 |     def __init__(self) -> None:
11 |         super().__init__()
12 |         self.analyst_type = "news_analyzer"
13 | 
14 |     async def analyze(self, state: ResearchState) -> Dict[str, Any]:
15 |         company = state.get('company', 'Unknown Company')
16 |         msg = [f"📰 News Scanner analyzing {company}"]
17 |         
18 |         # Generate search queries using LLM
19 |         queries = await self.generate_queries(state, """
20 |         Generate queries on the recent news coverage of {company} such as:
21 |         - Recent company announcements
22 |         - Press releases
23 |         - New partnerships
24 |         """)
25 | 
26 |         subqueries_msg = "🔍 Subqueries for news analysis:\n" + "\n".join([f"• {query}" for query in queries])
27 |         messages = state.get('messages', [])
28 |         messages.append(AIMessage(content=subqueries_msg))
29 |         state['messages'] = messages
30 |         
31 |         news_data = {}
32 |         
33 |         # If we have site_scrape data, include it first
34 |         if site_scrape := state.get('site_scrape'):
35 |             msg.append("\n📊 Including site scrape data in company analysis...")
36 |             company_url = state.get('company_url', 'company-website')
37 |             news_data[company_url] = {
38 |                 'title': state.get('company', 'Unknown Company'),
39 |                 'raw_content': site_scrape,
40 |                 'query': f'News and announcements about {company}'  # Add a default query for site scrape
41 |             }
42 |         
43 |         # Perform additional research with recent time filter
44 |         try:
45 |             # Store documents with their respective queries
46 |             for query in queries:
47 |                 documents = await self.search_documents(state, [query])
48 |                 if documents:  # Only process if we got results
49 |                     for url, doc in documents.items():
50 |                         doc['query'] = query  # Associate each document with its query
51 |                         news_data[url] = doc
52 |             
53 |             msg.append(f"\n✓ Found {len(news_data)} documents")
54 |             if websocket_manager := state.get('websocket_manager'):
55 |                 if job_id := state.get('job_id'):
56 |                     await websocket_manager.send_status_update(
57 |                         job_id=job_id,
58 |                         status="processing",
59 |                         message=f"Used Tavily Search to find {len(news_data)} documents",
60 |                         result={
61 |                             "step": "Searching",
62 |                             "analyst_type": "News Scanner",
63 |                             "queries": queries
64 |                         }
65 |                     )
66 |         except Exception as e:
67 |             msg.append(f"\n⚠️ Error during research: {str(e)}")
68 |         
69 |         # Update state with our findings
70 |         messages = state.get('messages', [])
71 |         messages.append(AIMessage(content="\n".join(msg)))
72 |         state['messages'] = messages
73 |         state['news_data'] = news_data
74 |         
75 |         return {
76 |             'message': msg,
77 |             'news_data': news_data
78 |         }
79 | 
80 |     async def run(self, state: ResearchState) -> Dict[str, Any]:
81 |         return await self.analyze(state) 


--------------------------------------------------------------------------------
/backend/services/mongodb.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from typing import Any, Dict, Optional
 3 | 
 4 | import certifi
 5 | from pymongo import MongoClient
 6 | 
 7 | 
 8 | class MongoDBService:
 9 |     def __init__(self, uri: str):
10 |         # Use certifi for SSL certificate verification with updated options
11 |         self.client = MongoClient(
12 |             uri,
13 |             tlsCAFile=certifi.where(),
14 |             retryWrites=True,
15 |             w='majority'
16 |         )
17 |         self.db = self.client.get_database('tavily_research')
18 |         self.jobs = self.db.jobs
19 |         self.reports = self.db.reports
20 | 
21 |     def create_job(self, job_id: str, inputs: Dict[str, Any]) -> None:
22 |         """Create a new research job record."""
23 |         self.jobs.insert_one({
24 |             "job_id": job_id,
25 |             "inputs": inputs,
26 |             "status": "pending",
27 |             "created_at": datetime.utcnow(),
28 |             "updated_at": datetime.utcnow()
29 |         })
30 | 
31 |     def update_job(self, job_id: str, 
32 |                   status: str = None,
33 |                   result: Dict[str, Any] = None,
34 |                   error: str = None) -> None:
35 |         """Update a research job with results or status."""
36 |         update_data = {"updated_at": datetime.utcnow()}
37 |         if status:
38 |             update_data["status"] = status
39 |         if result:
40 |             update_data["result"] = result
41 |         if error:
42 |             update_data["error"] = error
43 | 
44 |         self.jobs.update_one(
45 |             {"job_id": job_id},
46 |             {"$set": update_data}
47 |         )
48 | 
49 |     def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
50 |         """Retrieve a job by ID."""
51 |         return self.jobs.find_one({"job_id": job_id})
52 | 
53 |     def store_report(self, job_id: str, report_data: Dict[str, Any]) -> None:
54 |         """Store the finalized research report."""
55 |         self.reports.insert_one({
56 |             "job_id": job_id,
57 |             "report_content": report_data.get("report", ""),
58 |             "references": report_data.get("references", []),
59 |             "sections": report_data.get("sections_completed", []),
60 |             "analyst_queries": report_data.get("analyst_queries", {}),
61 |             "created_at": datetime.utcnow()
62 |         })
63 | 
64 |     def get_report(self, job_id: str) -> Optional[Dict[str, Any]]:
65 |         """Retrieve a report by job ID."""
66 |         return self.reports.find_one({"job_id": job_id}) 


--------------------------------------------------------------------------------
/backend/services/pdf_service.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import logging
 3 | import os
 4 | import re
 5 | 
 6 | from backend.utils.utils import generate_pdf_from_md
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | class PDFService:
11 |     def __init__(self, config):
12 |         self.output_dir = config.get("pdf_output_dir", "pdfs")
13 |         # Create output directory if it doesn't exist
14 |         os.makedirs(self.output_dir, exist_ok=True)
15 |         
16 |     def _sanitize_company_name(self, company_name):
17 |         """Sanitize company name for use in filenames."""
18 |         # Replace spaces with underscores and remove special characters
19 |         sanitized = re.sub(r'[^\w\s-]', '', company_name).strip().replace(' ', '_')
20 |         return sanitized.lower()
21 |     
22 |     def _generate_pdf_filename(self, company_name):
23 |         """Generate a PDF filename based on the company name."""
24 |         sanitized_name = self._sanitize_company_name(company_name)
25 |         return f"{sanitized_name}_report.pdf"
26 |     
27 |     def generate_pdf_stream(self, markdown_content, company_name=None):
28 |         """
29 |         Generate a PDF from markdown content and return it as a stream.
30 |         
31 |         Args:
32 |             markdown_content (str): The markdown content to convert to PDF
33 |             company_name (str, optional): The company name to use in the filename
34 |             
35 |         Returns:
36 |             tuple: (success status, PDF stream or error message)
37 |         """
38 |         try:
39 |             # Extract company name from the first line if not provided
40 |             if not company_name:
41 |                 first_line = markdown_content.split('\n')[0].strip()
42 |                 if first_line.startswith('# '):
43 |                     company_name = first_line[2:].strip()
44 |                 else:
45 |                     company_name = "Company Research"
46 |             
47 |             # Generate the output filename
48 |             pdf_filename = self._generate_pdf_filename(company_name)
49 |             
50 |             # Create a BytesIO object to store the PDF
51 |             pdf_buffer = io.BytesIO()
52 |             
53 |             # Generate the PDF directly to the buffer
54 |             generate_pdf_from_md(markdown_content, pdf_buffer)
55 |             
56 |             # Reset buffer position to start
57 |             pdf_buffer.seek(0)
58 |             
59 |             # Return success and the buffer
60 |             return True, (pdf_buffer, pdf_filename)
61 |             
62 |         except Exception as e:
63 |             error_msg = f"Error generating PDF: {str(e)}"
64 |             logger.error(error_msg)
65 |             return False, error_msg


--------------------------------------------------------------------------------
/backend/services/websocket_manager.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import logging
 3 | from datetime import datetime
 4 | from typing import Dict, Set
 5 | 
 6 | from fastapi import WebSocket
 7 | 
 8 | # Set up logging
 9 | logger = logging.getLogger(__name__)
10 | 
11 | class WebSocketManager:
12 |     def __init__(self):
13 |         # Store active connections for each job
14 |         self.active_connections: Dict[str, Set[WebSocket]] = {}
15 |         
16 |     async def connect(self, websocket: WebSocket, job_id: str):
17 |         """Connect a new client to a specific job."""
18 |         if job_id not in self.active_connections:
19 |             self.active_connections[job_id] = set()
20 |         self.active_connections[job_id].add(websocket)
21 |         logger.info(f"New WebSocket connection for job {job_id}")
22 |         logger.info(f"Total connections for job: {len(self.active_connections[job_id])}")
23 |         logger.info(f"All active jobs: {list(self.active_connections.keys())}")
24 |         
25 |     def disconnect(self, websocket: WebSocket, job_id: str):
26 |         """Disconnect a client from a specific job."""
27 |         if job_id in self.active_connections:
28 |             self.active_connections[job_id].discard(websocket)
29 |             if not self.active_connections[job_id]:
30 |                 del self.active_connections[job_id]
31 |             logger.info(f"WebSocket disconnected for job {job_id}")
32 |             logger.info(f"Remaining connections for job: {len(self.active_connections.get(job_id, set()))}")
33 |             logger.info(f"Remaining active jobs: {list(self.active_connections.keys())}")
34 |                 
35 |     async def broadcast_to_job(self, job_id: str, message: dict):
36 |         """Send a message to all clients connected to a specific job."""
37 |         if job_id not in self.active_connections:
38 |             logger.warning(f"No active connections for job {job_id}")
39 |             return
40 |             
41 |         # Add timestamp to message
42 |         message["timestamp"] = datetime.now().isoformat()
43 |         
44 |         # Convert message to JSON string
45 |         message_str = json.dumps(message)
46 |         logger.info(f"Message content: {message_str}")
47 |         
48 |         # Send to all connected clients for this job
49 |         success_count = 0
50 |         disconnected = set()
51 |         for connection in self.active_connections[job_id]:
52 |             try:
53 |                 await connection.send_text(message_str)
54 |                 success_count += 1
55 |             except Exception as e:
56 |                 logger.error(f"Error sending message to client: {str(e)}", exc_info=True)
57 |                 disconnected.add(connection)
58 |         
59 |         # Clean up disconnected clients
60 |         for connection in disconnected:
61 |             self.disconnect(connection, job_id)
62 |             
63 |     async def send_status_update(self, job_id: str, status: str, message: str = None, error: str = None, result: dict = None):
64 |         """Helper method to send formatted status updates."""
65 |         update = {
66 |             "type": "status_update",
67 |             "data": {
68 |                 "status": status,
69 |                 "message": message,
70 |                 "error": error,
71 |                 "result": result
72 |             }
73 |         }
74 |         #logger.info(f"Status: {status}, Message: {message}")
75 |         await self.broadcast_to_job(job_id, update)


--------------------------------------------------------------------------------
/backend/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | from .utils import generate_pdf_from_md, clean_text
 2 | from .references import (
 3 |     extract_domain_name, 
 4 |     extract_title_from_url_path, 
 5 |     clean_title, 
 6 |     normalize_url,
 7 |     extract_website_name_from_domain,
 8 |     process_references_from_search_results,
 9 |     format_reference_for_markdown,
10 |     extract_link_info,
11 |     format_references_section
12 | ) 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   backend:
 3 |     build:
 4 |       context: .
 5 |       dockerfile: Dockerfile
 6 |     ports:
 7 |       - "8000:8000"
 8 |     environment:
 9 |       - PYTHONUNBUFFERED=1
10 |       - PORT=8000
11 |     volumes:
12 |       - ./reports:/app/reports
13 |       - ./backend:/app/backend
14 |       - ./application.py:/app/application.py
15 |     env_file:
16 |       - .env
17 | 
18 |   frontend:
19 |     image: node:20-slim
20 |     working_dir: /ui
21 |     command: sh -c "npm install && npm run dev"
22 |     ports:
23 |       - "5174:5174"
24 |     volumes:
25 |       - ./ui:/ui
26 |     environment:
27 |       - NODE_ENV=development
28 |       - VITE_API_URL=http://localhost:8000 
29 |       - VITE_WS_URL=ws://localhost:8000


--------------------------------------------------------------------------------
/langgraph.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dockerfile_lines": [],
 3 |   "graphs": {
 4 |     "agent": "./langgraph_entry.py:graph"
 5 |   },
 6 |   "env": ".env",
 7 |   "python_version": "3.11",
 8 |   "dependencies": [
 9 |     "."
10 |   ]
11 | }


--------------------------------------------------------------------------------
/langgraph_entry.py:
--------------------------------------------------------------------------------
1 | # langgraph_entry.py
2 | from backend.graph import Graph
3 | 
4 | graph = Graph().compile()


--------------------------------------------------------------------------------
/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "tavily-company-research",
3 |   "lockfileVersion": 3,
4 |   "requires": true,
5 |   "packages": {}
6 | }
7 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | certifi==2025.1.31
 2 | fastapi==0.115.11
 3 | langchain_core==0.3.41
 4 | langgraph==0.3.5
 5 | openai==1.65.4
 6 | protobuf~=4.25.0
 7 | pydantic==2.10.6
 8 | pymongo==4.6.3
 9 | reportlab==4.3.1
10 | tavily_python==0.5.1
11 | uvicorn[standard]==0.34.0
12 | websockets==12.0
13 | google-generativeai==0.8.4


--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # Text styling
  4 | BOLD='\033[1m'
  5 | GREEN='\033[0;32m'
  6 | BLUE='\033[0;34m'
  7 | NC='\033[0m' # No Color
  8 | 
  9 | # Version comparison function
 10 | version_compare() {
 11 |     echo "$@" | awk -F. '{ printf("%d%03d%03d%03d\n", $1,$2,$3,$4); }'
 12 | }
 13 | 
 14 | echo -e "${BOLD}🚀 Welcome to the Agentic Company Researcher Setup!${NC}\n"
 15 | 
 16 | # Check if Python 3.11+ is installed
 17 | echo -e "${BLUE}Checking Python version...${NC}"
 18 | if command -v python3 >/dev/null 2>&1; then
 19 |     python_version=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[:2])))')
 20 |     if [ "$(version_compare "$python_version")" -ge "$(version_compare "3.11")" ]; then
 21 |         echo -e "${GREEN}✓ Python $python_version is installed${NC}"
 22 |     else
 23 |         echo "❌ Python 3.11 or higher is required. Current version: $python_version"
 24 |         echo "Please install Python 3.11 or higher from https://www.python.org/downloads/"
 25 |         exit 1
 26 |     fi
 27 | else
 28 |     echo "❌ Python 3 is not installed"
 29 |     echo "Please install Python 3.11 or higher from https://www.python.org/downloads/"
 30 |     exit 1
 31 | fi
 32 | 
 33 | # Check if Node.js 18+ is installed
 34 | echo -e "\n${BLUE}Checking Node.js version...${NC}"
 35 | if command -v node >/dev/null 2>&1; then
 36 |     node_version=$(node -v | cut -d'v' -f2)
 37 |     if [ "$(version_compare "$node_version")" -ge "$(version_compare "18.0.0")" ]; then
 38 |         echo -e "${GREEN}✓ Node.js $node_version is installed${NC}"
 39 |     else
 40 |         echo "❌ Node.js 18 or higher is required. Current version: $node_version"
 41 |         echo "Please install Node.js 18 or higher from https://nodejs.org/"
 42 |         exit 1
 43 |     fi
 44 | else
 45 |     echo "❌ Node.js is not installed"
 46 |     echo "Please install Node.js 18 or higher from https://nodejs.org/"
 47 |     exit 1
 48 | fi
 49 | 
 50 | # Ask about virtual environment
 51 | echo -e "\n${BLUE}Would you like to set up a Python virtual environment? (Recommended) [Y/n]${NC}"
 52 | read -r use_venv
 53 | use_venv=${use_venv:-Y}
 54 | 
 55 | if [[ $use_venv =~ ^[Yy]$ ]]; then
 56 |     echo -e "\n${BLUE}Setting up Python virtual environment...${NC}"
 57 |     python3 -m venv .venv
 58 |     source .venv/bin/activate
 59 |     echo -e "${GREEN}✓ Virtual environment created and activated${NC}"
 60 |     
 61 |     # Install Python dependencies in venv
 62 |     echo -e "\n${BLUE}Installing Python dependencies in virtual environment...${NC}"
 63 |     pip install -r requirements.txt
 64 |     echo -e "${GREEN}✓ Python dependencies installed${NC}"
 65 | else
 66 |     # Prompt for global installation
 67 |     echo -e "\n${BLUE}Would you like to install Python dependencies globally? This may affect other Python projects. [y/N]${NC}"
 68 |     read -r install_global
 69 |     install_global=${install_global:-N}
 70 | 
 71 |     if [[ $install_global =~ ^[Yy]$ ]]; then
 72 |         echo -e "\n${BLUE}Installing Python dependencies globally...${NC}"
 73 |         pip3 install -r requirements.txt
 74 |         echo -e "${GREEN}✓ Python dependencies installed${NC}"
 75 |         echo -e "${BLUE}Note: Dependencies have been installed in your global Python environment${NC}"
 76 |     else
 77 |         echo -e "${BLUE}Skipping Python dependency installation. You'll need to install them manually later.${NC}"
 78 |         echo -e "${BLUE}You can do this by running: pip install -r requirements.txt${NC}"
 79 |     fi
 80 | fi
 81 | 
 82 | # Install Node.js dependencies
 83 | echo -e "\n${BLUE}Installing Node.js dependencies...${NC}"
 84 | cd ui
 85 | npm install
 86 | # Create or overwrite .env.development for frontend dev environment
 87 | cat > .env.development << EOL
 88 | VITE_API_URL=http://localhost:8000
 89 | VITE_WS_URL=ws://localhost:8000
 90 | EOL
 91 | cd ..
 92 | echo -e "${GREEN}✓ Node.js dependencies installed${NC}"
 93 | 
 94 | # Setup .env file
 95 | echo -e "\n${BLUE}Setting up environment variables...${NC}"
 96 | if [ -f ".env" ]; then
 97 |     echo "Found existing .env file. Would you like to overwrite it? (y/n)"
 98 |     read -r overwrite
 99 |     if [ "$overwrite" != "y" ]; then
100 |         echo "Keeping existing .env file"
101 |     else
102 |         setup_env=true
103 |     fi
104 | else
105 |     setup_env=true
106 | fi
107 | 
108 | if [ "$setup_env" = true ]; then
109 |     echo -e "\nPlease enter your API keys:"
110 |     echo -n "Tavily API Key: "
111 |     read -r tavily_key
112 |     echo -n "Google Gemini API Key: "
113 |     read -r gemini_key
114 |     echo -n "OpenAI API Key: "
115 |     read -r openai_key
116 |     echo -n "MongoDB URI (optional - press enter to skip): "
117 |     read -r mongodb_uri
118 | 
119 |     # Create .env file
120 |     cat > .env << EOL
121 | TAVILY_API_KEY=$tavily_key
122 | GEMINI_API_KEY=$gemini_key
123 | OPENAI_API_KEY=$openai_key
124 | EOL
125 | 
126 |     # Add MongoDB URI if provided
127 |     if [ ! -z "$mongodb_uri" ]; then
128 |         echo "MONGODB_URI=$mongodb_uri" >> .env
129 |     fi
130 | 
131 |     echo -e "${GREEN}✓ Environment variables saved to .env${NC}"
132 | fi
133 | 
134 | # Final instructions and server startup options
135 | echo -e "\n${BOLD}🎉 Setup complete!${NC}"
136 | 
137 | if [[ $use_venv =~ ^[Yy]$ ]]; then
138 |     echo -e "\n${BLUE}Virtual environment is now activated and ready to use${NC}"
139 | fi
140 | 
141 | # Ask about starting servers
142 | echo -e "\n${BLUE}Would you like to start the application servers now? [Y/n]${NC}"
143 | read -r start_servers
144 | start_servers=${start_servers:-Y}
145 | 
146 | if [[ $start_servers =~ ^[Yy]$ ]]; then
147 |     echo -e "\n${BLUE}Choose backend server option:${NC}"
148 |     echo "1) python -m application.py"
149 |     echo "2) uvicorn application:app --reload --port 8000"
150 |     read -r backend_choice
151 | 
152 |     # Start backend server in background
153 |     if [ "$backend_choice" = "1" ]; then
154 |         echo -e "\n${GREEN}Starting backend server with python...${NC}"
155 |         python -m application.py &
156 |     else
157 |         echo -e "\n${GREEN}Starting backend server with uvicorn...${NC}"
158 |         uvicorn application:app --reload --port 8000 &
159 |     fi
160 |     
161 |     # Store backend PID
162 |     backend_pid=$!
163 |     
164 |     # Wait a moment for backend to start
165 |     sleep 2
166 |     
167 |     # Start frontend server
168 |     echo -e "\n${GREEN}Starting frontend server...${NC}"
169 |     cd ui
170 |     npm run dev &
171 |     frontend_pid=$!
172 |     cd ..
173 |     
174 |     echo -e "\n${GREEN}Servers are starting up! The application will be available at:${NC}"
175 |     echo -e "${BOLD}http://localhost:5173${NC}"
176 |     
177 |     # Add trap to handle script termination
178 |     trap 'kill $backend_pid $frontend_pid 2>/dev/null' EXIT
179 |     
180 |     # Keep script running until user stops it
181 |     echo -e "\n${BLUE}Press Ctrl+C to stop the servers${NC}"
182 |     wait
183 | else
184 |     echo -e "\n${BOLD}To start the application manually:${NC}"
185 |     echo -e "\n1. Start the backend server (choose one):"
186 |     echo "   Option 1: python -m application.py"
187 |     echo "   Option 2: uvicorn application:app --reload --port 8000"
188 |     echo -e "\n2. In a new terminal, start the frontend:"
189 |     echo "   cd ui"
190 |     echo "   npm run dev"
191 |     echo -e "\n3. Access the application at ${BOLD}http://localhost:5173${NC}"
192 | fi
193 | 
194 | echo -e "\n${BOLD}Need help?${NC}"
195 | echo "- Documentation: README.md"
196 | echo "- Issues: https://github.com/pogjester/tavily-company-research/issues"
197 | echo -e "\n${GREEN}Happy researching! 🚀${NC}" 


--------------------------------------------------------------------------------
/static/agent-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/agent-flow.png


--------------------------------------------------------------------------------
/static/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/demo.mp4


--------------------------------------------------------------------------------
/static/ui-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/ui-1.png


--------------------------------------------------------------------------------
/static/ui-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/static/ui-2.png


--------------------------------------------------------------------------------
/ui/.env.development.example:
--------------------------------------------------------------------------------
1 | VITE_API_URL=http://localhost:8000
2 | VITE_WS_URL=ws://localhost:8000


--------------------------------------------------------------------------------
/ui/.gitignore:
--------------------------------------------------------------------------------
 1 | .vercel
 2 | /dist
 3 | /node_modules
 4 | .env
 5 | .env.local
 6 | .env.development.local
 7 | .env.test.local
 8 | .env.production.local
 9 | .env.development
10 | .env.production
11 | .env.test
12 | .vite


--------------------------------------------------------------------------------
/ui/eslint.config.js:
--------------------------------------------------------------------------------
 1 | import js from '@eslint/js';
 2 | import globals from 'globals';
 3 | import reactHooks from 'eslint-plugin-react-hooks';
 4 | import reactRefresh from 'eslint-plugin-react-refresh';
 5 | import tseslint from 'typescript-eslint';
 6 | 
 7 | export default tseslint.config(
 8 |   { ignores: ['dist'] },
 9 |   {
10 |     extends: [js.configs.recommended, ...tseslint.configs.recommended],
11 |     files: ['**/*.{ts,tsx}'],
12 |     languageOptions: {
13 |       ecmaVersion: 2020,
14 |       globals: globals.browser,
15 |     },
16 |     plugins: {
17 |       'react-hooks': reactHooks,
18 |       'react-refresh': reactRefresh,
19 |     },
20 |     rules: {
21 |       ...reactHooks.configs.recommended.rules,
22 |       'react-refresh/only-export-components': [
23 |         'warn',
24 |         { allowConstantExport: true },
25 |       ],
26 |     },
27 |   }
28 | );
29 | 


--------------------------------------------------------------------------------
/ui/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <link rel="icon" type="image/x-icon" href="/favicon.ico" />
 6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 7 |     <title>Company Research</title>
 8 |   </head>
 9 |   <body>
10 |     <div id="root"></div>
11 |     <script type="module" src="/src/main.tsx"></script>
12 |   </body>
13 | </html>
14 | 


--------------------------------------------------------------------------------
/ui/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tavily-company-research-ui",
 3 |   "private": true,
 4 |   "version": "0.0.0",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "dev": "vite",
 8 |     "build": "tsc && vite build",
 9 |     "start": "serve -s build -l $PORT",
10 |     "lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
11 |     "preview": "vite preview"
12 |   },
13 |   "dependencies": {
14 |     "framer-motion": "^12.4.10",
15 |     "lucide-react": "^0.344.0",
16 |     "react": "^18.2.0",
17 |     "react-dom": "^18.2.0",
18 |     "react-markdown": "^9.0.1",
19 |     "rehype-raw": "^7.0.0",
20 |     "remark-gfm": "^4.0.0",
21 |     "serve": "^14.2.1"
22 |   },
23 |   "devDependencies": {
24 |     "@types/google.maps": "^3.58.1",
25 |     "@types/node": "^22.13.9",
26 |     "@types/react": "^18.2.56",
27 |     "@types/react-dom": "^18.2.19",
28 |     "@typescript-eslint/eslint-plugin": "^7.0.2",
29 |     "@typescript-eslint/parser": "^7.0.2",
30 |     "@vitejs/plugin-react": "^4.2.1",
31 |     "autoprefixer": "^10.4.17",
32 |     "eslint": "^8.56.0",
33 |     "eslint-plugin-react-hooks": "^4.6.0",
34 |     "eslint-plugin-react-refresh": "^0.4.5",
35 |     "postcss": "^8.4.35",
36 |     "tailwindcss": "^3.4.1",
37 |     "typescript": "^5.2.2",
38 |     "vite": "^6.3.4"
39 |   },
40 |   "engines": {
41 |     "node": ">=14.x"
42 |   }
43 | }
44 | 


--------------------------------------------------------------------------------
/ui/postcss.config.js:
--------------------------------------------------------------------------------
1 | export default {
2 |   plugins: {
3 |     tailwindcss: {},
4 |     autoprefixer: {},
5 |   },
6 | };
7 | 


--------------------------------------------------------------------------------
/ui/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/ui/public/favicon.ico


--------------------------------------------------------------------------------
/ui/public/tavilylogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pogjester/company-research-agent/1814a7a9b26831decf4cd8893d7a80a6dbe28b0b/ui/public/tavilylogo.png


--------------------------------------------------------------------------------
/ui/src/components/CurationExtraction.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { ChevronDown, ChevronUp, Loader2 } from 'lucide-react';
 3 | 
 4 | type EnrichmentCounts = {
 5 |   company: { total: number; enriched: number };
 6 |   industry: { total: number; enriched: number };
 7 |   financial: { total: number; enriched: number };
 8 |   news: { total: number; enriched: number };
 9 | };
10 | 
11 | interface CurationExtractionProps {
12 |   enrichmentCounts: EnrichmentCounts | undefined;
13 |   isExpanded: boolean;
14 |   onToggleExpand: () => void;
15 |   isResetting: boolean;
16 |   loaderColor: string;
17 | }
18 | 
19 | const CurationExtraction: React.FC<CurationExtractionProps> = ({
20 |   enrichmentCounts,
21 |   isExpanded,
22 |   onToggleExpand,
23 |   isResetting,
24 |   loaderColor
25 | }) => {
26 |   const glassStyle = "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl";
27 |   const glassCardStyle = `${glassStyle} rounded-2xl p-6`;
28 | 
29 |   return (
30 |     <div 
31 |       className={`${glassCardStyle} transition-all duration-300 ease-in-out ${
32 |         isResetting ? 'opacity-0 transform -translate-y-4' : 'opacity-100 transform translate-y-0'
33 |       }`}
34 |     >
35 |       <div 
36 |         className="flex items-center justify-between cursor-pointer"
37 |         onClick={onToggleExpand}
38 |       >
39 |         <h2 className="text-xl font-semibold text-gray-900">
40 |           Curation and Extraction
41 |         </h2>
42 |         <button className="text-gray-600 hover:text-gray-900 transition-colors">
43 |           {isExpanded ? (
44 |             <ChevronUp className="h-6 w-6" />
45 |           ) : (
46 |             <ChevronDown className="h-6 w-6" />
47 |           )}
48 |         </button>
49 |       </div>
50 | 
51 |       <div className={`overflow-hidden transition-all duration-500 ease-in-out ${
52 |         isExpanded ? 'mt-4 max-h-[1000px] opacity-100' : 'max-h-0 opacity-0'
53 |       }`}>
54 |         <div className="grid grid-cols-4 gap-4">
55 |           {['company', 'industry', 'financial', 'news'].map((category) => {
56 |             const counts = enrichmentCounts?.[category as keyof EnrichmentCounts];
57 |             return (
58 |               <div key={category} className="backdrop-blur-2xl bg-white/95 border border-gray-200/50 rounded-xl p-3 shadow-none">
59 |                 <h3 className="text-sm font-medium text-gray-700 mb-2 capitalize">{category}</h3>
60 |                 <div className="text-gray-900">
61 |                   <div className="text-2xl font-bold mb-1">
62 |                     {counts ? (
63 |                       <span className="text-[#468BFF]">
64 |                         {counts.enriched}
65 |                       </span>
66 |                     ) : (
67 |                       <Loader2 className="animate-spin h-6 w-6 mx-auto loader-icon" style={{ stroke: loaderColor }} />
68 |                     )}
69 |                   </div>
70 |                   <div className="text-sm text-gray-600">
71 |                     {counts ? (
72 |                       `selected from ${counts.total}`
73 |                     ) : (
74 |                       "waiting..."
75 |                     )}
76 |                   </div>
77 |                 </div>
78 |               </div>
79 |             );
80 |           })}
81 |         </div>
82 |       </div>
83 | 
84 |       {!isExpanded && enrichmentCounts && (
85 |         <div className="mt-2 text-sm text-gray-600">
86 |           {Object.values(enrichmentCounts).reduce((acc, curr) => acc + curr.enriched, 0)} documents enriched from {Object.values(enrichmentCounts).reduce((acc, curr) => acc + curr.total, 0)} total
87 |         </div>
88 |       )}
89 |     </div>
90 |   );
91 | };
92 | 
93 | export default CurationExtraction; 


--------------------------------------------------------------------------------
/ui/src/components/ExamplePopup.tsx:
--------------------------------------------------------------------------------
  1 | import React, { useState, useEffect, RefObject } from 'react';
  2 | import { ArrowRight, Sparkles } from 'lucide-react';
  3 | 
  4 | // Sample companies for examples
  5 | export const EXAMPLE_COMPANIES = [
  6 |   {
  7 |     name: "Stripe",
  8 |     url: "stripe.com",
  9 |     hq: "San Francisco, CA",
 10 |     industry: "Financial Technology"
 11 |   },
 12 |   {
 13 |     name: "Shopify",
 14 |     url: "shopify.com",
 15 |     hq: "Ottawa, Canada",
 16 |     industry: "E-commerce"
 17 |   },
 18 |   {
 19 |     name: "Notion",
 20 |     url: "notion.so",
 21 |     hq: "San Francisco, CA",
 22 |     industry: "Productivity Software"
 23 |   },
 24 |   {
 25 |     name: "Tesla",
 26 |     url: "tesla.com",
 27 |     hq: "Austin, TX",
 28 |     industry: "Automotive & Energy"
 29 |   },
 30 |   {
 31 |     name: "Airbnb",
 32 |     url: "airbnb.com",
 33 |     hq: "San Francisco, CA",
 34 |     industry: "Travel & Hospitality"
 35 |   },
 36 |   {
 37 |     name: "Slack",
 38 |     url: "slack.com",
 39 |     hq: "San Francisco, CA",
 40 |     industry: "Business Communication"
 41 |   },
 42 |   {
 43 |     name: "Spotify",
 44 |     url: "spotify.com",
 45 |     hq: "Stockholm, Sweden",
 46 |     industry: "Music Streaming"
 47 |   }
 48 | ];
 49 | 
 50 | export type ExampleCompany = typeof EXAMPLE_COMPANIES[0];
 51 | 
 52 | export interface ExamplePopupProps {
 53 |   visible: boolean;
 54 |   onExampleSelect: (example: ExampleCompany) => void;
 55 |   glassStyle: {
 56 |     card: string;
 57 |     input: string;
 58 |   };
 59 |   exampleRef: RefObject<HTMLDivElement>;
 60 | }
 61 | 
 62 | // Example Popup Component
 63 | const ExamplePopup: React.FC<ExamplePopupProps> = ({
 64 |   visible,
 65 |   onExampleSelect,
 66 |   glassStyle,
 67 |   exampleRef
 68 | }) => {
 69 |   const [selectedExample, setSelectedExample] = useState(0);
 70 |   const [isNameAnimating, setIsNameAnimating] = useState(false);
 71 |   
 72 |   // Cycle through examples periodically
 73 |   useEffect(() => {
 74 |     const interval = setInterval(() => {
 75 |       // Trigger name animation
 76 |       setIsNameAnimating(true);
 77 |       setTimeout(() => {
 78 |         setSelectedExample((prev) => (prev + 1) % EXAMPLE_COMPANIES.length);
 79 |         setTimeout(() => {
 80 |           setIsNameAnimating(false);
 81 |         }, 150);
 82 |       }, 150);
 83 |     }, 5000);
 84 |     return () => clearInterval(interval);
 85 |   }, []);
 86 | 
 87 |   if (!visible) return null;
 88 | 
 89 |   return (
 90 |     <div 
 91 |       ref={exampleRef}
 92 |       className={`absolute -top-14 left-8 ${glassStyle.card} bg-white/90 shadow-lg border-blue-200 cursor-pointer z-10 
 93 |                  flex items-center px-3 py-2 space-x-2 transform transition-all duration-300 
 94 |                  hover:shadow-xl hover:bg-white/95 hover:-translate-y-1 hover:border-blue-300 group`}
 95 |       onClick={() => onExampleSelect(EXAMPLE_COMPANIES[selectedExample])}
 96 |       style={{
 97 |         borderTopLeftRadius: '12px',
 98 |         borderTopRightRadius: '12px',
 99 |         borderBottomRightRadius: '12px',
100 |         borderBottomLeftRadius: '4px',
101 |       }}
102 |     >
103 |       <Sparkles className="h-4 w-4 text-blue-500 group-hover:text-blue-600 animate-pulse group-hover:animate-none group-hover:scale-110 transition-all" />
104 |       <div>
105 |         <span className="text-sm font-medium text-gray-700 group-hover:text-gray-800 transition-colors">Try an example: </span>
106 |         <span 
107 |           className={`text-sm font-bold text-blue-600 group-hover:text-blue-700 transition-all inline-block
108 |             ${isNameAnimating ? 'opacity-0 transform -translate-y-3 scale-95' : 'opacity-100 transform translate-y-0 scale-100'}`}
109 |           style={{ 
110 |             transitionDuration: '150ms',
111 |             transitionTimingFunction: 'cubic-bezier(0.2, 0, 0.4, 1)'
112 |           }}
113 |         >
114 |           {EXAMPLE_COMPANIES[selectedExample].name}
115 |         </span>
116 |       </div>
117 |       <ArrowRight className="h-3.5 w-3.5 text-blue-500 group-hover:text-blue-600 group-hover:translate-x-0.5 transition-all" />
118 |     </div>
119 |   );
120 | };
121 | 
122 | export default ExamplePopup; 


--------------------------------------------------------------------------------
/ui/src/components/Header.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { Github } from 'lucide-react';
 3 | 
 4 | interface HeaderProps {
 5 |   glassStyle: string;
 6 | }
 7 | 
 8 | const Header: React.FC<HeaderProps> = ({ glassStyle }) => {
 9 |   const handleImageError = (e: React.SyntheticEvent<HTMLImageElement, Event>) => {
10 |     console.error('Failed to load Tavily logo');
11 |     console.log('Image path:', e.currentTarget.src);
12 |     e.currentTarget.style.display = 'none';
13 |   };
14 | 
15 |   return (
16 |     <div className="relative mb-16">
17 |       <div className="text-center pt-4">
18 |         <h1 className="text-[48px] font-medium text-[#1a202c] font-['DM_Sans'] tracking-[-1px] leading-[52px] text-center mx-auto antialiased">
19 |           Company Research Agent
20 |         </h1>
21 |         <p className="text-gray-600 text-lg font-['DM_Sans'] mt-4">
22 |           Conduct in-depth company diligence powered by Tavily
23 |         </p>
24 |       </div>
25 |       <div className="absolute top-0 right-0 flex items-center space-x-2">
26 |         <a
27 |           href="https://tavily.com"
28 |           target="_blank"
29 |           rel="noopener noreferrer"
30 |           className={`text-gray-600 hover:text-gray-900 transition-colors ${glassStyle} rounded-lg flex items-center justify-center`}
31 |           style={{ width: '50px', height: '50px', padding: '2px' }}
32 |           aria-label="Tavily Website"
33 |         >
34 |           <img 
35 |             src="/tavilylogo.png" 
36 |             alt="Tavily Logo" 
37 |             className="w-full h-full object-contain" 
38 |             style={{ 
39 |               width: '45px', 
40 |               height: '45px',
41 |               display: 'block',
42 |               margin: 'auto'
43 |             }}
44 |             onError={handleImageError}
45 |           />
46 |         </a>
47 |         <a
48 |           href="https://github.com/pogjester/company-research-agent"
49 |           target="_blank"
50 |           rel="noopener noreferrer"
51 |           className={`text-gray-600 hover:text-gray-900 transition-colors ${glassStyle} rounded-lg flex items-center justify-center`}
52 |           style={{ width: '40px', height: '40px', padding: '8px' }}
53 |           aria-label="GitHub Profile"
54 |         >
55 |           <Github 
56 |             style={{ 
57 |               width: '24px', 
58 |               height: '24px',
59 |               display: 'block',
60 |               margin: 'auto'
61 |             }} 
62 |           />
63 |         </a>
64 |       </div>
65 |     </div>
66 |   );
67 | };
68 | 
69 | export default Header; 


--------------------------------------------------------------------------------
/ui/src/components/LocationInput.tsx:
--------------------------------------------------------------------------------
  1 | import React, { useEffect, useRef, useState, useCallback } from 'react';
  2 | import { MapPin } from 'lucide-react';
  3 | 
  4 | interface LocationInputProps {
  5 |   value: string;
  6 |   onChange: (value: string) => void;
  7 |   className?: string;
  8 | }
  9 | 
 10 | declare global {
 11 |   interface Window {
 12 |     google: any;
 13 |     initGoogleMapsCallback: () => void;
 14 |   }
 15 | }
 16 | 
 17 | // Create a global script loader to ensure we only load the script once
 18 | let scriptPromise: Promise<void> | null = null;
 19 | 
 20 | const loadGoogleMapsScript = (): Promise<void> => {
 21 |   if (scriptPromise) {
 22 |     return scriptPromise;
 23 |   }
 24 | 
 25 |   scriptPromise = new Promise<void>((resolve) => {
 26 |     // If already loaded, resolve immediately
 27 |     if (window.google?.maps?.places) {
 28 |       resolve();
 29 |       return;
 30 |     }
 31 | 
 32 |     // Define the callback function
 33 |     window.initGoogleMapsCallback = () => {
 34 |       resolve();
 35 |     };
 36 | 
 37 |     // Create script element
 38 |     const script = document.createElement('script');
 39 |     // Use loading=async parameter as recommended by Google
 40 |     script.src = `https://maps.googleapis.com/maps/api/js?key=${import.meta.env.VITE_GOOGLE_MAPS_API_KEY}&libraries=places&loading=async&callback=initGoogleMapsCallback`;
 41 |     script.async = true;
 42 |     script.defer = true;
 43 |     
 44 |     // Handle errors
 45 |     script.onerror = (error) => {
 46 |       console.error('Error loading Google Maps script:', error);
 47 |       scriptPromise = null;
 48 |     };
 49 | 
 50 |     // Append to document
 51 |     document.head.appendChild(script);
 52 |   });
 53 | 
 54 |   return scriptPromise;
 55 | };
 56 | 
 57 | const LocationInput: React.FC<LocationInputProps> = ({ value, onChange, className }) => {
 58 |   const inputRef = useRef<HTMLInputElement>(null);
 59 |   const autocompleteRef = useRef<google.maps.places.Autocomplete | null>(null);
 60 |   const [isApiLoaded, setIsApiLoaded] = useState(false);
 61 |   const onChangeRef = useRef(onChange);
 62 |   const isInitializedRef = useRef(false);
 63 | 
 64 |   // Update the ref when onChange changes
 65 |   useEffect(() => {
 66 |     onChangeRef.current = onChange;
 67 |   }, [onChange]);
 68 | 
 69 |   // Load the Google Maps API
 70 |   useEffect(() => {
 71 |     // Check if script is already in the document
 72 |     const existingScript = document.querySelector('script[src*="maps.googleapis.com/maps/api/js"]');
 73 |     if (existingScript) {
 74 |       console.warn('Google Maps script is already loaded elsewhere in the application');
 75 |       // If script exists but API not available yet, wait for it
 76 |       if (!window.google?.maps?.places) {
 77 |         const checkInterval = setInterval(() => {
 78 |           if (window.google?.maps?.places) {
 79 |             setIsApiLoaded(true);
 80 |             clearInterval(checkInterval);
 81 |           }
 82 |         }, 100);
 83 |         
 84 |         // Clear interval after 10 seconds to prevent infinite checking
 85 |         setTimeout(() => clearInterval(checkInterval), 10000);
 86 |       } else {
 87 |         setIsApiLoaded(true);
 88 |       }
 89 |       return;
 90 |     }
 91 | 
 92 |     const loadApi = async () => {
 93 |       try {
 94 |         await loadGoogleMapsScript();
 95 |         setIsApiLoaded(true);
 96 |       } catch (error) {
 97 |         console.error('Failed to load Google Maps API:', error);
 98 |       }
 99 |     };
100 | 
101 |     loadApi();
102 |   }, []);
103 | 
104 |   // Initialize autocomplete when API is loaded and input is available
105 |   useEffect(() => {
106 |     if (!isApiLoaded || !inputRef.current || !window.google?.maps?.places || isInitializedRef.current) {
107 |       return;
108 |     }
109 | 
110 |     try {
111 |       // Initialize autocomplete
112 |       autocompleteRef.current = new window.google.maps.places.Autocomplete(inputRef.current, {
113 |         types: ['(cities)'],
114 |       });
115 | 
116 |       // Style the autocomplete dropdown
117 |       const style = document.createElement('style');
118 |       style.textContent = `
119 |         .pac-container {
120 |           background-color: white !important;
121 |           border: 1px solid rgba(70, 139, 255, 0.1) !important;
122 |           border-radius: 0.75rem !important;
123 |           margin-top: 0.5rem !important;
124 |           font-family: "Noto Sans", sans-serif !important;
125 |           overflow: hidden !important;
126 |           box-shadow: none !important;
127 |         }
128 |         .pac-item {
129 |           padding: 0.875rem 1.25rem !important;
130 |           cursor: pointer !important;
131 |           transition: all 0.2s ease-in-out !important;
132 |           border-bottom: 1px solid rgba(70, 139, 255, 0.05) !important;
133 |         }
134 |         .pac-item:last-child {
135 |           border-bottom: none !important;
136 |         }
137 |         .pac-item:hover {
138 |           background-color: rgba(70, 139, 255, 0.03) !important;
139 |         }
140 |         .pac-item-selected {
141 |           background-color: rgba(70, 139, 255, 0.05) !important;
142 |         }
143 |         .pac-item-query {
144 |           color: #1a365d !important;
145 |           font-size: 0.9375rem !important;
146 |           font-weight: 500 !important;
147 |         }
148 |         .pac-matched {
149 |           font-weight: 600 !important;
150 |         }
151 |         .pac-item span:not(.pac-item-query) {
152 |           color: #64748b !important;
153 |           font-size: 0.8125rem !important;
154 |           margin-left: 0.5rem !important;
155 |         }
156 |         /* Hide the location icon */
157 |         .pac-icon {
158 |           display: none !important;
159 |         }
160 |       `;
161 |       document.head.appendChild(style);
162 | 
163 |       // Add place_changed listener
164 |       const autocomplete = autocompleteRef.current;
165 |       if (autocomplete) {
166 |         autocomplete.addListener('place_changed', () => {
167 |           const place = autocomplete.getPlace();
168 |           if (place?.formatted_address) {
169 |             onChangeRef.current(place.formatted_address);
170 |           }
171 |         });
172 |       }
173 | 
174 |       isInitializedRef.current = true;
175 |     } catch (error) {
176 |       console.error('Error initializing Google Maps Autocomplete:', error);
177 |     }
178 | 
179 |     // Cleanup
180 |     return () => {
181 |       if (autocompleteRef.current && window.google?.maps?.event) {
182 |         window.google.maps.event.clearInstanceListeners(autocompleteRef.current);
183 |         autocompleteRef.current = null;
184 |         isInitializedRef.current = false;
185 |       }
186 |     };
187 |   }, [isApiLoaded]); // Removed onChange from dependencies
188 | 
189 |   // Handle manual input changes
190 |   const handleInputChange = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
191 |     onChange(e.target.value);
192 |   }, [onChange]);
193 | 
194 |   return (
195 |     <div className="relative group">
196 |       <div className="absolute inset-0 bg-gradient-to-r from-gray-50/0 via-gray-100/50 to-gray-50/0 opacity-0 group-hover:opacity-100 transition-opacity duration-500 rounded-lg"></div>
197 |       <MapPin className="absolute left-4 top-1/2 -translate-y-1/2 h-5 w-5 stroke-[#468BFF] transition-all duration-200 group-hover:stroke-[#8FBCFA] z-10" strokeWidth={1.5} />
198 |       <input
199 |         ref={inputRef}
200 |         type="text"
201 |         value={value}
202 |         onChange={handleInputChange}
203 |         onKeyDown={(e) => {
204 |           if (e.key === 'Enter') {
205 |             e.preventDefault();
206 |           }
207 |         }}
208 |         className={`${className} !font-['DM_Sans']`}
209 |         placeholder="City, Country"
210 |       />
211 |     </div>
212 |   );
213 | };
214 | 
215 | export default LocationInput; 


--------------------------------------------------------------------------------
/ui/src/components/ResearchBriefings.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { ChevronDown, ChevronUp, CheckCircle2 } from 'lucide-react';
 3 | 
 4 | type BriefingStatus = {
 5 |   company: boolean;
 6 |   industry: boolean;
 7 |   financial: boolean;
 8 |   news: boolean;
 9 | };
10 | 
11 | interface ResearchBriefingsProps {
12 |   briefingStatus: BriefingStatus;
13 |   isExpanded: boolean;
14 |   onToggleExpand: () => void;
15 |   isResetting: boolean;
16 | }
17 | 
18 | const ResearchBriefings: React.FC<ResearchBriefingsProps> = ({
19 |   briefingStatus,
20 |   isExpanded,
21 |   onToggleExpand,
22 |   isResetting
23 | }) => {
24 |   const glassStyle = "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl";
25 |   const cardGlassStyle = "backdrop-filter backdrop-blur-lg bg-white/80 shadow-sm";
26 | 
27 |   return (
28 |     <div 
29 |       className={`${glassStyle} rounded-2xl p-6 transition-all duration-300 ease-in-out ${
30 |         isResetting ? 'opacity-0 transform -translate-y-4' : 'opacity-100 transform translate-y-0'
31 |       } font-['DM_Sans']`}
32 |     >
33 |       <div 
34 |         className="flex items-center justify-between cursor-pointer"
35 |         onClick={onToggleExpand}
36 |       >
37 |         <h2 className="text-xl font-semibold text-gray-900">
38 |           Research Briefings
39 |         </h2>
40 |         <button className="text-gray-600 hover:text-gray-900 transition-colors">
41 |           {isExpanded ? (
42 |             <ChevronUp className="h-6 w-6" />
43 |           ) : (
44 |             <ChevronDown className="h-6 w-6" />
45 |           )}
46 |         </button>
47 |       </div>
48 | 
49 |       <div className={`overflow-hidden transition-all duration-500 ease-in-out ${
50 |         isExpanded ? 'mt-6 max-h-[1000px] opacity-100' : 'max-h-0 opacity-0'
51 |       }`}>
52 |         <div className="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-4 gap-4 px-1">
53 |           {['company', 'industry', 'financial', 'news'].map((category) => (
54 |             <div 
55 |               key={category} 
56 |               className={`${cardGlassStyle} rounded-lg p-4 transition-all duration-500 ease-in-out relative ${
57 |                 briefingStatus[category as keyof BriefingStatus] 
58 |                   ? 'border border-[#468BFF] bg-gradient-to-br from-[#468BFF]/5 to-[#468BFF]/10 shadow-md' 
59 |                   : 'border border-gray-200 bg-white/80 hover:border-gray-300 hover:shadow-sm'
60 |               } backdrop-blur-sm group`}
61 |             >
62 |               {/* Background decoration element (only visible when active) */}
63 |               <div 
64 |                 className={`absolute inset-0 bg-[radial-gradient(circle_at_30%_20%,rgba(70,139,255,0.15),transparent_70%)] opacity-0 transition-opacity duration-700 ease-in-out rounded-lg ${
65 |                   briefingStatus[category as keyof BriefingStatus] ? 'opacity-100' : ''
66 |                 }`}
67 |                 style={{ pointerEvents: 'none' }}
68 |               />
69 |               
70 |               <div className="relative z-10 flex items-center justify-between">
71 |                 <h3 className={`text-sm font-medium capitalize transition-all duration-500 ${
72 |                   briefingStatus[category as keyof BriefingStatus]
73 |                     ? 'text-[#468BFF]'
74 |                     : 'text-gray-700 group-hover:text-gray-900'
75 |                 }`}>{category}</h3>
76 |                 {briefingStatus[category as keyof BriefingStatus] ? (
77 |                   <CheckCircle2 className="h-4 w-4 text-[#468BFF] transition-all duration-300" />
78 |                 ) : (
79 |                   <div className="h-4 w-4 rounded-full border border-gray-200 group-hover:border-gray-300 transition-all duration-300"></div>
80 |                 )}
81 |               </div>
82 |             </div>
83 |           ))}
84 |         </div>
85 |       </div>
86 | 
87 |       {!isExpanded && (
88 |         <div className="mt-2 text-sm text-gray-600">
89 |           {Object.values(briefingStatus).filter(Boolean).length} of {Object.keys(briefingStatus).length} briefings completed
90 |         </div>
91 |       )}
92 |     </div>
93 |   );
94 | };
95 | 
96 | export default ResearchBriefings; 


--------------------------------------------------------------------------------
/ui/src/components/ResearchForm.tsx:
--------------------------------------------------------------------------------
  1 | import React, { useState, useRef, useEffect } from 'react';
  2 | import { Building2, Factory, Globe, Loader2, Search } from 'lucide-react';
  3 | import LocationInput from './LocationInput';
  4 | import ExamplePopup, { ExampleCompany } from './ExamplePopup';
  5 | 
  6 | interface FormData {
  7 |   companyName: string;
  8 |   companyUrl: string;
  9 |   companyHq: string;
 10 |   companyIndustry: string;
 11 | }
 12 | 
 13 | interface ResearchFormProps {
 14 |   onSubmit: (formData: FormData) => Promise<void>;
 15 |   isResearching: boolean;
 16 |   glassStyle: {
 17 |     card: string;
 18 |     input: string;
 19 |   };
 20 |   loaderColor: string;
 21 | }
 22 | 
 23 | const ResearchForm: React.FC<ResearchFormProps> = ({
 24 |   onSubmit,
 25 |   isResearching,
 26 |   glassStyle,
 27 |   loaderColor
 28 | }) => {
 29 |   const [formData, setFormData] = useState<FormData>({
 30 |     companyName: "",
 31 |     companyUrl: "",
 32 |     companyHq: "",
 33 |     companyIndustry: "",
 34 |   });
 35 |   
 36 |   // Animation states
 37 |   const [showExampleSuggestion, setShowExampleSuggestion] = useState(true);
 38 |   const [isExampleAnimating, setIsExampleAnimating] = useState(false);
 39 |   const [wasResearching, setWasResearching] = useState(false);
 40 |   
 41 |   // Refs for form fields for animation
 42 |   const formRef = useRef<HTMLDivElement>(null);
 43 |   const exampleRef = useRef<HTMLDivElement>(null);
 44 |   
 45 |   // Hide example suggestion when form is filled
 46 |   useEffect(() => {
 47 |     if (formData.companyName) {
 48 |       setShowExampleSuggestion(false);
 49 |     } else if (!isExampleAnimating) {
 50 |       setShowExampleSuggestion(true);
 51 |     }
 52 |   }, [formData.companyName, isExampleAnimating]);
 53 | 
 54 |   // Track research state changes to show example popup when research completes
 55 |   useEffect(() => {
 56 |     // If we were researching and now we're not, research just completed
 57 |     if (wasResearching && !isResearching) {
 58 |       // Add a slight delay to let animations complete
 59 |       setTimeout(() => {
 60 |         // Reset form fields to empty values
 61 |         setFormData({
 62 |           companyName: "",
 63 |           companyUrl: "",
 64 |           companyHq: "",
 65 |           companyIndustry: "",
 66 |         });
 67 |         
 68 |         // Show the example suggestion again
 69 |         setShowExampleSuggestion(true);
 70 |       }, 1000);
 71 |     }
 72 |     
 73 |     // Update tracking state
 74 |     setWasResearching(isResearching);
 75 |   }, [isResearching, wasResearching]);
 76 | 
 77 |   const handleSubmit = async (e: React.FormEvent) => {
 78 |     e.preventDefault();
 79 |     await onSubmit(formData);
 80 |   };
 81 |   
 82 |   const fillExampleData = (example: ExampleCompany) => {
 83 |     // Start animation
 84 |     setIsExampleAnimating(true);
 85 |     
 86 |     // Animate the suggestion moving into the form
 87 |     if (exampleRef.current && formRef.current) {
 88 |       const exampleRect = exampleRef.current.getBoundingClientRect();
 89 |       const formRect = formRef.current.getBoundingClientRect();
 90 |       
 91 |       // Calculate the distance to move
 92 |       const moveX = formRect.left + 20 - exampleRect.left;
 93 |       const moveY = formRect.top + 20 - exampleRect.top;
 94 |       
 95 |       // Apply animation
 96 |       exampleRef.current.style.transform = `translate(${moveX}px, ${moveY}px) scale(0.6)`;
 97 |       exampleRef.current.style.opacity = '0';
 98 |     }
 99 |     
100 |     // Fill in form data after a short delay for animation
101 |     setTimeout(() => {
102 |       const newFormData = {
103 |         companyName: example.name,
104 |         companyUrl: example.url,
105 |         companyHq: example.hq,
106 |         companyIndustry: example.industry
107 |       };
108 |       
109 |       // Update form data
110 |       setFormData(newFormData);
111 |       
112 |       // Start research automatically (only if not already researching)
113 |       if (!isResearching) {
114 |         onSubmit(newFormData);
115 |       }
116 |       
117 |       setIsExampleAnimating(false);
118 |     }, 500);
119 |   };
120 | 
121 |   return (
122 |     <div className="relative" ref={formRef}>
123 |       {/* Example Suggestion */}
124 |       <ExamplePopup 
125 |         visible={showExampleSuggestion}
126 |         onExampleSelect={fillExampleData}
127 |         glassStyle={glassStyle}
128 |         exampleRef={exampleRef}
129 |       />
130 | 
131 |       {/* Main Form */}
132 |       <div className={`${glassStyle.card} backdrop-blur-2xl bg-white/90 border-gray-200/50 shadow-xl`}>
133 |         <form onSubmit={handleSubmit} className="space-y-6">
134 |           <div className="grid grid-cols-1 md:grid-cols-2 gap-8">
135 |             {/* Company Name */}
136 |             <div className="relative group">
137 |               <label
138 |                 htmlFor="companyName"
139 |                 className="block text-base font-medium text-gray-700 mb-2.5 transition-all duration-200 group-hover:text-gray-900 font-['DM_Sans']"
140 |               >
141 |                 Company Name <span className="text-gray-900/70">*</span>
142 |               </label>
143 |               <div className="relative">
144 |                 <div className="absolute inset-0 bg-gradient-to-r from-gray-50/0 via-gray-100/50 to-gray-50/0 opacity-0 group-hover:opacity-100 transition-opacity duration-500 rounded-lg"></div>
145 |                 <Building2 className="absolute left-4 top-1/2 -translate-y-1/2 h-5 w-5 stroke-[#468BFF] transition-all duration-200 group-hover:stroke-[#8FBCFA] z-10" strokeWidth={1.5} />
146 |                 <input
147 |                   required
148 |                   id="companyName"
149 |                   type="text"
150 |                   value={formData.companyName}
151 |                   onChange={(e) =>
152 |                     setFormData((prev) => ({
153 |                       ...prev,
154 |                       companyName: e.target.value,
155 |                     }))
156 |                   }
157 |                   className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`}
158 |                   placeholder="Enter company name"
159 |                 />
160 |               </div>
161 |             </div>
162 | 
163 |             {/* Company URL */}
164 |             <div className="relative group">
165 |               <label
166 |                 htmlFor="companyUrl"
167 |                 className="block text-base font-medium text-gray-700 mb-2.5 transition-all duration-200 group-hover:text-gray-900 font-['DM_Sans']"
168 |               >
169 |                 Company URL
170 |               </label>
171 |               <div className="relative">
172 |                 <div className="absolute inset-0 bg-gradient-to-r from-gray-50/0 via-gray-100/50 to-gray-50/0 opacity-0 group-hover:opacity-100 transition-opacity duration-500 rounded-lg"></div>
173 |                 <Globe className="absolute left-4 top-1/2 -translate-y-1/2 h-5 w-5 stroke-[#468BFF] transition-all duration-200 group-hover:stroke-[#8FBCFA] z-10" strokeWidth={1.5} />
174 |                 <input
175 |                   id="companyUrl"
176 |                   type="text"
177 |                   value={formData.companyUrl}
178 |                   onChange={(e) =>
179 |                     setFormData((prev) => ({
180 |                       ...prev,
181 |                       companyUrl: e.target.value,
182 |                     }))
183 |                   }
184 |                   className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`}
185 |                   placeholder="example.com"
186 |                 />
187 |               </div>
188 |             </div>
189 | 
190 |             {/* Company HQ */}
191 |             <div className="relative group">
192 |               <label
193 |                 htmlFor="companyHq"
194 |                 className="block text-base font-medium text-gray-700 mb-2.5 transition-all duration-200 group-hover:text-gray-900 font-['DM_Sans']"
195 |               >
196 |                 Company HQ
197 |               </label>
198 |               <LocationInput
199 |                 value={formData.companyHq}
200 |                 onChange={(value) =>
201 |                   setFormData((prev) => ({
202 |                     ...prev,
203 |                     companyHq: value,
204 |                   }))
205 |                 }
206 |                 className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`}
207 |               />
208 |             </div>
209 | 
210 |             {/* Company Industry */}
211 |             <div className="relative group">
212 |               <label
213 |                 htmlFor="companyIndustry"
214 |                 className="block text-base font-medium text-gray-700 mb-2.5 transition-all duration-200 group-hover:text-gray-900 font-['DM_Sans']"
215 |               >
216 |                 Company Industry
217 |               </label>
218 |               <div className="relative">
219 |                 <div className="absolute inset-0 bg-gradient-to-r from-gray-50/0 via-gray-100/50 to-gray-50/0 opacity-0 group-hover:opacity-100 transition-opacity duration-500 rounded-lg"></div>
220 |                 <Factory className="absolute left-4 top-1/2 -translate-y-1/2 h-5 w-5 stroke-[#468BFF] transition-all duration-200 group-hover:stroke-[#8FBCFA] z-10" strokeWidth={1.5} />
221 |                 <input
222 |                   id="companyIndustry"
223 |                   type="text"
224 |                   value={formData.companyIndustry}
225 |                   onChange={(e) =>
226 |                     setFormData((prev) => ({
227 |                       ...prev,
228 |                       companyIndustry: e.target.value,
229 |                     }))
230 |                   }
231 |                   className={`${glassStyle.input} transition-all duration-300 focus:border-[#468BFF]/50 focus:ring-1 focus:ring-[#468BFF]/50 group-hover:border-[#468BFF]/30 bg-white/80 backdrop-blur-sm text-lg py-4 pl-12 font-['DM_Sans']`}
232 |                   placeholder="e.g. Technology, Healthcare"
233 |                 />
234 |               </div>
235 |             </div>
236 |           </div>
237 | 
238 |           <button
239 |             type="submit"
240 |             disabled={isResearching || !formData.companyName}
241 |             className="relative group w-fit mx-auto block overflow-hidden rounded-lg bg-white/80 backdrop-blur-sm border border-gray-200 transition-all duration-500 hover:bg-gray-50 disabled:opacity-50 disabled:cursor-not-allowed px-12 font-['DM_Sans']"
242 |           >
243 |             <div className="absolute inset-0 bg-gradient-to-r from-gray-50/0 via-gray-100/50 to-gray-50/0 translate-x-[-100%] group-hover:translate-x-[100%] transition-transform duration-1000"></div>
244 |             <div className="relative flex items-center justify-center py-3.5">
245 |               {isResearching ? (
246 |                 <>
247 |                   <Loader2 className="animate-spin -ml-1 mr-2 h-5 w-5 loader-icon" style={{ stroke: loaderColor }} />
248 |                   <span className="text-base font-medium text-gray-900/90">Researching...</span>
249 |                 </>
250 |               ) : (
251 |                 <>
252 |                   <Search className="-ml-1 mr-2 h-5 w-5 text-gray-900/90" />
253 |                   <span className="text-base font-medium text-gray-900/90">Start Research</span>
254 |                 </>
255 |               )}
256 |             </div>
257 |           </button>
258 |         </form>
259 |       </div>
260 |     </div>
261 |   );
262 | };
263 | 
264 | export default ResearchForm; 


--------------------------------------------------------------------------------
/ui/src/components/ResearchQueries.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { ChevronDown, ChevronUp } from 'lucide-react';
 3 | import { ResearchQueriesProps } from '../types';
 4 | 
 5 | const ResearchQueries: React.FC<ResearchQueriesProps> = ({
 6 |   queries,
 7 |   streamingQueries,
 8 |   isExpanded,
 9 |   onToggleExpand,
10 |   isResetting,
11 |   glassStyle
12 | }) => {
13 |   const glassCardStyle = `${glassStyle} rounded-2xl p-6`;
14 |   const fadeInAnimation = "transition-all duration-300 ease-in-out";
15 | 
16 |   return (
17 |     <div 
18 |       className={`${glassCardStyle} ${fadeInAnimation} ${isResetting ? 'opacity-0 transform -translate-y-4' : 'opacity-100 transform translate-y-0'} font-['DM_Sans']`}
19 |     >
20 |       <div 
21 |         className="flex items-center justify-between cursor-pointer"
22 |         onClick={onToggleExpand}
23 |       >
24 |         <h2 className="text-xl font-semibold text-gray-900">
25 |           Generated Research Queries
26 |         </h2>
27 |         <button className="text-gray-600 hover:text-gray-900 transition-colors">
28 |           {isExpanded ? (
29 |             <ChevronUp className="h-6 w-6" />
30 |           ) : (
31 |             <ChevronDown className="h-6 w-6" />
32 |           )}
33 |         </button>
34 |       </div>
35 |       
36 |       <div className={`overflow-hidden transition-all duration-500 ease-in-out ${
37 |         isExpanded ? 'mt-4 max-h-[1000px] opacity-100' : 'max-h-0 opacity-0'
38 |       }`}>
39 |         <div className="grid grid-cols-2 gap-4">
40 |           {['company', 'industry', 'financial', 'news'].map((category) => (
41 |             <div key={category} className={`${glassStyle} rounded-xl p-3`}>
42 |               <h3 className="text-base font-medium text-gray-900 mb-3 capitalize">
43 |                 {category.charAt(0).toUpperCase() + category.slice(1)} Queries
44 |               </h3>
45 |               <div className="space-y-2">
46 |                 {/* Show streaming queries first */}
47 |                 {Object.entries(streamingQueries)
48 |                   .filter(([key]) => key.startsWith(category))
49 |                   .map(([key, query]) => (
50 |                     <div key={key} className="backdrop-filter backdrop-blur-lg bg-white/80 border border-[#468BFF]/30 rounded-lg p-2">
51 |                       <span className="text-gray-600">{query.text}</span>
52 |                       <span className="animate-pulse ml-1 text-[#8FBCFA]">|</span>
53 |                     </div>
54 |                   ))}
55 |                 {/* Then show completed queries */}
56 |                 {queries
57 |                   .filter((q) => q.category.startsWith(category))
58 |                   .map((query, idx) => (
59 |                     <div key={idx} className="backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 rounded-lg p-2">
60 |                       <span className="text-gray-600">{query.text}</span>
61 |                     </div>
62 |                   ))}
63 |               </div>
64 |             </div>
65 |           ))}
66 |         </div>
67 |       </div>
68 |       
69 |       {!isExpanded && (
70 |         <div className="mt-2 text-sm text-gray-600">
71 |           {queries.length} queries generated across {['company', 'industry', 'financial', 'news'].length} categories
72 |         </div>
73 |       )}
74 |     </div>
75 |   );
76 | };
77 | 
78 | export default ResearchQueries; 


--------------------------------------------------------------------------------
/ui/src/components/ResearchReport.tsx:
--------------------------------------------------------------------------------
  1 | import React from 'react';
  2 | import ReactMarkdown from "react-markdown";
  3 | import rehypeRaw from 'rehype-raw';
  4 | import remarkGfm from 'remark-gfm';
  5 | import { Check, Copy, Download, Loader2 } from 'lucide-react';
  6 | import { GlassStyle, AnimationStyle } from '../types';
  7 | 
  8 | interface ResearchReportProps {
  9 |   output: {
 10 |     summary: string;
 11 |     details: {
 12 |       report: string;
 13 |     };
 14 |   } | null;
 15 |   isResetting: boolean;
 16 |   glassStyle: GlassStyle;
 17 |   fadeInAnimation: AnimationStyle;
 18 |   loaderColor: string;
 19 |   isGeneratingPdf: boolean;
 20 |   isCopied: boolean;
 21 |   onCopyToClipboard: () => void;
 22 |   onGeneratePdf: () => void;
 23 | }
 24 | 
 25 | const ResearchReport: React.FC<ResearchReportProps> = ({
 26 |   output,
 27 |   isResetting,
 28 |   glassStyle,
 29 |   fadeInAnimation,
 30 |   loaderColor,
 31 |   isGeneratingPdf,
 32 |   isCopied,
 33 |   onCopyToClipboard,
 34 |   onGeneratePdf
 35 | }) => {
 36 |   if (!output || !output.details) return null;
 37 | 
 38 |   return (
 39 |     <div 
 40 |       className={`${glassStyle.card} ${fadeInAnimation.fadeIn} ${isResetting ? 'opacity-0 transform -translate-y-4' : 'opacity-100 transform translate-y-0'} font-['DM_Sans']`}
 41 |     >
 42 |       <div className="flex justify-end gap-2 mb-4">
 43 |         {output?.details?.report && (
 44 |           <>
 45 |             <button
 46 |               onClick={onCopyToClipboard}
 47 |               className="inline-flex items-center justify-center px-4 py-2 rounded-lg bg-[#468BFF] text-white hover:bg-[#8FBCFA] transition-all duration-200"
 48 |             >
 49 |               {isCopied ? (
 50 |                 <Check className="h-5 w-5" />
 51 |               ) : (
 52 |                 <Copy className="h-5 w-5" />
 53 |               )}
 54 |             </button>
 55 |             <button
 56 |               onClick={onGeneratePdf}
 57 |               disabled={isGeneratingPdf}
 58 |               className="inline-flex items-center justify-center px-4 py-2 rounded-lg bg-[#FFB800] text-white hover:bg-[#FFA800] transition-all duration-200 disabled:opacity-50 disabled:cursor-not-allowed"
 59 |             >
 60 |               {isGeneratingPdf ? (
 61 |                 <>
 62 |                   <Loader2 className="animate-spin h-5 w-5 mr-2" style={{ stroke: loaderColor }} />
 63 |                   Generating PDF...
 64 |                 </>
 65 |               ) : (
 66 |                 <>
 67 |                   <Download className="h-5 w-5" />
 68 |                   <span className="ml-2">PDF</span>
 69 |                 </>
 70 |               )}
 71 |             </button>
 72 |           </>
 73 |         )}
 74 |       </div>
 75 |       <div className="prose prose-invert prose-lg max-w-none">
 76 |         <div className="mt-4">
 77 |           <ReactMarkdown
 78 |             rehypePlugins={[rehypeRaw]}
 79 |             remarkPlugins={[remarkGfm]}
 80 |             components={{
 81 |               div: ({node, ...props}) => (
 82 |                 <div className="space-y-4 text-gray-800" {...props} />
 83 |               ),
 84 |               h1: ({node, children, ...props}) => {
 85 |                 const text = String(children);
 86 |                 const isFirstH1 = text.includes("Research Report");
 87 |                 const isReferences = text.includes("References");
 88 |                 return (
 89 |                   <div>
 90 |                     <h1 
 91 |                       className={`font-bold text-gray-900 break-words whitespace-pre-wrap ${isFirstH1 ? 'text-5xl mb-10 mt-4 max-w-[calc(100%-8rem)]' : 'text-3xl mb-6'}`} 
 92 |                       {...props} 
 93 |                     >
 94 |                       {children}
 95 |                     </h1>
 96 |                     {isReferences && (
 97 |                       <div className="h-[1px] w-full bg-gradient-to-r from-transparent via-gray-300 to-transparent my-8"></div>
 98 |                     )}
 99 |                   </div>
100 |                 );
101 |               },
102 |               h2: ({node, ...props}) => (
103 |                 <h2 className="text-3xl font-bold text-gray-900 first:mt-2 mt-8 mb-4" {...props} />
104 |               ),
105 |               h3: ({node, ...props}) => (
106 |                 <h3 className="text-xl font-semibold text-gray-900 mt-6 mb-3" {...props} />
107 |               ),
108 |               p: ({node, children, ...props}) => {
109 |                 const text = String(children);
110 |                 const isSubsectionHeader = (
111 |                   text.includes('\n') === false && 
112 |                   text.length < 50 && 
113 |                   (text.endsWith(':') || /^[A-Z][A-Za-z\s\/]+$/.test(text))
114 |                 );
115 |                 
116 |                 if (isSubsectionHeader) {
117 |                   return (
118 |                     <h3 className="text-xl font-semibold text-gray-900 mt-6 mb-3">
119 |                       {text.endsWith(':') ? text.slice(0, -1) : text}
120 |                     </h3>
121 |                   );
122 |                 }
123 |                 
124 |                 const isBulletLabel = text.startsWith('•') && text.includes(':');
125 |                 if (isBulletLabel) {
126 |                   const [label, content] = text.split(':');
127 |                   return (
128 |                     <div className="text-gray-800 my-2">
129 |                       <span className="font-semibold text-gray-900">
130 |                         {label.replace('•', '').trim()}:
131 |                       </span>
132 |                       {content}
133 |                     </div>
134 |                   );
135 |                 }
136 |                 
137 |                 const urlRegex = /(https?:\/\/[^\s<>"]+)/g;
138 |                 if (urlRegex.test(text)) {
139 |                   const parts = text.split(urlRegex);
140 |                   return (
141 |                     <p className="text-gray-800 my-2" {...props}>
142 |                       {parts.map((part, i) => 
143 |                         urlRegex.test(part) ? (
144 |                           <a 
145 |                             key={i}
146 |                             href={part}
147 |                             className="text-[#468BFF] hover:text-[#8FBCFA] underline decoration-[#468BFF] hover:decoration-[#8FBCFA] cursor-pointer transition-colors"
148 |                             target="_blank"
149 |                             rel="noopener noreferrer"
150 |                           >
151 |                             {part}
152 |                           </a>
153 |                         ) : part
154 |                       )}
155 |                     </p>
156 |                   );
157 |                 }
158 |                 
159 |                 return <p className="text-gray-800 my-2" {...props}>{children}</p>;
160 |               },
161 |               ul: ({node, ...props}) => (
162 |                 <ul className="text-gray-800 space-y-1 list-disc pl-6" {...props} />
163 |               ),
164 |               li: ({node, ...props}) => (
165 |                 <li className="text-gray-800" {...props} />
166 |               ),
167 |               a: ({node, href, ...props}) => (
168 |                 <a 
169 |                   href={href}
170 |                   className="text-[#468BFF] hover:text-[#8FBCFA] underline decoration-[#468BFF] hover:decoration-[#8FBCFA] cursor-pointer transition-colors" 
171 |                   target="_blank"
172 |                   rel="noopener noreferrer"
173 |                   {...props} 
174 |                 />
175 |               ),
176 |             }}
177 |           >
178 |             {output.details.report || "No report available"}
179 |           </ReactMarkdown>
180 |         </div>
181 |       </div>
182 |     </div>
183 |   );
184 | };
185 | 
186 | export default ResearchReport; 


--------------------------------------------------------------------------------
/ui/src/components/ResearchStatus.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { Loader2, CheckCircle2, XCircle } from 'lucide-react';
 3 | import { ResearchStatusProps } from '../types';
 4 | 
 5 | const ResearchStatus: React.FC<ResearchStatusProps> = ({
 6 |   status,
 7 |   error,
 8 |   isComplete,
 9 |   currentPhase,
10 |   isResetting,
11 |   glassStyle,
12 |   loaderColor,
13 |   statusRef
14 | }) => {
15 |   const glassCardStyle = `${glassStyle.base} rounded-2xl p-6`;
16 |   const fadeInAnimation = "transition-all duration-300 ease-in-out";
17 | 
18 |   if (!status) return null;
19 | 
20 |   return (
21 |     <div 
22 |       ref={statusRef} 
23 |       className={`${glassCardStyle} ${fadeInAnimation} ${isResetting ? 'opacity-0 transform -translate-y-4' : 'opacity-100 transform translate-y-0'} bg-white/80 backdrop-blur-sm border-gray-200 font-['DM_Sans']`}
24 |     >
25 |       <div className="flex items-center space-x-4">
26 |         <div className="flex-shrink-0">
27 |           {error ? (
28 |             <div className={`${glassStyle.base} p-2 rounded-full bg-[#FE363B]/10 border-[#FE363B]/20`}>
29 |               <XCircle className="h-5 w-5 text-[#FE363B]" />
30 |             </div>
31 |           ) : status?.step === "Complete" || isComplete ? (
32 |             <div className={`${glassStyle.base} p-2 rounded-full bg-[#22C55E]/10 border-[#22C55E]/20`}>
33 |               <CheckCircle2 className="h-5 w-5 text-[#22C55E]" />
34 |             </div>
35 |           ) : currentPhase === 'search' || currentPhase === 'enrichment' || (status?.step === "Processing" && status.message.includes("scraping")) ? (
36 |             <div className={`${glassStyle.base} p-2 rounded-full bg-[#468BFF]/10 border-[#468BFF]/20`}>
37 |               <Loader2 className="h-5 w-5 animate-spin loader-icon" style={{ stroke: loaderColor }} />
38 |             </div>
39 |           ) : currentPhase === 'briefing' ? (
40 |             <div className={`${glassStyle.base} p-2 rounded-full bg-[#468BFF]/10 border-[#468BFF]/20`}>
41 |               <Loader2 className="h-5 w-5 animate-spin loader-icon" style={{ stroke: loaderColor }} />
42 |             </div>
43 |           ) : (
44 |             <div className={`${glassStyle.base} p-2 rounded-full bg-[#468BFF]/10 border-[#468BFF]/20`}>
45 |               <Loader2 className="h-5 w-5 animate-spin loader-icon" style={{ stroke: loaderColor }} />
46 |             </div>
47 |           )}
48 |         </div>
49 |         <div className="flex-1">
50 |           <p className="font-medium text-gray-900/90">{status.step}</p>
51 |           <p className="text-sm text-gray-600 whitespace-pre-wrap">
52 |             {error || status.message}
53 |           </p>
54 |         </div>
55 |       </div>
56 |     </div>
57 |   );
58 | };
59 | 
60 | export default ResearchStatus; 


--------------------------------------------------------------------------------
/ui/src/components/index.ts:
--------------------------------------------------------------------------------
1 | // Export all components
2 | export { default as Header } from './Header';
3 | export { default as LocationInput } from './LocationInput';
4 | export { default as ResearchStatus } from './ResearchStatus';
5 | export { default as ResearchReport } from './ResearchReport';
6 | export { default as ResearchForm } from './ResearchForm'; 


--------------------------------------------------------------------------------
/ui/src/env.d.ts:
--------------------------------------------------------------------------------
 1 | /// <reference types="vite/client" />
 2 | 
 3 | interface ImportMetaEnv {
 4 |   readonly VITE_API_URL: string;
 5 |   readonly VITE_WS_URL: string;
 6 |   readonly MODE: string;
 7 |   readonly DEV: boolean;
 8 |   readonly PROD: boolean;
 9 | }
10 | 
11 | interface ImportMeta {
12 |   readonly env: ImportMetaEnv;
13 | }
14 | 


--------------------------------------------------------------------------------
/ui/src/index.css:
--------------------------------------------------------------------------------
 1 | @import url('https://fonts.googleapis.com/css2?family=DM+Sans:opsz,wght@9..40,400;9..40,500;9..40,600;9..40,700&display=swap');
 2 | 
 3 | @tailwind base;
 4 | @tailwind components;
 5 | @tailwind utilities;
 6 | 
 7 | @layer base {
 8 |   body {
 9 |     @apply bg-gray-900;
10 |     font-family: "DM Sans", sans-serif;
11 |     -webkit-font-smoothing: antialiased;
12 |     text-rendering: optimizeLegibility;
13 |     background-image: radial-gradient(circle at 1px 1px, rgba(255, 255, 255, 0.05) 1px, transparent 0);
14 |     background-size: 24px 24px;
15 |     background-position: center center;
16 |   }
17 | 
18 |   h1 {
19 |     font-family: "DM Sans", sans-serif;
20 |     font-size: 48px;
21 |     font-style: normal;
22 |     font-variation-settings: normal;
23 |     font-weight: 500;
24 |     letter-spacing: -1px;
25 |     text-rendering: optimizeLegibility;
26 |     unicode-bidi: isolate;
27 |     -webkit-font-smoothing: antialiased;
28 |   }
29 | 
30 |   h2, h3, h4, h5, h6 {
31 |     font-family: "DM Sans", sans-serif;
32 |     font-weight: 500;
33 |     letter-spacing: -0.5px;
34 |   }
35 | 
36 |   p, span, div, li, a {
37 |     font-family: "DM Sans", sans-serif;
38 |     font-weight: 400;
39 |   }
40 | 
41 |   input {
42 |     font-family: "DM Sans", sans-serif;
43 |   }
44 | 
45 |   button {
46 |     font-family: "DM Sans", sans-serif;
47 |   }
48 | 
49 |   select {
50 |     font-family: "DM Sans", sans-serif;
51 |   }
52 | }
53 | 
54 | @layer components {
55 |   .glass {
56 |     @apply bg-gray-900/40 backdrop-blur-md border border-gray-700/50;
57 |   }
58 | }


--------------------------------------------------------------------------------
/ui/src/main.tsx:
--------------------------------------------------------------------------------
 1 | import { StrictMode } from 'react';
 2 | import { createRoot } from 'react-dom/client';
 3 | import App from './App.tsx';
 4 | import './index.css';
 5 | 
 6 | createRoot(document.getElementById('root')!).render(
 7 |   <StrictMode>
 8 |     <App />
 9 |   </StrictMode>
10 | );
11 | 


--------------------------------------------------------------------------------
/ui/src/styles/index.ts:
--------------------------------------------------------------------------------
 1 | export const colorAnimation = `
 2 | @keyframes colorTransition {
 3 |   0% { stroke: #468BFF; }
 4 |   15% { stroke: #8FBCFA; }
 5 |   30% { stroke: #468BFF; }
 6 |   45% { stroke: #FE363B; }
 7 |   60% { stroke: #FF9A9D; }
 8 |   75% { stroke: #FDBB11; }
 9 |   90% { stroke: #F6D785; }
10 |   100% { stroke: #468BFF; }
11 | }
12 | 
13 | .animate-colors {
14 |   animation: colorTransition 8s ease-in-out infinite;
15 |   animation-fill-mode: forwards;
16 | }
17 | 
18 | .animate-spin {
19 |   animation: spin 1s linear infinite;
20 | }
21 | 
22 | @keyframes spin {
23 |   from {
24 |     transform: rotate(0deg);
25 |   }
26 |   to {
27 |     transform: rotate(360deg);
28 |   }
29 | }
30 | 
31 | /* Add transition for smoother color changes */
32 | .loader-icon {
33 |   transition: stroke 1s ease-in-out;
34 | }
35 | `;
36 | 
37 | export const dmSansStyle = `
38 |   @import url('https://fonts.googleapis.com/css2?family=DM+Sans:opsz,wght@9..40,400;9..40,500;9..40,600;9..40,700&display=swap');
39 |   
40 |   /* Apply DM Sans globally */
41 |   body {
42 |     font-family: 'DM Sans', sans-serif;
43 |   }
44 | `;
45 | 
46 | export const glassStyle = {
47 |   base: "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl",
48 |   card: "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl rounded-2xl p-6",
49 |   input: "backdrop-filter backdrop-blur-lg bg-white/80 border border-gray-200 shadow-xl pl-10 w-full rounded-lg py-3 px-4 text-gray-900 focus:border-[#468BFF]/50 focus:outline-none focus:ring-1 focus:ring-[#468BFF]/50 placeholder-gray-400 bg-white/80 shadow-none"
50 | };
51 | 
52 | export const fadeInAnimation = {
53 |   fadeIn: "transition-all duration-300 ease-in-out",
54 |   writing: "animate-pulse",
55 |   colorTransition: colorAnimation
56 | }; 


--------------------------------------------------------------------------------
/ui/src/types/index.ts:
--------------------------------------------------------------------------------
 1 | export type ResearchStatusType = {
 2 |   step: string;
 3 |   message: string;
 4 | };
 5 | 
 6 | export type ResearchOutput = {
 7 |   summary: string;
 8 |   details: {
 9 |     report: string;
10 |   };
11 | };
12 | 
13 | export type DocCount = {
14 |   initial: number;
15 |   kept: number;
16 | };
17 | 
18 | export type DocCounts = {
19 |   [key: string]: DocCount;
20 | };
21 | 
22 | export type EnrichmentCounts = {
23 |   company: { total: number; enriched: number };
24 |   industry: { total: number; enriched: number };
25 |   financial: { total: number; enriched: number };
26 |   news: { total: number; enriched: number };
27 | };
28 | 
29 | export type ResearchState = {
30 |   status: string;
31 |   message: string;
32 |   queries: Array<{
33 |     text: string;
34 |     number: number;
35 |     category: string;
36 |   }>;
37 |   streamingQueries: {
38 |     [key: string]: {
39 |       text: string;
40 |       number: number;
41 |       category: string;
42 |       isComplete: boolean;
43 |     };
44 |   };
45 |   briefingStatus: {
46 |     company: boolean;
47 |     industry: boolean;
48 |     financial: boolean;
49 |     news: boolean;
50 |   };
51 |   enrichmentCounts?: EnrichmentCounts;
52 |   docCounts?: DocCounts;
53 | };
54 | 
55 | export type GlassStyle = {
56 |   base: string;
57 |   card: string;
58 |   input: string;
59 | };
60 | 
61 | export type AnimationStyle = {
62 |   fadeIn: string;
63 |   writing: string;
64 |   colorTransition: string;
65 | };
66 | 
67 | export type ResearchStatusProps = {
68 |   status: ResearchStatusType | null;
69 |   error: string | null;
70 |   isComplete: boolean;
71 |   currentPhase: 'search' | 'enrichment' | 'briefing' | 'complete' | null;
72 |   isResetting: boolean;
73 |   glassStyle: GlassStyle;
74 |   loaderColor: string;
75 |   statusRef: React.RefObject<HTMLDivElement>;
76 | }; 


--------------------------------------------------------------------------------
/ui/src/utils/constants.ts:
--------------------------------------------------------------------------------
 1 | // API and WebSocket URLs
 2 | export const API_URL = import.meta.env.VITE_API_URL;
 3 | export const WS_URL = import.meta.env.VITE_WS_URL;
 4 | 
 5 | // WebSocket Configuration
 6 | export const MAX_RECONNECT_ATTEMPTS = 3;
 7 | export const RECONNECT_DELAY = 2000; // 2 seconds
 8 | 
 9 | // Animation Styles
10 | export const writingAnimation = `
11 | @keyframes writing {
12 |   0% {
13 |     stroke-dashoffset: 1000;
14 |   }
15 |   100% {
16 |     stroke-dashoffset: 0;
17 |   }
18 | }
19 | 
20 | .animate-writing {
21 |   animation: writing 1.5s linear infinite;
22 | }
23 | `;
24 | 
25 | export const colorAnimation = `
26 | @keyframes colorTransition {
27 |   0% { stroke: #468BFF; }
28 |   15% { stroke: #8FBCFA; }
29 |   30% { stroke: #468BFF; }
30 |   45% { stroke: #FE363B; }
31 |   60% { stroke: #FF9A9D; }
32 |   75% { stroke: #FDBB11; }
33 |   90% { stroke: #F6D785; }
34 |   100% { stroke: #468BFF; }
35 | }
36 | 
37 | .animate-colors {
38 |   animation: colorTransition 8s ease-in-out infinite;
39 |   animation-fill-mode: forwards;
40 | }
41 | 
42 | .animate-spin {
43 |   animation: spin 1s linear infinite;
44 | }
45 | 
46 | @keyframes spin {
47 |   from {
48 |     transform: rotate(0deg);
49 |   }
50 |   to {
51 |     transform: rotate(360deg);
52 |   }
53 | }
54 | 
55 | /* Add transition for smoother color changes */
56 | .loader-icon {
57 |   transition: stroke 1s ease-in-out;
58 | }
59 | `;
60 | 
61 | export const dmSansStyle = `
62 |   @import url('https://fonts.googleapis.com/css2?family=DM+Sans:opsz,wght@9..40,400;9..40,500;9..40,600;9..40,700&display=swap');
63 |   
64 |   /* Apply DM Sans globally */
65 |   body {
66 |     font-family: 'DM Sans', sans-serif;
67 |   }
68 | `;
69 | 
70 | // Color Palette
71 | export const colors = {
72 |   primary: {
73 |     blue: "#468BFF",
74 |     lightBlue: "#8FBCFA",
75 |     red: "#FE363B",
76 |     lightRed: "#FF9A9D",
77 |     yellow: "#FDBB11",
78 |     lightYellow: "#F6D785"
79 |   }
80 | };
81 | 
82 | // Animation Durations
83 | export const ANIMATION_DURATIONS = {
84 |   reset: 300,
85 |   collapse: 1000,
86 |   briefingCollapse: 2000
87 | }; 


--------------------------------------------------------------------------------
/ui/src/utils/handlers.ts:
--------------------------------------------------------------------------------
  1 | import { API_URL } from './constants';
  2 | import { ResearchOutput, ResearchState, ResearchStatusType } from '../types';
  3 | 
  4 | export const handleGeneratePdf = async (
  5 |   output: ResearchOutput | null,
  6 |   originalCompanyName: string,
  7 |   setIsGeneratingPdf: (value: boolean) => void,
  8 |   setError: (error: string | null) => void,
  9 |   isGeneratingPdf: boolean
 10 | ) => {
 11 |   if (!output || isGeneratingPdf) return;
 12 |   
 13 |   setIsGeneratingPdf(true);
 14 |   try {
 15 |     console.log("Generating PDF with company name:", originalCompanyName);
 16 |     const response = await fetch(`${API_URL}/generate-pdf`, {
 17 |       method: 'POST',
 18 |       headers: {
 19 |         'Content-Type': 'application/json',
 20 |       },
 21 |       body: JSON.stringify({
 22 |         report_content: output.details.report,
 23 |         company_name: originalCompanyName || 'research_report'
 24 |       }),
 25 |     });
 26 |     
 27 |     if (!response.ok) {
 28 |       throw new Error('Failed to generate PDF');
 29 |     }
 30 |     
 31 |     // Get the blob from the response
 32 |     const blob = await response.blob();
 33 |     
 34 |     // Create a URL for the blob
 35 |     const url = window.URL.createObjectURL(blob);
 36 |     
 37 |     // Create a temporary link element
 38 |     const link = document.createElement('a');
 39 |     link.href = url;
 40 |     link.download = `${originalCompanyName || 'research_report'}.pdf`;
 41 |     
 42 |     // Append to body, click, and remove
 43 |     document.body.appendChild(link);
 44 |     link.click();
 45 |     document.body.removeChild(link);
 46 |     
 47 |     // Clean up the URL
 48 |     window.URL.revokeObjectURL(url);
 49 |     
 50 |   } catch (error) {
 51 |     console.error('Error generating PDF:', error);
 52 |     setError(error instanceof Error ? error.message : 'Failed to generate PDF');
 53 |   } finally {
 54 |     setIsGeneratingPdf(false);
 55 |   }
 56 | };
 57 | 
 58 | export const handleCopyToClipboard = async (
 59 |   output: ResearchOutput | null,
 60 |   setIsCopied: (value: boolean) => void,
 61 |   setError: (error: string | null) => void
 62 | ) => {
 63 |   if (!output?.details?.report) return;
 64 |   
 65 |   try {
 66 |     await navigator.clipboard.writeText(output.details.report);
 67 |     setIsCopied(true);
 68 |     setTimeout(() => setIsCopied(false), 2000); // Reset after 2 seconds
 69 |   } catch (err) {
 70 |     console.error('Failed to copy text: ', err);
 71 |     setError('Failed to copy to clipboard');
 72 |   }
 73 | };
 74 | 
 75 | export const checkForFinalReport = async (
 76 |   jobId: string,
 77 |   setOutput: (output: ResearchOutput | null) => void,
 78 |   setStatus: (status: ResearchStatusType | null) => void,
 79 |   setIsComplete: (value: boolean) => void,
 80 |   setIsResearching: (value: boolean) => void,
 81 |   setCurrentPhase: (phase: 'search' | 'enrichment' | 'briefing' | 'complete' | null) => void,
 82 |   setHasFinalReport: (value: boolean) => void,
 83 |   pollingIntervalRef: React.MutableRefObject<NodeJS.Timeout | null>
 84 | ) => {
 85 |   try {
 86 |     const response = await fetch(`${API_URL}/research/status/${jobId}`);
 87 |     if (!response.ok) throw new Error('Failed to fetch status');
 88 |     
 89 |     const data = await response.json();
 90 |     
 91 |     if (data.status === "completed" && data.result?.report) {
 92 |       setOutput({
 93 |         summary: "",
 94 |         details: {
 95 |           report: data.result.report,
 96 |         },
 97 |       });
 98 |       setStatus({
 99 |         step: "Complete",
100 |         message: "Research completed successfully"
101 |       });
102 |       setIsComplete(true);
103 |       setIsResearching(false);
104 |       setCurrentPhase('complete');
105 |       setHasFinalReport(true);
106 |       
107 |       // Clear polling interval
108 |       if (pollingIntervalRef.current) {
109 |         clearInterval(pollingIntervalRef.current);
110 |         pollingIntervalRef.current = null;
111 |       }
112 |     }
113 |   } catch (error) {
114 |     console.error('Error checking final report:', error);
115 |   }
116 | };
117 | 
118 | export const resetResearch = (
119 |   setStatus: (status: ResearchStatusType | null) => void,
120 |   setOutput: (output: ResearchOutput | null) => void,
121 |   setError: (error: string | null) => void,
122 |   setIsComplete: (value: boolean) => void,
123 |   setResearchState: (state: ResearchState) => void,
124 |   setPdfUrl: (url: string | null) => void,
125 |   setCurrentPhase: (phase: 'search' | 'enrichment' | 'briefing' | 'complete' | null) => void,
126 |   setIsSearchPhase: (value: boolean) => void,
127 |   setShouldShowQueries: (value: boolean) => void,
128 |   setIsQueriesExpanded: (value: boolean) => void,
129 |   setIsBriefingExpanded: (value: boolean) => void,
130 |   setIsEnrichmentExpanded: (value: boolean) => void,
131 |   setIsResetting: (value: boolean) => void,
132 |   setHasScrolledToStatus: (value: boolean) => void
133 | ) => {
134 |   setIsResetting(true);
135 |   
136 |   // Use setTimeout to create a smooth transition
137 |   setTimeout(() => {
138 |     setStatus(null);
139 |     setOutput(null);
140 |     setError(null);
141 |     setIsComplete(false);
142 |     setResearchState({
143 |       status: "idle",
144 |       message: "",
145 |       queries: [],
146 |       streamingQueries: {},
147 |       briefingStatus: {
148 |         company: false,
149 |         industry: false,
150 |         financial: false,
151 |         news: false
152 |       }
153 |     });
154 |     setPdfUrl(null);
155 |     setCurrentPhase(null);
156 |     setIsSearchPhase(false);
157 |     setShouldShowQueries(false);
158 |     setIsQueriesExpanded(true);
159 |     setIsBriefingExpanded(true);
160 |     setIsEnrichmentExpanded(true);
161 |     setIsResetting(false);
162 |     setHasScrolledToStatus(false); // Reset scroll flag when resetting research
163 |   }, 300); // Match this with CSS transition duration
164 | }; 


--------------------------------------------------------------------------------
/ui/src/vite-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="vite/client" />
2 | 


--------------------------------------------------------------------------------
/ui/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | export default {
3 |   content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"],
4 |   theme: {
5 |     extend: {},
6 |   },
7 |   plugins: [],
8 | };
9 | 


--------------------------------------------------------------------------------
/ui/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "ES2020",
 4 |     "useDefineForClassFields": true,
 5 |     "lib": ["ES2020", "DOM", "DOM.Iterable"],
 6 |     "module": "ESNext",
 7 |     "skipLibCheck": true,
 8 | 
 9 |     /* Bundler mode */
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "isolatedModules": true,
13 |     "moduleDetection": "force",
14 |     "noEmit": true,
15 |     "jsx": "react-jsx",
16 | 
17 |     /* Linting */
18 |     "strict": true,
19 |     "noUnusedLocals": true,
20 |     "noUnusedParameters": true,
21 |     "noFallthroughCasesInSwitch": true
22 |   },
23 |   "include": ["src"]
24 | }
25 | 


--------------------------------------------------------------------------------
/ui/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "files": [],
3 |   "references": [
4 |     { "path": "./tsconfig.app.json" },
5 |     { "path": "./tsconfig.node.json" }
6 |   ]
7 | }
8 | 


--------------------------------------------------------------------------------
/ui/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "composite": true,
 4 |     "skipLibCheck": true,
 5 |     "module": "ESNext",
 6 |     "moduleResolution": "bundler",
 7 |     "allowSyntheticDefaultImports": true
 8 |   },
 9 |   "include": ["vite.config.ts"]
10 | }
11 | 


--------------------------------------------------------------------------------
/ui/vercel.json:
--------------------------------------------------------------------------------
1 | {
2 |   "rewrites": [
3 |     {
4 |       "source": "/(.*)",
5 |       "destination": "/index.html"
6 |     }
7 |   ]
8 | }
9 | 


--------------------------------------------------------------------------------
/ui/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from "vite";
 2 | import react from "@vitejs/plugin-react";
 3 | 
 4 | // https://vitejs.dev/config/
 5 | export default defineConfig({
 6 |   plugins: [react()],
 7 |   optimizeDeps: {
 8 |     exclude: ["lucide-react"],
 9 |   },
10 |   build: {
11 |     outDir: "dist",
12 |     sourcemap: true,
13 |   },
14 |   server: {
15 |     port: 5174,
16 |     strictPort: true,
17 |     host: true,
18 |     proxy: {
19 |       '/api': {
20 |         target: 'http://tavily-company-research.eba-h6x8kkzc.us-east-1.elasticbeanstalk.com',
21 |         changeOrigin: true,
22 |         secure: false,
23 |         rewrite: (path) => path.replace(/^\/api/, '')
24 |       }
25 |     }
26 |   },
27 | });
28 | 


--------------------------------------------------------------------------------