├── .env.example ├── .gitignore ├── LICENSE.md ├── README.md ├── README_KR.md ├── case1 │ ├── auto_mcp_json.py │ ├── config.py │ ├── mcp_server.py │ └── rag │ │ ├── __init__.py │ │ ├── base.py │ │ └── pdf.py ├── case2 │ ├── auto_mcp_json.py │ ├── dify_ek_server.py │ └── mcp_server.py ├── case3 │ ├── auto_mcp_json.py │ └── mcp_server.py ├── case4 │ ├── auto_mcp_json.py │ └── mcp_server.py ├── data │ └── .gitkeep ├── docs │ ├── case1.md │ ├── case2.md │ ├── case3.md │ ├── case4.md │ └── installation.md ├── pyproject.toml ├── requirements.txt ├── requirements_windows.txt └── uv.lock /.env.example: -------------------------------------------------------------------------------- 1 | # case1: OpenAI API Setting 2 | OPENAI_API_KEY = "sk-" 3 | 4 | # case2: Dify External Knowledge API Setting example 5 | DIFY_API_ENDPOINT = "http://localhost:8000/retrieval" 6 | DIFY_API_KEY = "dify-external-knowledge-api-key" 7 | DIFY_KNOWLEDGE_ID = "test-knowledge-base" 8 | 9 | # case3: Dify Workflow API Setting example 10 | DIFY_BASE_URL = "https://api.dify.ai/v1" 11 | DIFY_APP_SK = "app-" 12 | 13 | # case4: Tavily API Setting example 14 | TAVILY_API_KEY = "tvly" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries.
101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | .python-version 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | 171 | # PyPI configuration file 172 | .pypirc 173 | 174 | # etc 175 | vector* 176 | chroma* 177 | .DS_Store 178 | *.pdf -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 TeddyNote 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quick-start Auto MCP : All in one Claude Desktop and Cursor 2 | 3 | [English](README.md) | [한국어](README_KR.md) 4 | 5 | ## Introduction 6 | 7 | **Quick-start Auto MCP** is a tool that helps you easily and quickly register Anthropic's Model Context Protocol (MCP) in Claude Desktop and Cursor. 8 | 9 | **Key advantages:** 10 | 1. **Quick Setup**: Add MCP functionality to Claude Desktop and Cursor simply by running a tool and copying/pasting the generated JSON file. 11 | 2. **Various Tools Provided**: We continuously update useful MCP tools. Stay up to date with your personalized toolkit by starring and following us. :) 12 | 13 | ## Table of Contents 14 | 15 | - [Features](#features) 16 | - [Project Structure](#project-structure) 17 | - [Requirements](#requirements) 18 | - [Installation](#installation) 19 | - [Configuration](#configuration) 20 | - [Usage](#usage) 21 | - [Troubleshooting](#troubleshooting) 22 | - [License](#license) 23 | - [Contributing](#contributing) 24 | - [Contact](#contact) 25 | - [Author](#author) 26 | 27 | ## Features 28 | 29 | - **RAG (Retrieval Augmented Generation)** - Keyword, semantic, and hybrid search functionality for PDF documents 30 | - **Dify External Knowledge API** - Document search functionality via Dify's external knowledge API 31 | - **Dify Workflow** - Execute and retrieve results from Dify Workflow 32 | - **Web Search** - Real-time web search using Tavily API 33 | - **Automatic JSON Generation** - Automatically generate MCP JSON files needed for Claude Desktop and Cursor 34 | 35 | ## Project Structure 36 | 37 | ``` 38 | . 39 | ├── case1 # RAG example 40 | ├── case2 # Dify External Knowledge API example 41 | ├── case3 # Dify Workflow example 42 | ├── case4 # Web Search example 43 | ├── data # Example data files 44 | ├── docs # Documentation folder 45 | │ ├── case1.md # case1 description 🚨 Includes tips for optimized tool invocation 46 | │ ├── case2.md # case2 description 47 | │ ├── case3.md # case3 description 48 | │ ├── case4.md # case4 description 49 | │ └── installation.md # Installation guide 50 | ├── .env.example # .env example format 51 | ├── pyproject.toml # Project settings 52 | ├── requirements.txt # Required packages list 53 | └── uv.lock # uv.lock 54 | ``` 55 | 56 | ## Requirements 57 | 58 | - Python >= 3.11 59 | - Claude Desktop or Cursor (a version that supports MCP) 60 | - uv (recommended) or pip 61 | 62 | ## Installation 63 | 64 | ### 1. Clone the repository 65 | 66 | ```bash 67 | git clone https://github.com/teddynote-lab/mcp.git 68 | cd mcp 69 | ``` 70 | 71 | ### 2. Set up virtual environment 72 | 73 | #### Using uv (recommended) 74 | ```bash 75 | # macOS/Linux 76 | uv venv 77 | uv pip install -r requirements.txt 78 | ``` 79 | 80 | ```bash 81 | # Windows 82 | uv venv 83 | uv pip install -r requirements_windows.txt 84 | ``` 85 | 86 | #### Using pip 87 | ```bash 88 | python -m venv .venv 89 | 90 | # Windows 91 | .venv\Scripts\activate 92 | pip install -r requirements_windows.txt 93 | 94 | # macOS/Linux 95 | source .venv/bin/activate 96 | 97 | pip install -r requirements.txt 98 | ``` 99 | 100 | ### 3. Preparing the PDF File 101 | 102 | Please prepare a PDF file required for RAG in the `./data` directory. This project uses *Recent Trends in the AI Industry* from the [March 2025 issue of the SPRi monthly AI report](https://spri.kr/posts/view/23827?code=AI-Brief&s_year=&data_page=1). 103 | 104 | ## Configuration 105 | 106 | In order to execute each case, a `.env` file is required.
107 | Please specify the necessary environment variables in the `.env.example` file located in the root directory, and rename it to `.env`. 108 | 109 | ### Sites for configuring required environment variables for each case 110 | - https://platform.openai.com/api-keys 111 | - https://dify.ai/ 112 | - https://app.tavily.com/home 113 | 114 | ## Usage 115 | 116 | ### 1. Generate JSON File 117 | 118 | Run the following command in each case directory to generate the necessary JSON file: 119 | 120 | ```bash 121 | # Activate virtual environment 122 | 123 | # Windows 124 | .venv\Scripts\activate 125 | 126 | # macOS/Linux 127 | source .venv/bin/activate 128 | 129 | # Navigate to example directory 130 | cd case1 131 | 132 | # Generate JSON file 133 | python auto_mcp_json.py 134 | ``` 135 | 136 | ### 2. Register MCP in Claude Desktop/Cursor 137 | 138 | 1. Launch Claude Desktop or Cursor 139 | 2. Open MCP settings menu 140 | 3. Copy and paste the generated JSON content 141 | 4. Save and `restart` (If you're using Windows, we recommend fully closing the process via Task Manager and then restarting the application.) 142 | 143 | > **Note**: When you run Claude Desktop or Cursor, the MCP server will automatically run with it. When you close the software, the MCP server will also terminate. 144 | 145 | ## Troubleshooting 146 | 147 | Common issues and solutions: 148 | 149 | - **MCP Server Connection Failure**: Check if the service is running properly and if there are no port conflicts. In particular, when applying case2, you must also run `dify_ek_server.py`. 150 | - **API Key Errors**: Verify that environment variables are set correctly. 151 | - **Virtual Environment Issues**: Ensure Python version is 3.11 or higher. 152 | 153 | ## License 154 | 155 | [MIT LICENSE](LICENSE.md) 156 | 157 | ## Contributing 158 | 159 | Contributions are always welcome! Please participate in the project through issue registration or pull requests. :) 160 | 161 | ## Contact 162 | 163 | If you have questions or need help, please register an issue or contact: 164 | dev@brain-crew.com 165 | 166 | ## Author 167 | [Hantaek Lim](https://github.com/LHANTAEK) -------------------------------------------------------------------------------- /README_KR.md: -------------------------------------------------------------------------------- 1 | # Quick-start Auto MCP : All in one Claude Desktop and Cursor 2 | 3 | [English](README.md) | [한국어](README_KR.md) 4 | 5 | ## 소개 6 | 7 | **Quick-start Auto MCP**는 Anthropic의 Model Context Protocol(MCP)을 Claude Desktop과 Cursor에 쉽고 빠르게 등록하여 사용할 수 있도록 도와주는 도구입니다. 8 | 9 | **주요 장점:** 10 | 1. **빠른 설정**: 간단한 도구 실행 및 JSON 파일 복사/붙여넣기만으로 Claude Desktop과 Cursor에 MCP 기능을 바로 추가할 수 있습니다. 11 | 2. **다양한 도구 제공**: 유용한 MCP 도구들을 지속적으로 업데이트합니다. Star와 Follow를 통해 나만의 도구를 꾸준히 업데이트 해보세요. :) 12 | 13 | ## 목차 14 | 15 | - [특징](#특징) 16 | - [프로젝트 구조](#프로젝트-구조) 17 | - [요구사항](#요구사항) 18 | - [설치](#설치) 19 | - [환경 변수 설정](#환경-변수-설정) 20 | - [사용법](#사용법) 21 | - [문제해결](#문제해결) 22 | - [라이센스](#라이센스) 23 | - [기여하기](#기여하기) 24 | - [문의하기](#문의하기) 25 | - [저자](#저자) 26 | 27 | ## 특징 28 | 29 | - **RAG(Retrieval Augmented Generation)** - PDF 문서를 대상으로 키워드, 시맨틱, 하이브리드 검색 기능 30 | - **Dify External Knowledge API** - Dify의 외부 지식 API를 통한 문서 검색 기능 31 | - **Dify Workflow** - Dify Workflow 실행 및 결과 검색 기능 32 | - **Web Search** - Tavily API를 활용한 실시간 웹 검색 기능 33 | - **자동 JSON 생성** - Claude Desktop과 Cursor에 필요한 MCP JSON 파일 자동 생성 34 | 35 | ## 프로젝트 구조 36 | 37 | ``` 38 | .
39 | ├── case1 # RAG 예제 40 | ├── case2 # Dify External Knowledge API 예제 41 | ├── case3 # Dify Workflow 예제 42 | ├── case4 # Web Search 예제 43 | ├── data # PDF 데이터 파일 44 | ├── docs # 문서 폴더 45 | │ ├── case1.md # case1 예제 설명 🚨 도구 호출 최적화 팁 포함 46 | │ ├── case2.md # case2 예제 설명 47 | │ ├── case3.md # case3 예제 설명 48 | │ ├── case4.md # case4 예제 설명 49 | │ └── installation.md # 설치 가이드 50 | ├── .env.example # .env 예시 51 | ├── pyproject.toml # 프로젝트 설정 52 | ├── requirements.txt # 필요 패키지 목록 53 | └── uv.lock # uv.lock 54 | ``` 55 | 56 | ## 요구사항 57 | 58 | - Python >= 3.11 59 | - Claude Desktop 또는 Cursor 60 | - uv (권장) 또는 pip 61 | 62 | ## 설치 63 | 64 | ### 1. 저장소 복제 65 | 66 | ```bash 67 | git clone https://github.com/teddynote-lab/mcp.git 68 | cd mcp 69 | ``` 70 | 71 | ### 2. 가상 환경 설정 72 | 73 | #### uv 사용 (권장) 74 | ```bash 75 | # macOS/Linux 76 | uv venv 77 | uv pip install -r requirements.txt 78 | ``` 79 | 80 | ```bash 81 | # Windows 82 | uv venv 83 | uv pip install -r requirements_windows.txt 84 | ``` 85 | 86 | #### pip 사용 87 | ```bash 88 | python -m venv .venv 89 | 90 | # Windows 91 | .venv\Scripts\activate 92 | pip install -r requirements_windows.txt 93 | 94 | # macOS/Linux 95 | source .venv/bin/activate 96 | 97 | pip install -r requirements.txt 98 | ``` 99 | 100 | ### 3. PDF 준비 101 | 102 | RAG에 필요한 PDF 파일을 `./data`에 넣어주세요. 103 | 104 | 본 프로젝트에서는 [소프트웨어정책연구소(SPRi)의 25년 3월호](https://spri.kr/posts/view/23827?code=AI-Brief&s_year=&data_page=1) `인공지능 산업의 최신 동향`을 사용했습니다. 105 | 106 | ## 환경 변수 설정 107 | 108 | 각 예제를 실행하기 위한 `.env` 파일이 필요합니다. 루트 디렉토리의 `.env.example`에 필요한 환경 변수를 설정하고 파일명을 `.env`로 변경해주세요. 109 | 110 | ### 예제별 필요한 환경 변수 설정 사이트 111 | - https://platform.openai.com/api-keys 112 | - https://dify.ai/ 113 | - https://app.tavily.com/home 114 | 115 | 116 | ## 사용법 117 | 118 | ### 1. JSON 파일 생성 119 | 120 | 각 예제 디렉토리에서 다음 명령을 실행하여 필요한 JSON 파일을 생성합니다: 121 | 122 | ```bash 123 | # 가상 환경 활성화 124 | 125 | # Windows 126 | .venv\Scripts\activate 127 | 128 | # macOS/Linux 129 | source .venv/bin/activate 130 | 131 | # 예제 디렉토리로 이동 132 | cd case1 133 | 134 | # JSON 파일 생성 135 | python auto_mcp_json.py 136 | ``` 137 | 138 | ### 2. Claude Desktop/Cursor에 MCP 등록 139 | 140 | 1. Claude Desktop 또는 Cursor 실행 141 | 2. MCP 설정 메뉴 열기 142 | 3. 생성된 JSON 내용을 복사하여 붙여넣기 143 | 4. 저장 및 `재시작` (윈도우 유저의 경우 작업관리자로 프로세스를 완전히 종료하고 재시작 해주시는 걸 권장합니다.) 144 | 145 | > **참고**: Claude Desktop 또는 Cursor를 실행하면 MCP 서버가 자동으로 함께 실행되며, 소프트웨어를 종료하면 MCP 서버도 함께 종료됩니다. 146 | 147 | ## Troubleshooting 148 | 149 | 일반적인 문제 및 해결 방법: 150 | 151 | - **MCP 서버 연결 실패**: 서비스가 올바르게 실행 중인지, 포트가 충돌하지 않는지 확인하세요. 특히, case2를 적용할 때는 `dify_ek_server.py`를 같이 실행시켜주셔야 합니다. 152 | - **API 키 오류**: 환경 변수가 올바르게 설정되었는지 확인하세요. 153 | - **가상 환경 문제**: Python 버전이 3.11 이상인지 확인하세요. 154 | 155 | ## 라이센스 156 | 157 | [MIT 라이센스](LICENSE.md) 158 | 159 | ## 기여하기 160 | 161 | 기여는 언제나 환영합니다! 이슈, 버그 또는 기능 추가에 대한 의견을 남겨주세요. :) 162 | 163 | ## 문의하기 164 | 165 | 질문이나 도움이 필요하시면 이슈를 등록하거나 다음 연락처로 문의해 주세요: 166 | dev@brain-crew.com 167 | 168 | ## 저자 169 | 170 | [임한택 Hantaek Lim](https://github.com/LHANTAEK) -------------------------------------------------------------------------------- /case1/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | 7 | 8 | def get_env_variables(): 9 | """ 10 | Load environment variables and return required variables as a dictionary. 
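Example of the returned dict (illustrative placeholder values, assuming only
OPENAI_API_KEY is set in .env):

    {"OPENAI_API_KEY": "sk-...", "DEFAULT_TOP_K": "5"}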
11 | 12 | Returns: 13 | dict: Dictionary containing environment variables and default configuration values 14 | """ 15 | 16 | load_dotenv() 17 | 18 | required_vars = [ 19 | "OPENAI_API_KEY", 20 | ] 21 | 22 | config_vars = {"DEFAULT_TOP_K": "5"} 23 | 24 | env_dict = {} 25 | 26 | for var in required_vars: 27 | value = os.getenv(var) 28 | if value: 29 | env_dict[var] = value 30 | 31 | env_dict.update(config_vars) 32 | 33 | return env_dict 34 | 35 | 36 | def create_mcp_json(): 37 | """ 38 | Create a Model Context Protocol (MCP) server configuration JSON file. 39 | 40 | This function generates a configuration file that defines how the MCP server 41 | should be launched, including the Python interpreter path, server script location, 42 | and necessary environment variables. 43 | 44 | Returns: 45 | str: Path to the created JSON configuration file 46 | """ 47 | 48 | project_root = Path(__file__).parent.absolute() 49 | 50 | # .venv python executable path 51 | if os.name == 'nt': # Windows 52 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 53 | else: # Mac, Ubuntu etc 54 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 55 | 56 | server_script = project_root / "mcp_server.py" 57 | 58 | env_vars = get_env_variables() 59 | 60 | config = { 61 | "mcpServers": { 62 | "rag-mcp": { 63 | "command": python_path, 64 | "args": [str(server_script)], 65 | "env": env_vars, 66 | } 67 | } 68 | } 69 | 70 | json_path = project_root / "mcp_config.json" 71 | 72 | with open(json_path, "w", encoding="utf-8") as f: 73 | json.dump(config, f, indent=2) 74 | 75 | print(f"MCP configuration file has been created: {json_path}") 76 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 77 | 78 | return str(json_path) 79 | 80 | 81 | if __name__ == "__main__": 82 | create_mcp_json() 83 | -------------------------------------------------------------------------------- /case1/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # Path settings 4 | DATA_DIR = Path(__file__).parent.parent / "data" 5 | VECTOR_DIR = Path(__file__).parent / "vector_db" 6 | 7 | # Default settings 8 | DEFAULT_CHUNK_SIZE = 600 9 | DEFAULT_CHUNK_OVERLAP = 50 10 | DEFAULT_TOP_K = 5 11 | DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small" 12 | DEFAULT_LLM_MODEL = "gpt-4o-mini" -------------------------------------------------------------------------------- /case1/mcp_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import List 4 | 5 | from dotenv import load_dotenv 6 | from langchain_core.documents import Document 7 | from mcp.server.fastmcp import FastMCP 8 | 9 | from rag import PDFRetrievalChain 10 | import config 11 | 12 | load_dotenv() 13 | 14 | DATA_DIR = Path(os.getenv("DATA_DIR", config.DATA_DIR)) 15 | pdf_files = list(DATA_DIR.glob("*.pdf")) 16 | pdf_paths = [str(path) for path in pdf_files] 17 | 18 | VECTOR_DIR = Path(os.getenv("VECTOR_DIR", config.VECTOR_DIR)) 19 | 20 | rag_chain = PDFRetrievalChain( 21 | source_uri = pdf_paths, 22 | persist_directory = str(VECTOR_DIR), 23 | k = config.DEFAULT_TOP_K, 24 | embedding_model = config.DEFAULT_EMBEDDING_MODEL, 25 | llm_model = config.DEFAULT_LLM_MODEL 26 | ).initialize() 27 | 28 | mcp = FastMCP( 29 | name="RAG", 30 | version="0.0.1", 31 | description="RAG Search(keyword, semantic, hybrid)" 32 | ) 33 | 34 | def format_search_results(docs: List[Document]) -> str: 35 
| """ 36 | Format search results as markdown. 37 | 38 | Args: 39 | docs: List of documents to format 40 | 41 | Returns: 42 | Markdown formatted search results 43 | 44 | """ 45 | 46 | if not docs: 47 | return "No relevant information found." 48 | 49 | markdown_results = "## Search Results\n\n" 50 | 51 | for i, doc in enumerate(docs, 1): 52 | source = doc.metadata.get("source", "Unknown source") 53 | page = doc.metadata.get("page", None) 54 | page_info = f" (Page: {page+1})" if page is not None else "" 55 | 56 | markdown_results += f"### Result {i}{page_info}\n\n" 57 | markdown_results += f"{doc.page_content}\n\n" 58 | markdown_results += f"Source: {source}\n\n" 59 | markdown_results += "---\n\n" 60 | 61 | return markdown_results 62 | 63 | @mcp.tool() 64 | async def keyword_search(query: str, top_k: int = 5) -> str: 65 | """ 66 | Performs keyword-based search on PDF documents. 67 | Returns the most relevant results based on exact word/phrase matches. 68 | Ideal for finding specific terms, definitions, or exact phrases in documents. 69 | 70 | Parameters: 71 | query: Search query 72 | top_k: Number of results to return 73 | 74 | """ 75 | 76 | try: 77 | results = rag_chain.search_keyword(query, top_k) 78 | return format_search_results(results) 79 | except Exception as e: 80 | return f"An error occurred during search: {str(e)}" 81 | 82 | @mcp.tool() 83 | async def semantic_search(query: str, top_k: int = 5) -> str: 84 | """ 85 | Performs semantic search on PDF documents. 86 | Finds content semantically similar to the query, delivering relevant information even without exact word matches. 87 | Best for conceptual questions, understanding themes, or when you need information related to a topic. 88 | 89 | Parameters: 90 | query: Search query 91 | top_k: Number of results to return 92 | 93 | """ 94 | 95 | try: 96 | results = rag_chain.search_semantic(query, top_k) 97 | return format_search_results(results) 98 | except Exception as e: 99 | return f"An error occurred during search: {str(e)}" 100 | 101 | @mcp.tool() 102 | async def hybrid_search(query: str, top_k: int = 5) -> str: 103 | """ 104 | Performs hybrid search (keyword + semantic) on PDF documents. 105 | Combines exact keyword matching and semantic similarity to deliver optimal results. 106 | The most versatile search option for general questions or when unsure which search type is best. 
107 | 108 | Parameters: 109 | query: Search query 110 | top_k: Number of results to return 111 | 112 | """ 113 | 114 | try: 115 | results = rag_chain.search_hybrid(query, top_k) 116 | return format_search_results(results) 117 | except Exception as e: 118 | return f"An error occurred during search: {str(e)}" 119 | 120 | if __name__ == "__main__": 121 | mcp.run() -------------------------------------------------------------------------------- /case1/rag/__init__.py: -------------------------------------------------------------------------------- 1 | from rag.base import RetrievalChain 2 | from rag.pdf import PDFRetrievalChain 3 | 4 | __all__ = [ 5 | 'RetrievalChain', 6 | 'PDFRetrievalChain', 7 | ] -------------------------------------------------------------------------------- /case1/rag/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Dict, Any, Optional 3 | from pathlib import Path 4 | 5 | from langchain.retrievers.ensemble import EnsembleRetriever 6 | from langchain_community.retrievers import BM25Retriever 7 | from langchain_core.documents import Document 8 | from langchain_core.retrievers import BaseRetriever 9 | from langchain_openai import OpenAIEmbeddings 10 | 11 | class RetrievalChain(ABC): 12 | """ 13 | Abstract base class for RAG search implementations. 14 | 15 | This class provides a template for different document retrieval chains, 16 | allowing for customization of document loading, splitting, vectorization, 17 | and various search methods. 18 | """ 19 | 20 | def __init__(self, **kwargs) -> None: 21 | """ 22 | Initialize a RetrievalChain with configuration parameters. 23 | 24 | Args: 25 | **kwargs: Keyword arguments including: 26 | source_uri: Paths to source documents 27 | k: Number of results to return (default: 5) 28 | embedding_model: Model name for embeddings (default: OpenAI "text-embedding-3-small") 29 | persist_directory: Directory to persist vector store 30 | """ 31 | 32 | self.source_uri = kwargs.get("source_uri", []) 33 | self.k = kwargs.get("k", 5) 34 | self.embedding_model = kwargs.get("embedding_model", "text-embedding-3-small") 35 | self.persist_directory = kwargs.get("persist_directory", None) 36 | self.embeddings = None 37 | self.vectorstore = None 38 | self.retrievers = None 39 | self.split_docs = None 40 | 41 | @abstractmethod 42 | def load_documents(self, source_uris: List[str]) -> List[Document]: 43 | """ 44 | Load documents from source URIs. 45 | 46 | Args: 47 | source_uris: List of file paths or URIs to load documents from 48 | 49 | Returns: 50 | List of loaded documents 51 | """ 52 | 53 | pass 54 | 55 | @abstractmethod 56 | def create_text_splitter(self) -> Any: 57 | """ 58 | Create a text splitter appropriate for the document type. 59 | 60 | Returns: 61 | A text splitter instance 62 | """ 63 | 64 | pass 65 | 66 | def split_documents(self, docs: List[Document], text_splitter: Any) -> List[Document]: 67 | """ 68 | Split documents into chunks using the provided text splitter. 69 | 70 | Args: 71 | docs: Documents to split 72 | text_splitter: Text splitter instance 73 | 74 | Returns: 75 | Split document chunks 76 | """ 77 | 78 | return text_splitter.split_documents(docs) 79 | 80 | def create_embedding(self) -> Any: 81 | """ 82 | Create an embedding model instance. 
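The model name comes from self.embedding_model, set at construction time
(default: "text-embedding-3-small").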
83 | 84 | Returns: 85 | An embeddings model instance 86 | """ 87 | 88 | return OpenAIEmbeddings(model=self.embedding_model) 89 | 90 | @abstractmethod 91 | def create_vectorstore(self, split_docs: List[Document]) -> Any: 92 | """ 93 | Create a vector store from split documents. 94 | 95 | Args: 96 | split_docs: Split document chunks 97 | 98 | Returns: 99 | A vector store instance 100 | """ 101 | 102 | pass 103 | 104 | def create_semantic_retriever(self, vectorstore: Any) -> BaseRetriever: 105 | """ 106 | Create a semantic search retriever. 107 | 108 | Args: 109 | vectorstore: Vector store instance 110 | 111 | Returns: 112 | A semantic search retriever 113 | """ 114 | 115 | return vectorstore.as_retriever( 116 | search_kwargs={"k": self.k} 117 | ) 118 | 119 | def create_keyword_retriever(self, split_docs: List[Document]) -> BaseRetriever: 120 | """ 121 | Create a keyword-based search retriever. 122 | 123 | Args: 124 | split_docs: Split document chunks 125 | 126 | Returns: 127 | A keyword search retriever 128 | """ 129 | 130 | return BM25Retriever.from_documents(split_docs, k=self.k) 131 | 132 | def create_hybrid_retriever(self, split_docs: List[Document], vectorstore: Any) -> BaseRetriever: 133 | """ 134 | Create a hybrid search retriever combining keyword and semantic search. 135 | 136 | Args: 137 | split_docs: Split document chunks 138 | vectorstore: Vector store instance 139 | 140 | Returns: 141 | A hybrid search retriever 142 | """ 143 | 144 | bm25_retriever = self.create_keyword_retriever(split_docs) 145 | dense_retriever = self.create_semantic_retriever(vectorstore) 146 | 147 | return EnsembleRetriever( 148 | retrievers=[bm25_retriever, dense_retriever], 149 | weights=[0.5, 0.5] 150 | ) 151 | 152 | def create_retrievers(self, split_docs: List[Document]) -> Dict[str, BaseRetriever]: 153 | """ 154 | Create all retriever types. 155 | 156 | Args: 157 | split_docs: Split document chunks 158 | 159 | Returns: 160 | Dictionary of retrievers by search type 161 | """ 162 | 163 | self.embeddings = self.create_embedding() 164 | self.vectorstore = self.create_vectorstore(split_docs) 165 | 166 | return { 167 | "semantic": self.create_semantic_retriever(self.vectorstore), 168 | "keyword": self.create_keyword_retriever(split_docs), 169 | "hybrid": self.create_hybrid_retriever(split_docs, self.vectorstore) 170 | } 171 | 172 | def initialize(self) -> "RetrievalChain": 173 | """ 174 | Initialize the retrieval chain by loading documents, splitting them, 175 | and creating retriever instances. 176 | 177 | Returns: 178 | The initialized retrieval chain instance 179 | """ 180 | 181 | docs = self.load_documents(self.source_uri) 182 | if not docs: 183 | print("No documents were loaded.") 184 | return self 185 | 186 | text_splitter = self.create_text_splitter() 187 | self.split_docs = self.split_documents(docs, text_splitter) 188 | 189 | self.retrievers = self.create_retrievers(self.split_docs) 190 | 191 | print(f"Initialization complete: {len(self.split_docs)} chunks created") 192 | return self 193 | 194 | def search_semantic(self, query: str, k: Optional[int] = None) -> List[Document]: 195 | """ 196 | Perform semantic search on the loaded documents. 197 | 198 | Args: 199 | query: Search query 200 | k: Number of results to return, overrides self.k 201 | 202 | Returns: 203 | Relevant documents 204 | 205 | Raises: 206 | ValueError: If the retrieval chain is not initialized 207 | """ 208 | 209 | if not hasattr(self, 'retrievers') or self.retrievers is None: 210 | raise ValueError("Initialization required. 
Call initialize() method first.") 211 | 212 | k = k or self.k 213 | retriever = self.retrievers["semantic"] 214 | retriever.search_kwargs["k"] = k 215 | 216 | return retriever.get_relevant_documents(query) 217 | 218 | def search_keyword(self, query: str, k: Optional[int] = None) -> List[Document]: 219 | """ 220 | Perform keyword-based search on the loaded documents. 221 | 222 | Args: 223 | query: Search query 224 | k: Number of results to return, overrides self.k 225 | 226 | Returns: 227 | Relevant documents 228 | 229 | Raises: 230 | ValueError: If the retrieval chain is not initialized 231 | """ 232 | 233 | if not hasattr(self, 'retrievers') or self.retrievers is None: 234 | raise ValueError("Initialization required. Call initialize() method first.") 235 | 236 | retriever = self.retrievers["keyword"] 237 | retriever.k = k or self.k # BM25Retriever exposes k as a mutable attribute 238 | return retriever.get_relevant_documents(query) 239 | 240 | def search_hybrid(self, query: str, k: Optional[int] = None) -> List[Document]: 241 | """ 242 | Perform hybrid search (keyword + semantic) on the loaded documents. 243 | 244 | Args: 245 | query: Search query 246 | k: Number of results to return (Note: EnsembleRetriever may not support dynamic k) 247 | 248 | Returns: 249 | Relevant documents 250 | 251 | Raises: 252 | ValueError: If the retrieval chain is not initialized 253 | """ 254 | 255 | if not hasattr(self, 'retrievers') or self.retrievers is None: 256 | raise ValueError("Initialization required. Call initialize() method first.") 257 | 258 | return self.retrievers["hybrid"].get_relevant_documents(query) 259 | 260 | def search(self, query: str, k: Optional[int] = None) -> List[Document]: 261 | """ 262 | Default search method that uses semantic search. 263 | 264 | Args: 265 | query: Search query 266 | k: Number of results to return, overrides self.k 267 | 268 | Returns: 269 | Relevant documents 270 | """ 271 | 272 | return self.search_semantic(query, k) 273 | -------------------------------------------------------------------------------- /case1/rag/pdf.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Any 2 | import os 3 | 4 | from langchain_community.document_loaders import PDFPlumberLoader 5 | from langchain_text_splitters import RecursiveCharacterTextSplitter 6 | from langchain_core.documents import Document 7 | from langchain_chroma import Chroma 8 | 9 | from rag.base import RetrievalChain 10 | 11 | class PDFRetrievalChain(RetrievalChain): 12 | """ 13 | PDF-specific implementation of the RetrievalChain. 14 | 15 | This class specializes in loading, splitting, and indexing PDF documents 16 | for retrieval. 17 | """ 18 | 19 | def __init__(self, 20 | source_uri: List[str], 21 | persist_directory: Optional[str] = None, 22 | **kwargs) -> None: 23 | """ 24 | Initialize a PDF retrieval chain. 25 | 26 | Args: 27 | source_uri: List of PDF file paths 28 | persist_directory: Directory to persist vector store 29 | **kwargs: Additional keyword arguments for the base RetrievalChain 30 | """ 31 | 32 | super().__init__(source_uri=source_uri, persist_directory=persist_directory, **kwargs) 33 | 34 | def load_documents(self, source_uris: List[str]) -> List[Document]: 35 | """ 36 | Load PDF documents from file paths.
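Paths that do not exist are skipped with a console warning rather than
raising, so one missing file does not abort loading of the rest.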
37 | 38 | Args: 39 | source_uris: List of PDF file paths 40 | 41 | Returns: 42 | List of loaded documents 43 | """ 44 | 45 | docs = [] 46 | for source_uri in source_uris: 47 | if not os.path.exists(source_uri): 48 | print(f"File not found: {source_uri}") 49 | continue 50 | 51 | print(f"Loading PDF: {source_uri}") 52 | loader = PDFPlumberLoader(source_uri) 53 | docs.extend(loader.load()) 54 | 55 | return docs 56 | 57 | def create_text_splitter(self) -> RecursiveCharacterTextSplitter: 58 | """ 59 | Create a text splitter optimized for PDF documents. 60 | 61 | Returns: 62 | A text splitter instance suitable for PDFs 63 | """ 64 | 65 | return RecursiveCharacterTextSplitter( 66 | chunk_size=600, 67 | chunk_overlap=50 68 | ) 69 | 70 | def create_vectorstore(self, split_docs: List[Document]) -> Any: 71 | """ 72 | Create a vector store from split PDF documents. 73 | 74 | Args: 75 | split_docs: Split document chunks 76 | 77 | Returns: 78 | A vector store instance 79 | 80 | Raises: 81 | ValueError: If there are no split documents 82 | """ 83 | 84 | if not split_docs: 85 | raise ValueError("No split documents available.") 86 | 87 | if self.persist_directory: 88 | os.makedirs(self.persist_directory, exist_ok=True) 89 | 90 | if os.path.exists(self.persist_directory) and any(os.listdir(self.persist_directory)): 91 | print(f"Loading existing vector store: {self.persist_directory}") 92 | 93 | return Chroma( 94 | persist_directory=self.persist_directory, 95 | embedding_function=self.create_embedding() 96 | ) 97 | 98 | print("Creating new vector store...") 99 | 100 | vectorstore = Chroma.from_documents( 101 | documents=split_docs, 102 | embedding=self.create_embedding(), 103 | persist_directory=self.persist_directory 104 | ) 105 | 106 | return vectorstore -------------------------------------------------------------------------------- /case2/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | 6 | from dotenv import load_dotenv 7 | 8 | 9 | def get_env_variables(): 10 | """Loads environment variables and returns them as a dictionary.""" 11 | 12 | load_dotenv() 13 | 14 | required_vars = ["DIFY_API_ENDPOINT", "DIFY_API_KEY", "DIFY_KNOWLEDGE_ID"] 15 | 16 | env_dict = {} 17 | 18 | for var in required_vars: 19 | value = os.getenv(var) 20 | if value: 21 | env_dict[var] = value 22 | 23 | return env_dict 24 | 25 | 26 | def create_mcp_json(): 27 | """Creates MCP server configuration JSON file.""" 28 | 29 | project_root = Path(__file__).parent.absolute() 30 | 31 | # .venv python executable path 32 | if os.name == 'nt': # Windows 33 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 34 | else: # Mac, Ubuntu etc 35 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 36 | 37 | server_script = project_root / "mcp_server.py" 38 | 39 | env_vars = get_env_variables() 40 | 41 | config = { 42 | "mcpServers": { 43 | "dify-ek-api": { 44 | "command": python_path, 45 | "args": [str(server_script)], 46 | "env": env_vars, 47 | } 48 | } 49 | } 50 | 51 | json_path = project_root / "mcp_config.json" 52 | 53 | with open(json_path, "w", encoding="utf-8") as f: 54 | json.dump(config, f, indent=2) 55 | 56 | print(f"MCP configuration file created: {json_path}") 57 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 58 | 59 | return str(json_path) 60 | 61 | 62 | if __name__ == "__main__": 63 | create_mcp_json() 64 | 
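# For reference, the generated mcp_config.json has roughly the following shape;
# the paths and values below are illustrative placeholders, not real output:
#
# {
#   "mcpServers": {
#     "dify-ek-api": {
#       "command": "/path/to/mcp/.venv/bin/python",
#       "args": ["/path/to/mcp/case2/mcp_server.py"],
#       "env": {
#         "DIFY_API_ENDPOINT": "http://localhost:8000/retrieval",
#         "DIFY_API_KEY": "dify-external-knowledge-api-key",
#         "DIFY_KNOWLEDGE_ID": "test-knowledge-base"
#       }
#     }
#   }
# }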
-------------------------------------------------------------------------------- /case2/dify_ek_server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | from typing import Annotated, Any, Dict, List, Optional, TypedDict 5 | from pathlib import Path 6 | 7 | import uvicorn 8 | from dotenv import load_dotenv 9 | from fastapi import Depends, FastAPI, HTTPException, Header 10 | from fastapi.security import APIKeyHeader 11 | from pydantic import BaseModel 12 | 13 | from langchain.retrievers.ensemble import EnsembleRetriever 14 | from langchain_chroma import Chroma 15 | from langchain_community.retrievers import BM25Retriever 16 | from langchain_community.document_loaders import PDFPlumberLoader 17 | from langchain_core.documents import Document 18 | from langchain_openai import OpenAIEmbeddings 19 | from langchain_text_splitters import RecursiveCharacterTextSplitter 20 | from langgraph.graph import END, START, StateGraph 21 | 22 | load_dotenv() 23 | 24 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 25 | logger = logging.getLogger(__name__) 26 | 27 | # API key setup 28 | API_KEY = "dify-external-knowledge-api-key" 29 | api_key_header = APIKeyHeader(name="Authorization") 30 | 31 | # Directory setup 32 | BASE_DIR = Path(__file__).parent 33 | PROJECT_ROOT = BASE_DIR.parent 34 | DATA_DIR = PROJECT_ROOT / "data" 35 | CHROMA_DB_DIR = BASE_DIR / "chroma_db" 36 | 37 | # PDF file path (using project shared data folder) 38 | PDF_FILES = list(DATA_DIR.glob("*.pdf")) 39 | PDF_PATH = PDF_FILES[0] if PDF_FILES else DATA_DIR / "sample.pdf" 40 | 41 | app = FastAPI(title="Dify External Knowledge API - LangGraph Version") 42 | 43 | 44 | ###### STEP 1. State and Preprocessing Function Definition ###### 45 | 46 | class KnowledgeState(TypedDict): 47 | """ 48 | State definition used in LangGraph graph. 49 | 50 | Each field represents data passed between graph nodes. 51 | 52 | """ 53 | 54 | query: Annotated[str, "User's search query"] 55 | 56 | search_method: Annotated[str, "Search method"] 57 | 58 | top_k: Annotated[int, "Maximum number of results to return"] 59 | 60 | score_threshold: Annotated[float, "Minimum relevance score for inclusion (0.0-1.0)"] 61 | 62 | results: Annotated[List[Dict[str, Any]], "List of search results"] 63 | 64 | vector_db: Annotated[Optional[Any], "Chroma vector DB instance"] 65 | 66 | semantic_retriever: Annotated[Optional[Any], "Semantic search retriever"] 67 | keyword_retriever: Annotated[Optional[Any], "Keyword-based search retriever"] 68 | hybrid_retriever: Annotated[Optional[Any], "Hybrid search retriever"] 69 | 70 | 71 | ###### STEP 2. Node Definition ###### 72 | 73 | class DocumentProcessor: 74 | """ 75 | Loads PDF files, extracts text, splits into chunks, 76 | and stores in a vector database (ChromaDB). 77 | 78 | """ 79 | 80 | def __init__(self, knowledge_id="test-knowledge-base"): 81 | self.knowledge_id = knowledge_id 82 | 83 | def __call__(self, state: KnowledgeState) -> KnowledgeState: 84 | """ 85 | Process documents and set up vector storage. 
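If a usable Chroma collection already exists under chroma_db/ it is reused;
otherwise the PDF is loaded, split into 600-character chunks with 50-character
overlap, embedded with text-embedding-3-small, and persisted as a new collection.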
86 | 87 | Args: 88 | state: Current graph state 89 | 90 | Returns: 91 | Updated graph state 92 | 93 | """ 94 | 95 | os.makedirs(DATA_DIR, exist_ok=True) 96 | os.makedirs(CHROMA_DB_DIR, exist_ok=True) 97 | 98 | try: 99 | embedding = OpenAIEmbeddings(model='text-embedding-3-small') 100 | chroma_exists = (CHROMA_DB_DIR / "chroma.sqlite3").exists() 101 | 102 | if chroma_exists: 103 | try: 104 | vector_db = Chroma( 105 | collection_name=self.knowledge_id, 106 | embedding_function=embedding, 107 | persist_directory=str(CHROMA_DB_DIR) 108 | ) 109 | 110 | collection_data = vector_db.get() 111 | 112 | if not collection_data.get("documents", []): 113 | logger.warning("Existing collection is empty. Creating a new one.") 114 | raise ValueError("Empty collection") 115 | 116 | except Exception as e: 117 | logger.warning(f"Failed to load existing vector store: {str(e)}. Creating a new one.") 118 | chroma_exists = False 119 | 120 | if CHROMA_DB_DIR.exists(): 121 | backup_dir = f"{CHROMA_DB_DIR}_backup_{int(time.time())}" 122 | os.rename(CHROMA_DB_DIR, backup_dir) 123 | os.makedirs(CHROMA_DB_DIR, exist_ok=True) 124 | 125 | if not chroma_exists: 126 | loader = PDFPlumberLoader(str(PDF_PATH)) 127 | docs = loader.load() 128 | text_splitter = RecursiveCharacterTextSplitter( 129 | chunk_size=600, 130 | chunk_overlap=50 131 | ) 132 | split_docs = text_splitter.split_documents(docs) 133 | 134 | if not split_docs: 135 | logger.warning("No text chunks available. Using temporary data.") 136 | split_docs = [ 137 | Document( 138 | page_content="This is a test document chunk 1 for Dify external knowledge API.", 139 | metadata={ 140 | "path": str(PDF_PATH), 141 | "description": "Test PDF document", 142 | "title": PDF_PATH.name 143 | } 144 | ), 145 | Document( 146 | page_content="This is a test document chunk 2 about PDF processing and retrieval.", 147 | metadata={ 148 | "path": str(PDF_PATH), 149 | "description": "Test PDF document", 150 | "title": PDF_PATH.name 151 | } 152 | ), 153 | Document( 154 | page_content="This is a test document chunk 3 explaining external knowledge API implementation.", 155 | metadata={ 156 | "path": str(PDF_PATH), 157 | "description": "Test PDF document", 158 | "title": PDF_PATH.name 159 | } 160 | ) 161 | ] 162 | 163 | vector_db = Chroma.from_documents( 164 | documents=split_docs, 165 | embedding=embedding, 166 | persist_directory=str(CHROMA_DB_DIR), 167 | collection_name=self.knowledge_id 168 | ) 169 | 170 | state["vector_db"] = vector_db 171 | 172 | except Exception as e: 173 | logger.error(f"Error during vector store initialization: {str(e)}") 174 | raise 175 | 176 | return state 177 | 178 | class RetrieverSetup: 179 | """ 180 | Sets up semantic, keyword, and hybrid retrievers 181 | from the vector database. 182 | 183 | """ 184 | 185 | def __call__(self, state: KnowledgeState) -> KnowledgeState: 186 | """ 187 | Configure retrievers. 
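Builds three retrievers from the vector DB: semantic (Chroma similarity),
keyword (BM25 over the stored chunks), and hybrid (an EnsembleRetriever
weighting both 0.5/0.5). If BM25 setup fails, the semantic retriever is
reused as a fallback for all three.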
188 | 189 | Args: 190 | state: Current graph state 191 | 192 | Returns: 193 | Updated graph state with configured retrievers 194 | 195 | """ 196 | 197 | vector_db = state.get("vector_db") 198 | 199 | if vector_db is None: 200 | logger.error("Vector store not found in state.") 201 | raise ValueError("Vector store not found in state") 202 | 203 | top_k = state.get("top_k", 5) 204 | 205 | try: 206 | semantic_retriever = vector_db.as_retriever( 207 | search_kwargs={"k": top_k} 208 | ) 209 | state["semantic_retriever"] = semantic_retriever 210 | logger.info("Semantic retriever setup complete") 211 | 212 | try: 213 | result = vector_db.get() 214 | 215 | if "documents" in result and result["documents"]: 216 | docs = result["documents"] 217 | metadatas = result.get("metadatas", [None] * len(docs)) 218 | logger.info(f"Retrieved {len(docs)} documents from ChromaDB.") 219 | else: 220 | logger.warning("Could not retrieve documents from ChromaDB. Creating temporary documents.") 221 | docs = ["This is a temporary document for testing purposes."] 222 | metadatas = [None] 223 | 224 | doc_objects = [ 225 | Document( 226 | page_content=text, 227 | metadata=meta if meta else {} 228 | ) 229 | for text, meta in zip(docs, metadatas) 230 | ] 231 | 232 | keyword_retriever = BM25Retriever.from_documents(doc_objects) 233 | keyword_retriever.k = top_k 234 | state["keyword_retriever"] = keyword_retriever 235 | 236 | hybrid_retriever = EnsembleRetriever( 237 | retrievers=[keyword_retriever, semantic_retriever], 238 | weights=[0.5, 0.5] 239 | ) 240 | state["hybrid_retriever"] = hybrid_retriever 241 | 242 | except Exception as inner_e: 243 | logger.error(f"Error during BM25 retriever setup: {str(inner_e)}") 244 | logger.info("Using semantic retriever only.") 245 | state["keyword_retriever"] = semantic_retriever # Fallback 246 | state["hybrid_retriever"] = semantic_retriever # Fallback 247 | 248 | except Exception as e: 249 | logger.error(f"Error during retriever setup: {str(e)}") 250 | raise 251 | 252 | return state 253 | 254 | class PerformRetrieval: 255 | """ 256 | Performs search using the appropriate retriever based on user query. 257 | 258 | """ 259 | 260 | def __call__(self, state: KnowledgeState) -> KnowledgeState: 261 | """ 262 | Execute retrieval process. 
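Picks a retriever by search_method (semantic_search, keyword_search,
hybrid_search; full_text_search maps to the keyword retriever, and unknown
values fall back to hybrid). Result scores are synthetic (0.95 minus 0.1 per
rank, floored at score_threshold) because the underlying retrievers do not
return native relevance scores.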
263 | 264 | Args: 265 | state: Current graph state 266 | 267 | Returns: 268 | Updated graph state with search results 269 | 270 | """ 271 | 272 | query = state.get("query", "") 273 | search_method = state.get("search_method", "hybrid_search") 274 | top_k = state.get("top_k", 5) 275 | score_threshold = state.get("score_threshold", 0.5) 276 | logger.info(f"Performing search: query='{query}', method={search_method}, top_k={top_k}") 277 | 278 | retriever = None 279 | 280 | if search_method == "keyword_search": 281 | retriever = state.get("keyword_retriever") 282 | elif search_method == "semantic_search": 283 | retriever = state.get("semantic_retriever") 284 | elif search_method == "hybrid_search": 285 | retriever = state.get("hybrid_retriever") 286 | elif search_method == "full_text_search": 287 | retriever = state.get("keyword_retriever") 288 | else: 289 | retriever = state.get("hybrid_retriever") 290 | 291 | if not retriever: 292 | logger.error(f"Retriever not found: {search_method}") 293 | retriever = state.get("hybrid_retriever") 294 | if not retriever: 295 | raise ValueError(f"No retriever available in state") 296 | 297 | logger.warning(f"Could not find {search_method} retriever, using hybrid retriever instead.") 298 | 299 | try: 300 | docs = retriever.get_relevant_documents(query) 301 | docs = docs[:top_k] 302 | 303 | results = [] 304 | for i, doc in enumerate(docs): 305 | metadata = doc.metadata.copy() if hasattr(doc, 'metadata') and doc.metadata else {} 306 | score = max(0.95 - (i * 0.1), score_threshold) 307 | 308 | results.append({ 309 | "metadata": metadata, 310 | "score": score, 311 | "title": doc.metadata.get("Title", doc.metadata.get("title", "Document chunk")), 312 | "content": doc.page_content 313 | }) 314 | 315 | state["results"] = results 316 | 317 | if not results: 318 | logger.warning("No search results. Adding default response.") 319 | 320 | state["results"] = [{ 321 | "metadata": { 322 | "path": str(PDF_PATH), 323 | "description": "Default response" 324 | }, 325 | "score": 0.5, 326 | "title": "Default response", 327 | "content": f"No relevant documents found for query: '{query}'" 328 | }] 329 | 330 | except Exception as e: 331 | logger.error(f"Error during retrieval: {str(e)}") 332 | 333 | state["results"] = [{ 334 | "metadata": { 335 | "path": "error", 336 | "description": "Error occurred during retrieval" 337 | }, 338 | "score": 0.5, 339 | "title": "Error", 340 | "content": f"An error occurred during retrieval: {str(e)}" 341 | }] 342 | 343 | return state 344 | 345 | 346 | ###### STEP 3. Graph Creation and Compilation ###### 347 | 348 | def create_knowledge_graph(): 349 | """ 350 | Creates a LangGraph-based knowledge retrieval graph. 351 | 352 | Returns: 353 | Compiled graph instance 354 | 355 | """ 356 | 357 | graph_builder = StateGraph(KnowledgeState) 358 | 359 | graph_builder.add_node("document_processor", DocumentProcessor()) 360 | graph_builder.add_node("retriever_setup", RetrieverSetup()) 361 | graph_builder.add_node("perform_retrieval", PerformRetrieval()) 362 | 363 | graph_builder.add_edge(START, "document_processor") 364 | graph_builder.add_edge("document_processor", "retriever_setup") 365 | graph_builder.add_edge("retriever_setup", "perform_retrieval") 366 | graph_builder.add_edge("perform_retrieval", END) 367 | 368 | return graph_builder.compile() 369 | 370 | 371 | ###### STEP 4. 
Graph Instance Creation ###### 372 | 373 | try: 374 | knowledge_graph = create_knowledge_graph() 375 | logger.info("Knowledge graph instance creation complete") 376 | 377 | except Exception as e: 378 | logger.error(f"Error creating knowledge graph: {str(e)}") 379 | knowledge_graph = None 380 | 381 | 382 | ###### STEP 5. API Request and Response Class Definition ###### 383 | 384 | class RetrievalSetting(BaseModel): 385 | """Retrieval settings model""" 386 | 387 | top_k: Annotated[int, "Maximum number of results to return"] 388 | score_threshold: Annotated[float, "Minimum relevance score for inclusion (0.0-1.0)"] 389 | 390 | 391 | class ExternalKnowledgeRequest(BaseModel): 392 | """External knowledge API request model""" 393 | 394 | knowledge_id: Annotated[str, "ID of the knowledge base to search"] 395 | query: Annotated[str, "User search query"] 396 | search_method: Annotated[str, "Search method (semantic_search, keyword_search, hybrid_search)"] = "hybrid_search" 397 | retrieval_setting: Annotated[RetrievalSetting, "Retrieval settings"] 398 | 399 | 400 | ###### STEP 6. API Key Validation Function ###### 401 | 402 | async def verify_api_key(authorization: str = Header(...)): 403 | """API key validation function""" 404 | 405 | if not authorization.startswith("Bearer "): 406 | logger.warning("Invalid Authorization header format") 407 | 408 | raise HTTPException( 409 | status_code=403, 410 | detail={ 411 | "error_code": 1001, 412 | "error_msg": "Invalid Authorization header format. Expected 'Bearer <api-key>' format." 413 | } 414 | ) 415 | 416 | token = authorization.replace("Bearer ", "") 417 | 418 | if token != API_KEY: 419 | logger.warning("Authentication failed: Invalid API key") 420 | 421 | raise HTTPException( 422 | status_code=403, 423 | detail={ 424 | "error_code": 1002, 425 | "error_msg": "Authorization failed" 426 | } 427 | ) 428 | 429 | return token 430 | 431 | 432 | ###### STEP 7.
API Endpoint Definition ###### 433 | 434 | @app.post("/retrieval") 435 | async def retrieve_knowledge( 436 | request: ExternalKnowledgeRequest, 437 | token: str = Depends(verify_api_key)): 438 | """Document retrieval API endpoint""" 439 | 440 | logger.info(f"API request received: query='{request.query}'") 441 | 442 | if knowledge_graph is None: 443 | logger.error("Knowledge graph is not initialized.") 444 | 445 | raise HTTPException(status_code=500, detail="Knowledge graph is not initialized") 446 | 447 | initial_state = KnowledgeState( 448 | query=request.query, 449 | search_method=request.search_method, 450 | top_k=request.retrieval_setting.top_k, 451 | score_threshold=request.retrieval_setting.score_threshold, 452 | results=[], 453 | vector_db=None, 454 | semantic_retriever=None, 455 | keyword_retriever=None, 456 | hybrid_retriever=None 457 | ) 458 | 459 | try: 460 | final_state = knowledge_graph.invoke(initial_state) 461 | results = final_state.get("results", []) 462 | 463 | response_records = [] 464 | 465 | for r in results: 466 | metadata = r.get("metadata", {}) 467 | if not metadata: 468 | metadata = {"path": "unknown", "description": ""} 469 | 470 | response_records.append({ 471 | "metadata": metadata, 472 | "score": r.get("score", 0.5), 473 | "title": r.get("title", "Document"), 474 | "content": r.get("content", "No content") 475 | }) 476 | 477 | return {"records": response_records} 478 | 479 | except Exception as e: 480 | logger.error(f"Error during knowledge graph execution: {str(e)}") 481 | 482 | return {"records": [{ 483 | "metadata": { 484 | "path": "error", 485 | "description": "Error response" 486 | }, 487 | "score": 0.5, 488 | "title": "Error", 489 | "content": f"An error occurred: {str(e)}" 490 | }]} 491 | 492 | @app.get("/health") 493 | async def health_check(): 494 | """Server health check endpoint""" 495 | 496 | health_status = { 497 | "status": "healthy" if knowledge_graph is not None else "unhealthy", 498 | "knowledge_graph_initialized": knowledge_graph is not None, 499 | "openai_api_key_set": os.getenv("OPENAI_API_KEY") is not None, 500 | "data_directory_exists": DATA_DIR.exists(), 501 | "chroma_db_directory_exists": CHROMA_DB_DIR.exists(), 502 | "pdf_exists": PDF_PATH.exists() 503 | } 504 | 505 | return health_status 506 | 507 | if __name__ == "__main__": 508 | logger.info("Starting server...") 509 | uvicorn.run(app, host="0.0.0.0", port=8000) -------------------------------------------------------------------------------- /case2/mcp_server.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import os 3 | import json 4 | from typing import Dict 5 | 6 | from mcp.server.fastmcp import FastMCP 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | API_ENDPOINT = os.getenv("DIFY_API_ENDPOINT", "http://localhost:8000/retrieval") 12 | API_KEY = os.getenv("DIFY_API_KEY", "dify-external-knowledge-api-key") 13 | KNOWLEDGE_ID = os.getenv("DIFY_KNOWLEDGE_ID", "test-knowledge-base") 14 | 15 | mcp = FastMCP( 16 | name="Dify External Knowledge API", 17 | version="0.0.1", 18 | description="Three search methods (semantic_search, keyword_search, hybrid_search) using the Dify External Knowledge API specification" 19 | ) 20 | 21 | def format_search_results(data: Dict) -> str: 22 | """Formats search results in a readable form.""" 23 | 24 | records = data.get("records", []) 25 | 26 | if not records: 27 | return "No search results found."
28 | 29 | formatted_results = "# Search Results\n\n" 30 | 31 | for i, record in enumerate(records): 32 | content = record.get("content", "") 33 | score = record.get("score", 0) 34 | title = record.get("title", f"Result {i+1}") 35 | metadata = record.get("metadata", {}) 36 | 37 | # Extract metadata if available 38 | source_info = [] 39 | if "title" in metadata: 40 | source_info.append(f"File: {os.path.basename(metadata['title'])}") 41 | elif "path" in metadata: 42 | source_info.append(f"File: {os.path.basename(metadata['path'])}") 43 | if "page" in metadata: 44 | source_info.append(f"Page: {metadata['page']}") 45 | 46 | source_text = " | ".join(source_info) if source_info else "No source information" 47 | 48 | formatted_results += f"## {title} (Relevance: {score:.2f})\n" 49 | formatted_results += f"{source_text}\n\n" 50 | formatted_results += f"{content}\n\n" 51 | formatted_results += "---\n\n" 52 | 53 | formatted_results += "This information was retrieved via the Dify External Knowledge API." 54 | return formatted_results 55 | 56 | @mcp.tool() 57 | async def dify_ek_search( 58 | query: str, 59 | top_k: int = 5, 60 | score_threshold: float = 0.5, 61 | search_method: str = "hybrid_search", 62 | ctx = None 63 | ) -> str: 64 | """ 65 | Searches for information in the Dify external knowledge base. 66 | Returns search results with document content, relevance scores, and source information. 67 | Use when you need to find specific information in enterprise documents, knowledge bases, or specialized content. 68 | 69 | Parameters: 70 | query: Search question or keywords 71 | top_k: Maximum number of results to return 72 | score_threshold: Minimum relevance score for inclusion (0.0-1.0) 73 | search_method: Search method (semantic_search, keyword_search, hybrid_search) 74 | 75 | """ 76 | 77 | if ctx: 78 | ctx.info(f"Search query: {query}") 79 | ctx.info(f"Maximum results: {top_k}") 80 | ctx.info(f"Minimum score: {score_threshold}") 81 | 82 | # Input validation 83 | if not query or not query.strip(): 84 | return "Error: Search query is empty." 85 | 86 | if top_k < 1: 87 | top_k = 1 88 | elif top_k > 20: 89 | top_k = 20 90 | 91 | if score_threshold < 0: 92 | score_threshold = 0 93 | elif score_threshold > 1: 94 | score_threshold = 1 95 | 96 | try: 97 | if ctx: 98 | ctx.info(f"Calling Dify API: {API_ENDPOINT}") 99 | 100 | request_data = { 101 | "knowledge_id": KNOWLEDGE_ID, 102 | "query": query, 103 | "search_method": search_method, 104 | "retrieval_setting": { 105 | "top_k": top_k, 106 | "score_threshold": score_threshold 107 | } 108 | } 109 | 110 | async with httpx.AsyncClient(timeout=30.0) as client: 111 | response = await client.post( 112 | API_ENDPOINT, 113 | headers={ 114 | "Content-Type": "application/json", 115 | "Authorization": f"Bearer {API_KEY}" 116 | }, 117 | json=request_data 118 | ) 119 | 120 | if response.status_code != 200: 121 | error_message = f"Dify API error: HTTP {response.status_code}" 122 | try: 123 | error_detail = response.json() 124 | if isinstance(error_detail, dict) and "error_msg" in error_detail: 125 | error_message += f" - {error_detail['error_msg']}" 126 | except Exception: 127 | error_message += f" - {response.text[:100]}" 128 | 129 | if ctx: 130 | ctx.error(error_message) 131 | return f"Search failed\n\n{error_message}" 132 | 133 | try: 134 | data = response.json() 135 | return format_search_results(data) 136 | 137 | except json.JSONDecodeError: 138 | if ctx: 139 | ctx.error("JSON parsing error") 140 | return "Search failed\n\nCould not parse API response."
141 | 142 | except httpx.RequestError as e: 143 | error_message = f"API request error: {str(e)}" 144 | if ctx: 145 | ctx.error(error_message) 146 | return f"Search failed\n\n{error_message}" 147 | 148 | except Exception as e: 149 | error_message = f"Unexpected error: {str(e)}" 150 | if ctx: 151 | ctx.error(error_message) 152 | return f"Search failed\n\n{error_message}" 153 | 154 | @mcp.prompt() 155 | def ai_trend_learning_guide( 156 | topic: str = "", 157 | learning_level: str = "beginner", 158 | time_horizon: str = "short-term" 159 | ) -> str: 160 | """ 161 | Creates customized AI learning guides based on SPRI monthly AI reports. 162 | Provides latest AI trends and learning roadmaps tailored to learner level and desired timeframe. 163 | Perfect for educational planning, career development in AI, or understanding current AI landscape. 164 | 165 | Parameters: 166 | topic: AI topic of interest (optional - e.g., "generative AI", "computer vision", "NLP") 167 | learning_level: Learner level ("beginner", "intermediate", "advanced") 168 | time_horizon: Learning plan duration ("short-term", "medium-term", "long-term") 169 | 170 | """ 171 | 172 | level_approaches = { 173 | "beginner": "Focuses on understanding basic concepts and principles, with practical learning paths.", 174 | "intermediate": "Focuses on advanced concepts and practical project implementation, with paths to improve application skills.", 175 | "advanced": "Focuses on latest research trends and advanced technology implementation, with innovative approaches and expertise enhancement." 176 | } 177 | 178 | time_plans = { 179 | "short-term": "Proposes intensive learning plans centered on core skills and knowledge that can be acquired within 1-3 months.", 180 | "medium-term": "Proposes step-by-step learning plans to systematically build capabilities over 3-6 months.", 181 | "long-term": "Proposes comprehensive learning plans to develop expertise from a long-term perspective of 6 months to 1 year." 182 | } 183 | 184 | level_approach = level_approaches.get(learning_level, level_approaches["beginner"]) 185 | time_plan = time_plans.get(time_horizon, time_plans["short-term"]) 186 | 187 | output_template = f""" 188 | # {topic if topic else 'AI Trends'} Learning Guide 189 | 190 | ## 1. Trend Analysis 191 | - Key trends 192 | - Technological changes 193 | - Industry impact 194 | 195 | ## 2. Core Knowledge Areas 196 | - Basic concepts 197 | - Core technologies 198 | - Key algorithms/methodologies 199 | 200 | ## 3. Learning Roadmap 201 | - Step-by-step learning plan 202 | - Recommended resources 203 | - Practical projects 204 | 205 | ## 4. Career and Application Opportunities 206 | - Related roles/positions 207 | - Industry use cases 208 | - Future outlook 209 | """ 210 | 211 | # Generate final prompt 212 | prompt = ( 213 | f"You are an AI learning guide expert who analyzes the latest AI trends based on SPRI monthly AI reports " 214 | f"and provides customized learning directions.\n\n" 215 | 216 | f"## Learner Profile\n" 217 | f"- Level: {learning_level} ({level_approach})\n" 218 | f"- Learning Plan: {time_horizon} ({time_plan})\n\n" 219 | 220 | f"## Analysis Target\n" 221 | f"Please analyze based on the March issue of the SPRI monthly AI report. " 222 | f"{'Please focus on ' + topic + '-related content in your analysis.' if topic else 'Please analyze overall AI trends.'}\n\n" 223 | 224 | f"## Information to Provide\n" 225 | f"1. Summary of latest AI trends and their importance\n" 226 | f"2. 
Core knowledge and technical elements in the field\n" 227 | f"3. Step-by-step learning plan and recommended resources\n" 228 | f"4. Practical application suggestions and career recommendations\n\n" 229 | 230 | f"Please structure your analysis results as follows:\n\n{output_template}\n\n" 231 | 232 | f"Search the report to provide practical and specific information. Suggest learning directions " 233 | f"that align with the latest trends, and create a practical guide that learners can easily follow." 234 | ) 235 | 236 | return prompt 237 | 238 | @mcp.resource("help: hantaek@brain-crew.com") 239 | def get_help() -> str: 240 | """Provides help for Dify knowledge search in Claude Desktop.""" 241 | 242 | return """ 243 | # Dify External Knowledge Search MCP Tool Usage Guide 244 | 245 | Enables Claude to search for information in documents using the Dify External Knowledge API. 246 | 247 | ## Available Tools 248 | 249 | 1. dify_ek_search - Search for information in the knowledge base 250 | - `query`: Search query 251 | - `top_k`: Maximum number of results to return (default: 5) 252 | - `score_threshold`: Minimum relevance score (default: 0.5) 253 | - `search_method`: Search method (semantic_search, keyword_search, hybrid_search) (default: hybrid_search) 254 | 255 | 2. ai_trend_learning_guide - Prompt template for a customized AI learning guide 256 | - `topic`: AI topic of interest (optional - e.g., "generative AI", "computer vision", "NLP") 257 | - `learning_level`: Learner level ("beginner", "intermediate", "advanced") 258 | - `time_horizon`: Learning plan duration ("short-term", "medium-term", "long-term") 259 | 260 | """ 261 | 262 | if __name__ == "__main__": 263 | mcp.run() -------------------------------------------------------------------------------- /case3/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | 6 | from dotenv import load_dotenv 7 | 8 | 9 | def get_env_variables(): 10 | """Loads environment variables and returns them as a dictionary.""" 11 | 12 | load_dotenv() 13 | 14 | required_vars = ["DIFY_BASE_URL", "DIFY_APP_SK"] 15 | 16 | env_dict = {} 17 | 18 | for var in required_vars: 19 | value = os.getenv(var) 20 | if value: 21 | env_dict[var] = value 22 | 23 | return env_dict 24 | 25 | 26 | def create_mcp_json(): 27 | """Creates MCP server configuration JSON file.""" 28 | 29 | project_root = Path(__file__).parent.absolute() 30 | 31 | # .venv python executable path 32 | if os.name == 'nt': # Windows 33 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 34 | else: # Mac, Ubuntu etc 35 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 36 | 37 | server_script = project_root / "mcp_server.py" 38 | 39 | env_vars = get_env_variables() 40 | 41 | config = { 42 | "mcpServers": { 43 | "dify-workflow": { 44 | "command": python_path, 45 | "args": [str(server_script)], 46 | "env": env_vars, 47 | } 48 | } 49 | } 50 | 51 | json_path = project_root / "mcp_config.json" 52 | 53 | with open(json_path, "w", encoding="utf-8") as f: 54 | json.dump(config, f, indent=2) 55 | 56 | print(f"MCP configuration file created: {json_path}") 57 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 58 | 59 | return str(json_path) 60 | 61 | 62 | if __name__ == "__main__": 63 | create_mcp_json() 64 | -------------------------------------------------------------------------------- /case3/mcp_server.py: -------------------------------------------------------------------------------- 1 | import os 2 |
import requests 3 | 4 | from dotenv import load_dotenv 5 | from mcp.server.fastmcp import FastMCP 6 | 7 | load_dotenv() 8 | 9 | mcp = FastMCP( 10 | name="Dify Workflow", 11 | version="0.0.1", 12 | description="Retrieve Dify Workflow execution results" 13 | ) 14 | 15 | @mcp.tool() 16 | async def dify_workflow(input: str) -> str: 17 | """ 18 | Executes a Dify workflow and returns the results. 19 | Automates complex AI tasks and provides immediate results. 20 | Useful for text analysis, content generation, or processing user inputs through Dify workflows. 21 | 22 | Parameters: 23 | input: Input text to process 24 | 25 | """ 26 | 27 | dify_base_url = os.getenv("DIFY_BASE_URL") 28 | dify_app_sk = os.getenv("DIFY_APP_SK") 29 | 30 | url = f"{dify_base_url}/workflows/run" 31 | headers = { 32 | "Authorization": f"Bearer {dify_app_sk}", 33 | "Content-Type": "application/json" 34 | } 35 | data = { 36 | "inputs": {"input": input}, 37 | "response_mode": "blocking", 38 | "user": "default_user", 39 | } 40 | 41 | response = requests.post(url, headers=headers, json=data, timeout=60)  # timeout so a stalled workflow call cannot hang the tool 42 | response.raise_for_status() 43 | result = response.json() 44 | 45 | outputs = {} 46 | 47 | if "outputs" in result.get("data", {}): 48 | outputs = result["data"]["outputs"] 49 | 50 | return next(iter(outputs.values()), "No output received from Dify workflow.") 51 | 52 | if __name__ == "__main__": 53 | mcp.run() -------------------------------------------------------------------------------- /case4/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | 6 | from dotenv import load_dotenv 7 | 8 | 9 | def get_env_variables(): 10 | """Loads environment variables and returns them as a dictionary.""" 11 | 12 | load_dotenv() 13 | 14 | required_vars = [ 15 | "TAVILY_API_KEY", 16 | ] 17 | 18 | env_dict = {} 19 | 20 | for var in required_vars: 21 | value = os.getenv(var) 22 | if value: 23 | env_dict[var] = value 24 | 25 | return env_dict 26 | 27 | 28 | def create_mcp_json(): 29 | """Creates MCP server configuration JSON file.""" 30 | 31 | project_root = Path(__file__).parent.absolute() 32 | 33 | # .venv python executable path 34 | if os.name == 'nt': # Windows 35 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 36 | else: # Mac, Ubuntu etc 37 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 38 | 39 | server_script = project_root / "mcp_server.py" 40 | 41 | env_vars = get_env_variables() 42 | 43 | config = { 44 | "mcpServers": { 45 | "tavily-web-search": { 46 | "command": python_path, 47 | "args": [str(server_script)], 48 | "env": env_vars, 49 | } 50 | } 51 | } 52 | 53 | json_path = project_root / "mcp_config.json" 54 | 55 | with open(json_path, "w", encoding="utf-8") as f: 56 | json.dump(config, f, indent=2) 57 | 58 | print(f"MCP configuration file created: {json_path}") 59 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 60 | 61 | return str(json_path) 62 | 63 | 64 | if __name__ == "__main__": 65 | create_mcp_json() 66 | -------------------------------------------------------------------------------- /case4/mcp_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from mcp.server.fastmcp import FastMCP 5 | from tavily import TavilyClient 6 | 7 | load_dotenv() 8 | 9 | tavily_api_key = os.getenv("TAVILY_API_KEY") 10 | tavily_client = TavilyClient(api_key=
tavily_api_key) 11 | 12 | websearch_config = { 13 | "parameters": { 14 | "default_num_results": 5, 15 | "include_domains": [] 16 | } 17 | } 18 | 19 | mcp = FastMCP( 20 | name="web_search", 21 | version="1.0.0", 22 | description="Web search capability using the Tavily API that provides real-time internet search results. Performs basic-depth searches with a configurable number of results and returns formatted results with titles, URLs, publication dates, and content summaries." 23 | ) 24 | 25 | def format_search_results(response): 26 | """ 27 | Converts search results to markdown format. 28 | 29 | Args: 30 | response: Tavily search response dictionary 31 | 32 | Returns: 33 | String in markdown format 34 | 35 | """ 36 | 37 | if not response.get("results"): 38 | return "No results found." 39 | 40 | markdown_results = "### Search Results:\n\n" 41 | 42 | for idx, result in enumerate(response.get("results", []), 1): 43 | title = result.get("title", "No title") 44 | url = result.get("url", "") 45 | published_date = result.get("published_date", "") 46 | content = result.get("content", "") 47 | score = result.get("score", 0) 48 | 49 | date_info = f" (Published: {published_date})" if published_date else "" 50 | 51 | markdown_results += f"**{idx}.** [{title}]({url}){date_info}\n" 52 | markdown_results += f"**Relevance Score:** {score:.2f}\n" 53 | 54 | if content: 55 | markdown_results += f"> **Content:** {content}\n\n" 56 | else: 57 | markdown_results += "\n" 58 | 59 | if response.get("answer"): 60 | markdown_results += f"\n### Answer:\n{response.get('answer')}\n\n" 61 | 62 | if response.get("response_time"): 63 | markdown_results += f"\n*Search completed in {response.get('response_time'):.2f} seconds*" 64 | 65 | return markdown_results 66 | 67 | @mcp.tool() 68 | async def search_web(query: str, num_results: int | None = None) -> str: 69 | """ 70 | Performs real-time web search using the Tavily API. 71 | Returns latest search results in markdown format including titles, URLs, and content summaries. 72 | Use when you need current information, recent events, or data not available in your training. 73 | 74 | 75 | Parameters: 76 | query: Search query 77 | num_results: Number of results to return (default: 5) 78 | 79 | """ 80 | 81 | try: 82 | search_args = { 83 | "max_results": num_results or websearch_config["parameters"]["default_num_results"], 84 | "search_depth": "basic" 85 | } 86 | 87 | search_results = tavily_client.search( 88 | query=query, 89 | **search_args 90 | ) 91 | 92 | return format_search_results(search_results) 93 | except Exception as e: 94 | return f"Error occurred during Tavily search: {e}" 95 | 96 | @mcp.resource("help: dev@brain-crew.com") 97 | def get_search_help() -> str: 98 | """Provides help for web search tools.""" 99 | 100 | return """ 101 | # Web Search Tool Usage Guide 102 | 103 | Provides Claude with real-time web search capability through the Tavily API. 104 | 105 | ## Web Search 106 | The `search_web` tool performs simple web searches.
107 | - Parameters: 108 | - query: Search query 109 | - num_results: Number of results to return (optional, default: 5) 110 | 111 | ## Examples 112 | - Web search: "I'm curious about the latest AI development trends" 113 | 114 | """ 115 | 116 | if __name__ == "__main__": 117 | mcp.run() -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teddynote-lab/mcp-usecase/89e147c81802ba5e8c4511aa4d111681161410eb/data/.gitkeep -------------------------------------------------------------------------------- /docs/case1.md: -------------------------------------------------------------------------------- 1 | # RAG(Retrieval Augmented Generation) 예제 2 | 3 | [English](#rag-retrieval-augmented-generation-example) | [한국어](#rag-retrieval-augmented-generation-예제-1) 4 | 5 | ## RAG (Retrieval Augmented Generation) 예제 6 | 7 | 이 예제에서는 PDF 문서를 대상으로 키워드 검색, 시맨틱 검색, 하이브리드 검색 기능을 구현한 MCP 서버를 제공합니다. 8 | 9 | ### 기능 10 | 11 | - **키워드 검색**: 문서 내에서 특정 키워드와 정확히 일치하는 내용을 검색합니다. 12 | - **시맨틱 검색**: 임베딩 모델을 사용하여 의미적으로 유사한 내용을 검색합니다. 13 | - **하이브리드 검색**: 키워드 검색과 시맨틱 검색을 결합하여 보다 정확한 결과를 제공합니다. 14 | 15 | ### 설정 16 | 17 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 18 | 19 | ``` 20 | OPENAI_API_KEY = "sk-" 21 | ``` 22 | 23 | ### 사용 방법 24 | 25 | 1. 환경 설정 확인 26 | ```bash 27 | # case1 디렉토리로 이동 28 | cd case1 29 | 30 | # 필요한 환경 변수 설정 확인 31 | # .env 파일이 올바르게 구성되었는지 확인하세요 32 | ``` 33 | 34 | 2. JSON 파일 생성 35 | ```bash 36 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 37 | source ../.venv/bin/activate # macOS/Linux 38 | ..\.venv\Scripts\activate # Windows 39 | 40 | # JSON 파일 생성 41 | python auto_mcp_json.py 42 | ``` 43 | 44 | 3. Claude Desktop 또는 Cursor에 적용 45 | - 생성된 JSON 내용을 복사 46 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 47 | - 설정 저장 및 적용 48 | 49 | ### 사용 예시 50 | 51 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다. 52 | 53 | - **키워드 검색**: "키워드 검색으로 AI에 대한 정의를 찾아줄래?" 54 | - **시맨틱 검색**: "시멘틱 검색을 해서 인공지능의 최근 발전 동향을 알려줘." 55 | - **하이브리드 검색**: "하이브리드 검색을 해서 최근 LLM의 활용 사례를 알려줘." 56 | 57 | ### 구현 세부사항 58 | 59 | `case1/mcp_server.py` 파일은 다음과 같은 주요 구성 요소로 이루어져 있습니다. 60 | 61 | 1. PDF 파일 경로 설정 및 RAG 체인 초기화 62 | 2. 검색 결과 포맷팅 함수 63 | 3. 키워드, 시맨틱, 하이브리드 검색 도구 정의 64 | 65 | ### 🚨 도구 Docstring 최적화 66 | 67 | 이 예제에서 제공하는 도구들이 Claude와 같은 AI 에이전트에 의해 효과적으로 사용되기 위해서는 명확하고 맥락적인 docstring을 작성하는 것이 중요합니다. 68 | 69 | #### 왜 Docstring이 중요한가요? 70 | 71 | `@mcp.tool()`로 도구를 정의할 때, 제공하는 docstring은 Claude가 도구를 이해하고 사용하는 방식에 직접적인 영향을 미칩니다. Claude는 다음과 같은 목적으로 docstring을 읽습니다. 72 | 73 | 1. **도구의 목적 이해**: Claude는 docstring을 분석하여 도구가 무엇을 하는지 파악합니다. 74 | 2. **사용 시점 결정**: Claude는 해당 도구를 사용해야 할 상황을 판단합니다. 75 | 3. **매개변수 형식 파악**: Claude는 필수 및 선택적 매개변수를 학습합니다. 76 | 77 | 사용자가 명시적으로 도구 이름을 언급하지 않더라도, 잘 작성된 docstring을 통해 Claude가 상황에 맞게 적절한 도구를 선택할 수 있습니다. 이는 더 자연스러운 대화 흐름과 최상의 결과를 얻는 데 필수적입니다. 78 | 79 | #### 효과적인 Docstring 구조 80 | 81 | 최적의 결과를 위해 docstring을 다음과 같이 구성하세요: 82 | 83 | ```python 84 | @mcp.tool() 85 | async def your_tool_name(param1: str, param2: int = 5) -> str: 86 | """ 87 | 도구가 하는 일에 대한 짧은 설명 (1줄). 88 | 결과 또는 출력 형식에 대한 자세한 내용 (1줄). 89 | 이 도구를 사용해야 하는 상황에 대한 맥락적 힌트 (1줄). 90 | 91 | Parameters: 92 | param1: 첫 번째 매개변수 설명 93 | param2: 기본값이 있는 두 번째 매개변수 설명 94 | """ 95 | # 함수 구현... 96 | ``` 97 | 98 | 99 | 100 | 이러한 docstring을 통해 Claude는 다음과 같은 상황에서 지능적으로 도구를 선택할 수 있습니다. 
101 | - 사용자가 "문서에서 X의 정의는 무엇인가요?"라고 물으면 **keyword_search** 선택 102 | - 사용자가 "문서에서 X 개념에 대해 설명해주세요"라고 물으면 **semantic_search** 선택 103 | - 사용자가 "문서에서 X에 대해 무엇이라고 하나요?"라고 물으면 **hybrid_search** 선택 104 | 105 | 이처럼 사용자가 명시적으로 도구 이름을 언급하지 않더라도, 맥락적 힌트를 통해 Claude가 올바른 도구를 선택할 수 있습니다. 106 | 107 | --- 108 | 109 | ## RAG (Retrieval Augmented Generation) Example 110 | 111 | This example provides an MCP server that implements keyword search, semantic search, and hybrid search functionality for PDF documents. 112 | 113 | ### Features 114 | 115 | - **Keyword Search**: Searches for content that exactly matches specific keywords in documents. 116 | - **Semantic Search**: Uses embedding models to search for semantically similar content. 117 | - **Hybrid Search**: Combines keyword and semantic search to provide more accurate results. 118 | 119 | ### Configuration 120 | 121 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 122 | 123 | ``` 124 | OPENAI_API_KEY = "sk-" 125 | ``` 126 | 127 | ### Usage Instructions 128 | 129 | 1. Check environment configuration 130 | ```bash 131 | # Navigate to case1 directory 132 | cd case1 133 | 134 | # Check the required environment variables 135 | # Make sure the .env file is properly configured 136 | ``` 137 | 138 | 2. Generate JSON file 139 | ```bash 140 | # Activate virtual environment (if not already activated) 141 | source ../.venv/bin/activate # macOS/Linux 142 | ..\.venv\Scripts\activate # Windows 143 | 144 | # Generate JSON file 145 | python auto_mcp_json.py 146 | ``` 147 | 148 | 3. Apply to Claude Desktop or Cursor 149 | - Copy the generated JSON content 150 | - Paste it into the MCP settings of Claude Desktop or Cursor 151 | - Save and apply settings 152 | 153 | ### Usage Examples 154 | 155 | You can use it in Claude Desktop or Cursor as follows: 156 | 157 | - **Keyword Search**: "Can you find the definition of AI using keyword search?" 158 | - **Semantic Search**: "Could you use semantic search to tell me about recent developments in AI?" 159 | - **Hybrid Search**: "Tell me about use cases of LLM using hybrid search." 160 | 161 | ### Implementation Details 162 | 163 | The `case1/mcp_server.py` file consists of the following main components: 164 | 165 | 1. PDF file path setup and RAG chain initialization 166 | 2. Search results formatting function 167 | 3. Definition of keyword, semantic, and hybrid search tools 168 | 169 | The vector database uses Chroma DB to efficiently store and search the contents of PDF documents. 170 | 171 | ### 🚨 Optimizing Tool Docstrings 172 | 173 | For the tools in this example to be effectively used by AI agents like Claude, it's important to write clear and contextual docstrings. 174 | 175 | #### Why Docstrings Matter 176 | 177 | When defining tools with `@mcp.tool()`, the docstring you provide directly influences how Claude understands and uses the tool. Claude reads these docstrings to: 178 | 179 | 1. **Understand the tool's purpose**: Claude analyzes the docstring to know what the tool does 180 | 2. **Decide when to use it**: Claude determines which situations call for this specific tool 181 | 3. **Know how to format parameters**: Claude learns the required and optional parameters 182 | 183 | Even when users don't explicitly mention the tool name, well-written docstrings allow Claude to select the appropriate tool based on the context. This is essential for a more natural conversation flow and optimal results. 
184 | 185 | #### Effective Docstring Structure 186 | 187 | For optimal results, structure your docstrings like this: 188 | 189 | ```python 190 | @mcp.tool() 191 | async def your_tool_name(param1: str, param2: int = 5) -> str: 192 | """ 193 | Short description of what the tool does (1 line). 194 | More details about the results or output format (1 line). 195 | Contextual hints about when to use this tool (1 line). 196 | 197 | Parameters: 198 | param1: Description of first parameter 199 | param2: Description of second parameter with default value 200 | """ 201 | # Function implementation... 202 | ``` 203 | 204 | With these docstrings, Claude can intelligently choose: 205 | - **keyword_search** when a user asks "What is the definition of X in the document?" 206 | - **semantic_search** when a user asks "Tell me about the concept of X from the document" 207 | - **hybrid_search** when a user asks "What does the document say about X?" 208 | 209 | This way, even if users don't explicitly name the tool, Claude can select the right one through contextual hints. -------------------------------------------------------------------------------- /docs/case2.md: -------------------------------------------------------------------------------- 1 | # Dify External Knowledge API 예제 2 | 3 | [English](#dify-external-knowledge-api-example) | [한국어](#dify-external-knowledge-api-예제-1) 4 | 5 | ## Dify 외부지식 6 | 7 | 이 예제에서는 Dify 외부지식 API 형식과 동일한 문서 검색 도구를 통해 MCP 서버를 제공합니다. 또한 SPRI 월간 AI 보고서를 기반으로 맞춤형 학습 가이드를 생성하는 `프롬프트 템플릿`도 포함되어 있습니다. `Dify에 등록된 외부지식에 직접 요청을 하는 것이 아니기 때문에` case2를 시도하기 위해서는 반드시 로컬에서 `dify_ek_server.py`를 실행시켜주셔야 합니다. 8 | 9 | Dify에 외부지식을 등록하는 방법이 궁금하신 분들은 [이곳을 클릭해주세요.](https://ballistic-hedgehog-95e.notion.site/How-to-register-External-Knowledge-in-Dify-1bfbeae069358056a878c60c82b4ad0d?pvs=4) -> 아직 익숙하지 않으셔도 괜찮습니다. 10 | 11 | ### 기능 12 | 13 | - **다양한 검색 방법**: 시맨틱 검색, 키워드 검색, 하이브리드 검색을 지원합니다. 14 | - **검색 결과 포맷팅**: 검색 결과를 가독성 있는 형태로 제공합니다. 15 | - **AI 트렌드 학습 가이드**: SPRI 월간 AI 보고서를 기반으로 맞춤형 학습 가이드를 생성합니다. 16 | 17 | ### 설정 18 | 19 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 20 | 21 | ``` 22 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 23 | DIFY_API_KEY = your-dify-api-key 24 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 25 | ``` 26 | 27 | - `DIFY_API_ENDPOINT`: Dify API 엔드포인트 URL 28 | - `DIFY_API_KEY`: Dify API 키 29 | - `DIFY_KNOWLEDGE_ID`: 검색할 지식 베이스 ID 30 | 31 | ### 사용 방법 32 | 33 | 1. 환경 설정 확인 34 | ```bash 35 | # case2 디렉토리로 이동 36 | cd case2 37 | 38 | # 필요한 환경 변수 설정 확인 39 | # .env 파일이 올바르게 구성되었는지 확인하세요 40 | ``` 41 | 42 | 2. Dify 외부지식 로컬서버 실행 43 | ```bash 44 | # 로컬서버를 실행하기 전에 data 폴더의 pdf 문서를 확인해주세요. 45 | python dify_ek_server.py 46 | ``` 47 | 48 | 3. JSON 파일 생성 49 | ```bash 50 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 51 | source ../.venv/bin/activate # macOS/Linux 52 | ..\.venv\Scripts\activate # Windows 53 | 54 | # JSON 파일 생성 55 | python auto_mcp_json.py 56 | ``` 57 | 58 | 4. Claude Desktop 또는 Cursor에 적용 59 | - 생성된 JSON 내용을 복사 60 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 61 | - 설정 저장 및 적용 62 | 63 | ### 사용 예시 64 | 65 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다. 66 | 67 | #### 1. Dify 외부지식 검색 68 | ```bash 69 | # ex 70 | "외부지식을 사용해서 최근 생성형 AI 기술 동향에 대해 검색해줘." 71 | ``` 72 | 73 | #### 2. AI 트렌드 학습 가이드 생성 74 | 클로드 데스크탑에서 프롬프트 템플릿을 클릭해주세요. 75 | ```bash 76 | # ex 77 | " Topic: LLM " 78 | " Learning_level: 초급 " 79 | " Time_horizon: 중기 " 80 | ``` 81 | 82 | ### 구현 세부사항 83 | 84 | `case2/mcp_server.py` 파일에는 다음과 같은 주요 구성 요소가 포함되어 있습니다: 85 | 86 | 1. 문서 검색 도구 87 | 2. AI 트렌드 학습 가이드 프롬프트 템플릿 88 | 3. 
도움말 리소스 89 | 90 | --- 91 | 92 | ## Dify External Knowledge API Example 93 | 94 | In this example, we provide an MCP server that follows the same document retrieval format as Dify's External Knowledge API. It also includes a prompt template that generates a customized AI learning guide based on SPRI monthly AI reports. 95 | 96 | Since this example does not send requests directly to the external knowledge registered in Dify, you must run `dify_ek_server.py` locally in order to try case2. 97 | 98 | To learn how to register external knowledge in Dify, [click here.](https://ballistic-hedgehog-95e.notion.site/How-to-register-External-Knowledge-in-Dify-1bfbeae069358056a878c60c82b4ad0d) 99 | -> No worries if you’re not familiar with this yet. 100 | 101 | ### Features 102 | 103 | - **Various Search Methods**: Supports semantic search, keyword search, and hybrid search. 104 | - **Search Results Formatting**: Provides search results in a readable format. 105 | - **AI Trends Learning Guide**: Generates customized learning guides based on SPRI monthly AI reports. 106 | 107 | ### Configuration 108 | 109 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 110 | 111 | ``` 112 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 113 | DIFY_API_KEY = your-dify-api-key 114 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 115 | ``` 116 | 117 | - `DIFY_API_ENDPOINT`: Dify API endpoint URL 118 | - `DIFY_API_KEY`: Dify API key 119 | - `DIFY_KNOWLEDGE_ID`: Knowledge base ID to search 120 | 121 | ### Usage Instructions 122 | 123 | 1. Check environment configuration 124 | ```bash 125 | # Navigate to case2 directory 126 | cd case2 127 | 128 | # Check the required environment variables 129 | # Make sure the .env file is properly configured 130 | ``` 131 | 132 | 2. Run Dify external knowledge local server 133 | ```bash 134 | # Please check the PDF documents in the data folder before running the local server. 135 | python dify_ek_server.py 136 | ``` 137 | 138 | 3. Generate JSON file 139 | ```bash 140 | # Activate virtual environment (if not already activated) 141 | source ../.venv/bin/activate # macOS/Linux 142 | ..\.venv\Scripts\activate # Windows 143 | 144 | # Generate JSON file 145 | python auto_mcp_json.py 146 | ``` 147 | 148 | 4. Apply to Claude Desktop or Cursor 149 | - Copy the generated JSON content 150 | - Paste it into the MCP settings of Claude Desktop or Cursor 151 | - Save and apply settings 152 | 153 | ### Usage Examples 154 | 155 | You can use it in Claude Desktop or Cursor as follows: 156 | 157 | #### External Knowledge Search with Dify 158 | ``` 159 | Use external knowledge to search for recent trends in LLM. 160 | ``` 161 | 162 | #### AI Trends Learning Guide Generation 163 | Click the prompt template in Claude Desktop. 164 | ``` 165 | Topic: LLM 166 | Learning_level: Beginner 167 | Time_horizon: Mid-term 168 | ``` 169 | 170 | ### Implementation Details 171 | 172 | The `case2/mcp_server.py` file includes the following main components: 173 | 174 | 1. Document search tool 175 | 2. AI trends learning guide prompt template 176 | 3. Help resource -------------------------------------------------------------------------------- /docs/case3.md: -------------------------------------------------------------------------------- 1 | # Dify Workflow 예제 2 | 3 | [English](#dify-workflow-example) | [한국어](#dify-workflow-예제-1) 4 | 5 | ## Dify Workflow 예제 6 | 7 | 이 예제에서는 Dify Workflow API를 사용하여 워크플로우를 실행하고 결과를 가져오는 MCP 서버를 제공합니다. Dify의 워크플로우 기능을 통해 복잡한 AI 작업을 자동화하고 그 결과를 Claude에서 활용할 수 있습니다.
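참고로 `response_mode`가 `blocking`일 때 Dify 워크플로우 API(`POST /workflows/run`)의 응답은 대략 아래와 같은 형태입니다. 필드 구성과 값은 예시이며 실제 응답에는 추가 필드가 포함될 수 있고, `case3/mcp_server.py`는 이 중 `data.outputs`의 첫 번째 값을 그대로 반환합니다.

```json
{
  "data": {
    "status": "succeeded",
    "outputs": {
      "output": "워크플로우 실행 결과 텍스트"
    }
  }
}
```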
8 | 9 | ### 기능 10 | 11 | - **Dify Workflow 실행**: 사용자 입력을 받아 Dify의 워크플로우를 실행합니다. 12 | - **결과 반환**: 워크플로우 실행 결과를 Claude에 반환합니다. 13 | - **에러 처리**: API 요청 및 응답 처리 과정에서 발생할 수 있는 에러를 적절하게 처리합니다. 14 | 15 | ### 설정 16 | 17 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 18 | 19 | ``` 20 | DIFY_BASE_URL = https://api.dify.ai/v1 21 | DIFY_APP_SK = your-dify-app-sk 22 | ``` 23 | 24 | - `DIFY_BASE_URL`: Dify API의 기본 URL 25 | - `DIFY_APP_SK`: Dify 애플리케이션 시크릿 키 26 | 27 | ### 사용 방법 28 | 29 | 1. 환경 설정 확인 30 | ```bash 31 | # case3 디렉토리로 이동 32 | cd case3 33 | 34 | # 필요한 환경 변수 설정 확인 35 | # .env 파일이 올바르게 구성되었는지 확인하세요 36 | ``` 37 | 38 | 2. JSON 파일 생성 39 | ```bash 40 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 41 | source ../.venv/bin/activate # macOS/Linux 42 | ..\.venv\Scripts\activate # Windows 43 | 44 | # JSON 파일 생성 45 | python auto_mcp_json.py 46 | ``` 47 | 48 | 3. Claude Desktop 또는 Cursor에 적용 49 | - 생성된 JSON 내용을 복사 50 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 51 | - 설정 저장 및 적용 52 | 53 | ### 사용 예시 54 | 55 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다: 56 | 57 | ``` 58 | Dify 워크플로우를 실행해서 "인공지능의 윤리적 문제"에 대한 분석을 해줘. 59 | ``` 60 | 61 | 이 요청은 Dify 워크플로우에 "인공지능의 윤리적 문제"라는 입력을 전달하고, 워크플로우가 실행된 후 결과를 Claude에 표시합니다. 62 | 63 | ### 구현 세부사항 64 | 65 | `case3/mcp_server.py` 파일에는 다음과 같은 주요 구성 요소가 포함되어 있습니다. 66 | 67 | 1. 환경 변수 로드 68 | 2. FastMCP 서버 초기화 69 | 3. Dify 워크플로우 실행 도구 정의 70 | 4. HTTP 요청 및 응답 처리 71 | 5. 에러 처리 72 | 73 | 이 구현은 Dify의 워크플로우 API를 사용하여 사용자 입력을 처리하고 결과를 반환합니다. 워크플로우는 Dify 플랫폼에서 미리 구성되어야 하며, 해당 워크플로우에 접근할 수 있는 권한이 있어야 합니다. 74 | 75 | --- 76 | 77 | ## Dify Workflow Example 78 | 79 | This example provides an MCP server that executes workflows using the Dify Workflow API and retrieves the results. Through Dify's workflow functionality, you can automate complex AI tasks and utilize the results in Claude. 80 | 81 | ### Features 82 | 83 | - **Execute Dify Workflow**: Executes Dify's workflow with user input. 84 | - **Return Results**: Returns workflow execution results to Claude. 85 | - **Error Handling**: Properly handles errors that may occur during API request and response processing. 86 | 87 | ### Configuration 88 | 89 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 90 | 91 | ``` 92 | DIFY_BASE_URL = https://api.dify.ai/v1 93 | DIFY_APP_SK = your-dify-app-sk 94 | ``` 95 | 96 | - `DIFY_BASE_URL`: Base URL for the Dify API 97 | - `DIFY_APP_SK`: Secret key for your Dify application 98 | 99 | ### Usage Instructions 100 | 101 | 1. Check environment configuration 102 | ```bash 103 | # Navigate to case3 directory 104 | cd case3 105 | 106 | # Check the required environment variables 107 | # Make sure the .env file is properly configured 108 | ``` 109 | 110 | 2. Generate JSON file 111 | ```bash 112 | # Activate virtual environment (if not already activated) 113 | source ../.venv/bin/activate # macOS/Linux 114 | ..\.venv\Scripts\activate # Windows 115 | 116 | # Generate JSON file 117 | python auto_mcp_json.py 118 | ``` 119 | 120 | 3. Apply to Claude Desktop or Cursor 121 | - Copy the generated JSON content 122 | - Paste it into the MCP settings of Claude Desktop or Cursor 123 | - Save and apply settings 124 | 125 | ### Usage Examples 126 | 127 | You can use it in Claude Desktop or Cursor as follows: 128 | 129 | ``` 130 | Execute a Dify workflow to analyze "ethical issues in artificial intelligence". 
131 | ``` 132 | 133 | This request passes "ethical issues in artificial intelligence" as input to the Dify workflow, and after the workflow is executed, the results are displayed in Claude. 134 | 135 | ### Implementation Details 136 | 137 | The `case3/mcp_server.py` file includes the following main components. 138 | 139 | 1. Load environment variables 140 | 2. Initialize FastMCP server 141 | 3. Define Dify workflow execution tool 142 | 4. Handle HTTP requests and responses 143 | 5. Error handling 144 | 145 | This implementation uses Dify's workflow API to process user input and return results. The workflow must be pre-configured on the Dify platform, and you must have permission to access that workflow. -------------------------------------------------------------------------------- /docs/case4.md: -------------------------------------------------------------------------------- 1 | # Web Search 예제 2 | 3 | [English](#web-search-example) | [한국어](#web-search-예제-1) 4 | 5 | ## Web Search 예제 6 | 7 | 이 예제에서는 Tavily API를 활용하여 실시간 웹 검색 기능을 제공하는 MCP 서버를 구현합니다. 간단하고 효율적인 웹 검색 기능을 Claude에 제공합니다. 8 | 9 | ### 기능 10 | 11 | - **웹 검색**: 간단한 쿼리로 웹 검색을 수행합니다. 12 | - **결과 포맷팅**: 제목, URL, 발행 날짜, 내용 요약이 포함된 형식으로 결과를 반환합니다. 13 | 14 | ### 설정 15 | 16 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 17 | 18 | ``` 19 | TAVILY_API_KEY = your-tavily-api-key 20 | ``` 21 | 22 | - `TAVILY_API_KEY`: Tavily API 키 (https://tavily.com/ 에서 발급 가능) 23 | 24 | ### 사용 방법 25 | 26 | 1. 환경 설정 확인 27 | ```bash 28 | # case4 디렉토리로 이동 29 | cd case4 30 | 31 | # 필요한 환경 변수 설정 확인 32 | # .env 파일이 올바르게 구성되었는지 확인하세요 33 | ``` 34 | 35 | 2. JSON 파일 생성 36 | ```bash 37 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 38 | source ../.venv/bin/activate # macOS/Linux 39 | ..\.venv\Scripts\activate # Windows 40 | 41 | # JSON 파일 생성 42 | python auto_mcp_json.py 43 | ``` 44 | 45 | 3. Claude Desktop 또는 Cursor에 적용 46 | - 생성된 JSON 내용을 복사 47 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 48 | - 설정 저장 및 적용 49 | 50 | ### 사용 예시 51 | 52 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다: 53 | 54 | ``` 55 | 최근 생성형 AI 기술 동향에 대해 검색해줘 56 | ``` 57 | 58 | ``` 59 | 인공지능 윤리에 대한 최신 논의를 검색해서 알려줘 60 | ``` 61 | 62 | ### 구현 세부사항 63 | 64 | `case4/mcp_server.py` 파일에는 다음과 같은 주요 구성 요소가 포함되어 있습니다: 65 | 66 | 1. Tavily API 연결 설정 67 | 2. 웹 검색 설정 구성 68 | 3. 검색 결과 포맷팅 함수 69 | 4. 웹 검색 도구 70 | 5. 도움말 리소스 71 | 72 | 이 구현은 Tavily API를 사용하여 검색 결과를 가져오고, 내용 요약을 자동으로 생성합니다. 검색 결과는 마크다운 형식으로 포맷팅되어 Claude에 표시됩니다. 73 | 74 | --- 75 | 76 | ## Web Search Example 77 | 78 | This example implements an MCP server that provides real-time web search functionality using the Tavily API. It offers simple and efficient web search capabilities to Claude. 79 | 80 | ### Features 81 | 82 | - **Web Search**: Performs web searches with simple queries. 83 | - **Result Formatting**: Returns results in a format including titles, URLs, publication dates, and content summaries. 84 | 85 | ### Configuration 86 | 87 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 88 | 89 | ``` 90 | TAVILY_API_KEY=your-tavily-api-key 91 | ``` 92 | 93 | - `TAVILY_API_KEY`: Tavily API key (can be obtained from https://tavily.com/) 94 | 95 | ### Usage Instructions 96 | 97 | 1. Check environment configuration 98 | ```bash 99 | # Navigate to case4 directory 100 | cd case4 101 | 102 | # Check the required environment variables 103 | # Make sure the .env file is properly configured 104 | ``` 105 | 106 | 2. 
Generate JSON file 107 | ```bash 108 | # Activate virtual environment (if not already activated) 109 | source ../.venv/bin/activate # macOS/Linux 110 | ..\.venv\Scripts\activate # Windows 111 | 112 | # Generate JSON file 113 | python auto_mcp_json.py 114 | ``` 115 | 116 | 3. Apply to Claude Desktop or Cursor 117 | - Copy the generated JSON content 118 | - Paste it into the MCP settings of Claude Desktop or Cursor 119 | - Save and apply settings 120 | 121 | ### Usage Examples 122 | 123 | You can use it in Claude Desktop or Cursor as follows: 124 | 125 | ``` 126 | Search for recent trends in generative AI technology 127 | ``` 128 | 129 | ``` 130 | Find the latest discussions on AI ethics and provide the results 131 | ``` 132 | 133 | ### Implementation Details 134 | 135 | The `case4/mcp_server.py` file includes the following main components: 136 | 137 | 1. Tavily API connection setup 138 | 2. Web search configuration 139 | 3. Search results formatting function 140 | 4. Web search tool 141 | 5. Help resource 142 | 143 | This implementation uses the Tavily API to fetch search results and automatically generate content summaries. The search results are formatted in markdown and displayed in Claude. -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # 설치 가이드 (Installation Guide) 2 | 3 | [English](#installation-guide) | [한국어](#설치-가이드-1) 4 | 5 | ## 설치 가이드 6 | 7 | 이 문서에서는 Quick-start Auto MCP 프로젝트의 설치 방법에 대해 상세히 설명합니다. 8 | 9 | ### 시스템 요구사항 10 | 11 | - Python 3.11 이상 12 | - Claude Desktop 또는 Cursor (MCP 지원 버전) 13 | - Git (선택 사항) 14 | 15 | ### 설치 단계 16 | 17 | #### 1. 저장소 복제 18 | 19 | Git을 사용하여 저장소를 복제합니다. 20 | 21 | ```bash 22 | git clone https://github.com/teddynote-lab/mcp-usecase.git 23 | cd mcp-usecase 24 | ``` 25 | 26 | 또는 GitHub에서 ZIP 파일로 다운로드하여 압축을 풀 수도 있습니다. 27 | 28 | #### 2. 가상 환경 설정 29 | 30 | ##### uv 사용 (권장) 31 | 32 | [uv](https://github.com/astral-sh/uv)는 더 빠른 파이썬 패키지 설치 및 환경 관리 도구입니다. 아직 설치하지 않았다면 먼저 설치해 주세요. 33 | 34 | ```bash 35 | # uv 설치 (macOS/Linux) 36 | curl -LsSf https://astral.sh/uv/install.sh | sh 37 | 38 | # uv 설치 (Windows PowerShell) 39 | powershell -c "irm https://astral.sh/uv/install.ps1 | iex" 40 | ``` 41 | 42 | uv를 사용하여 가상 환경을 설정하고 패키지를 설치합니다. 43 | 44 | ```bash 45 | uv venv 46 | uv pip install -r requirements.txt 47 | ``` 48 | 49 | ##### pip 사용 50 | 51 | 기존 Python 도구를 사용하여 가상 환경을 설정할 수도 있습니다. 52 | 53 | ```bash 54 | # 가상 환경 생성 55 | python -m venv .venv 56 | 57 | # 가상 환경 활성화 (Windows) 58 | .venv\Scripts\activate 59 | 60 | # 가상 환경 활성화 (macOS/Linux) 61 | source .venv/bin/activate 62 | 63 | # 패키지 설치 64 | pip install -r requirements.txt 65 | ``` 66 | 67 | #### 3. 환경 변수 설정 68 | 69 | 루트 디렉토리의 `.env.example`에 필요한 환경 변수를 설정하고 파일명을 `.env`로 바꿔주세요. 70 | 71 | ##### case1 (RAG) 72 | ``` 73 | OPENAI_API_KEY = "your-openai-api-key" 74 | ``` 75 | 76 | ##### case2 (Dify External Knowledge API) 77 | ``` 78 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 79 | DIFY_API_KEY = your-dify-api-key 80 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 81 | ``` 82 | 83 | ##### case3 (Dify Workflow) 84 | ``` 85 | DIFY_BASE_URL = https://api.dify.ai/v1 86 | DIFY_APP_SK = your-dify-app-sk 87 | ``` 88 | 89 | ##### case4 (Web Search) 90 | ``` 91 | TAVILY_API_KEY = your-tavily-api-key 92 | ``` 93 | 94 | ### 설치 확인 95 | 96 | 설치가 올바르게 이루어졌는지 확인하기 위해 다음 명령을 실행할 수 있습니다.
97 | 98 | ```bash 99 | # 가상 환경이 활성화된 상태에서 100 | cd case1 101 | python auto_mcp_json.py 102 | ``` 103 | 104 | 성공적으로 JSON 파일이 생성되면 설치가 완료된 것입니다. 105 | 106 | --- 107 | 108 | ## Installation Guide 109 | 110 | This document provides detailed instructions for installing the Quick-start Auto MCP project. 111 | 112 | ### System Requirements 113 | 114 | - Python 3.11 or higher 115 | - Claude Desktop or Cursor (MCP supporting version) 116 | - Git (optional) 117 | 118 | ### Installation Steps 119 | 120 | #### 1. Clone the Repository 121 | 122 | Clone the repository using Git: 123 | 124 | ```bash 125 | git clone https://github.com/teddynote-lab/mcp-usecase.git 126 | cd mcp-usecase 127 | ``` 128 | 129 | Alternatively, you can download and extract the ZIP file from GitHub. 130 | 131 | #### 2. Set Up Virtual Environment 132 | 133 | ##### Using uv (recommended) 134 | 135 | [uv](https://github.com/astral-sh/uv) is a faster Python package installer and environment manager. If you haven't installed it yet, install it first. 136 | 137 | ```bash 138 | # Install uv (macOS/Linux) 139 | curl -LsSf https://astral.sh/uv/install.sh | sh 140 | 141 | # Install uv (Windows PowerShell) 142 | powershell -c "irm https://astral.sh/uv/install.ps1 | iex" 143 | ``` 144 | 145 | Set up a virtual environment and install packages using uv. 146 | 147 | ```bash 148 | uv venv 149 | uv pip install -r requirements.txt 150 | ``` 151 | 152 | ##### Using pip 153 | 154 | You can also set up a virtual environment using traditional Python tools. 155 | 156 | ```bash 157 | # Create virtual environment 158 | python -m venv .venv 159 | 160 | # Activate virtual environment (Windows) 161 | .venv\Scripts\activate 162 | 163 | # Activate virtual environment (macOS/Linux) 164 | source .venv/bin/activate 165 | 166 | # Install packages 167 | pip install -r requirements.txt 168 | ``` 169 | 170 | #### 3. Set Environment Variables for Each Case 171 | 172 | Set the necessary environment variables in the `.env.example` file found in the root directory, then rename the file to `.env`. 173 | 174 | ##### case1 (RAG) 175 | ``` 176 | OPENAI_API_KEY = "your-openai-api-key" 177 | ``` 178 | 179 | ##### case2 (Dify External Knowledge API) 180 | ``` 181 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 182 | DIFY_API_KEY = your-dify-api-key 183 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 184 | ``` 185 | 186 | ##### case3 (Dify Workflow) 187 | ``` 188 | DIFY_BASE_URL = https://api.dify.ai/v1 189 | DIFY_APP_SK = your-dify-app-sk 190 | ``` 191 | 192 | ##### case4 (Web Search) 193 | ``` 194 | TAVILY_API_KEY = your-tavily-api-key 195 | ``` 196 | 197 | ### Verify Installation 198 | 199 | To verify that the installation has been completed successfully, you can run the following command: 200 | 201 | ```bash 202 | # With the virtual environment activated 203 | cd case1 204 | python auto_mcp_json.py 205 | ``` 206 | 207 | If a JSON file is successfully generated, the installation is complete.
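For reference, the `mcp_config.json` generated by `auto_mcp_json.py` follows the standard MCP server configuration shape. A minimal sketch for case1 is shown below; the server name, paths, and key values here are illustrative and will differ on your machine:

```json
{
  "mcpServers": {
    "case1-rag": {
      "command": "/path/to/mcp-usecase/.venv/bin/python",
      "args": ["/path/to/mcp-usecase/case1/mcp_server.py"],
      "env": {
        "OPENAI_API_KEY": "sk-..."
      }
    }
  }
}
```

This is the content you copy into the MCP settings of Claude Desktop or Cursor.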
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "mcp" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [] 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.6.1 2 | aiohttp==3.11.14 3 | aiosignal==1.3.2 4 | annotated-types==0.7.0 5 | antlr4-python3-runtime==4.9.3 6 | anyio==4.9.0 7 | asgiref==3.8.1 8 | attrs==25.3.0 9 | backoff==2.2.1 10 | bcrypt==4.3.0 11 | build==1.2.2.post1 12 | cachetools==5.5.2 13 | certifi==2025.1.31 14 | cffi==1.17.1 15 | charset-normalizer==3.4.1 16 | chroma-hnswlib==0.7.6 17 | chromadb==0.6.3 18 | click==8.1.8 19 | coloredlogs==15.0.1 20 | cryptography==44.0.2 21 | dataclasses-json==0.6.7 22 | deprecated==1.2.18 23 | distro==1.9.0 24 | durationpy==0.9 25 | fastapi==0.115.11 26 | filelock==3.18.0 27 | flatbuffers==25.2.10 28 | frozenlist==1.5.0 29 | fsspec==2025.3.0 30 | google-auth==2.38.0 31 | googleapis-common-protos==1.69.1 32 | grpcio==1.71.0 33 | h11==0.14.0 34 | httpcore==1.0.7 35 | httptools==0.6.4 36 | httpx==0.28.1 37 | httpx-sse==0.4.0 38 | huggingface-hub==0.29.3 39 | humanfriendly==10.0 40 | idna==3.10 41 | importlib-metadata==8.6.1 42 | importlib-resources==6.5.2 43 | jiter==0.9.0 44 | jsonpatch==1.33 45 | jsonpointer==3.0.0 46 | kubernetes==32.0.1 47 | langchain==0.3.20 48 | langchain-chroma==0.2.2 49 | langchain-community==0.3.19 50 | langchain-core==0.3.45 51 | langchain-openai==0.3.8 52 | langchain-text-splitters==0.3.6 53 | langgraph==0.3.11 54 | langgraph-checkpoint==2.0.20 55 | langgraph-prebuilt==0.1.3 56 | langgraph-sdk==0.1.57 57 | langsmith==0.3.15 58 | markdown-it-py==3.0.0 59 | marshmallow==3.26.1 60 | mcp==1.4.1 61 | mdurl==0.1.2 62 | mmh3==5.1.0 63 | monotonic==1.6 64 | mpmath==1.3.0 65 | msgpack==1.1.0 66 | multidict==6.1.0 67 | mypy-extensions==1.0.0 68 | numpy==1.26.4 69 | oauthlib==3.2.2 70 | omegaconf==2.3.0 71 | onnxruntime==1.21.0 72 | openai==1.66.3 73 | opentelemetry-api==1.31.0 74 | opentelemetry-exporter-otlp-proto-common==1.31.0 75 | opentelemetry-exporter-otlp-proto-grpc==1.31.0 76 | opentelemetry-instrumentation==0.52b0 77 | opentelemetry-instrumentation-asgi==0.52b0 78 | opentelemetry-instrumentation-fastapi==0.52b0 79 | opentelemetry-proto==1.31.0 80 | opentelemetry-sdk==1.31.0 81 | opentelemetry-semantic-conventions==0.52b0 82 | opentelemetry-util-http==0.52b0 83 | orjson==3.10.15 84 | overrides==7.7.0 85 | packaging==24.2 86 | pdfminer-six==20231228 87 | pdfplumber==0.11.5 88 | pillow==11.1.0 89 | posthog==3.20.0 90 | propcache==0.3.0 91 | protobuf==5.29.3 92 | pyasn1==0.6.1 93 | pyasn1-modules==0.4.1 94 | pycparser==2.22 95 | pydantic==2.10.6 96 | pydantic-core==2.27.2 97 | pydantic-settings==2.8.1 98 | pygments==2.19.1 99 | pypdf==5.4.0 100 | pypdfium2==4.30.1 101 | pypika==0.48.9 102 | pyproject-hooks==1.2.0 103 | python-dateutil==2.9.0.post0 104 | python-dotenv==1.0.1 105 | pyyaml==6.0.2 106 | rank-bm25==0.2.2 107 | regex==2024.11.6 108 | requests==2.32.3 109 | requests-oauthlib==2.0.0 110 | requests-toolbelt==1.0.0 111 | rich==13.9.4 112 | rsa==4.9 113 | shellingham==1.5.4 114 | six==1.17.0 115 | sniffio==1.3.1 116 | sqlalchemy==2.0.39 117 | sse-starlette==2.2.1 118 | starlette==0.46.1 119 | sympy==1.13.3 120 | 
tavily-python==0.5.1 121 | tenacity==9.0.0 122 | tiktoken==0.9.0 123 | tokenizers==0.21.1 124 | tqdm==4.67.1 125 | typer==0.15.2 126 | typing-extensions==4.12.2 127 | typing-inspect==0.9.0 128 | urllib3==2.3.0 129 | uvicorn==0.34.0 130 | uvloop==0.21.0 131 | watchfiles==1.0.4 132 | websocket-client==1.8.0 133 | websockets==15.0.1 134 | wrapt==1.17.2 135 | yarl==1.18.3 136 | zipp==3.21.0 137 | zstandard==0.23.0 138 | -------------------------------------------------------------------------------- /requirements_windows.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.6.1 2 | aiohttp==3.11.14 3 | aiosignal==1.3.2 4 | annotated-types==0.7.0 5 | antlr4-python3-runtime==4.9.3 6 | anyio==4.9.0 7 | asgiref==3.8.1 8 | attrs==25.3.0 9 | backoff==2.2.1 10 | bcrypt==4.3.0 11 | build==1.2.2.post1 12 | cachetools==5.5.2 13 | certifi==2025.1.31 14 | cffi==1.17.1 15 | charset-normalizer==3.4.1 16 | chroma-hnswlib==0.7.6 17 | chromadb==0.6.3 18 | click==8.1.8 19 | coloredlogs==15.0.1 20 | cryptography==44.0.2 21 | dataclasses-json==0.6.7 22 | deprecated==1.2.18 23 | distro==1.9.0 24 | durationpy==0.9 25 | fastapi==0.115.11 26 | filelock==3.18.0 27 | flatbuffers==25.2.10 28 | frozenlist==1.5.0 29 | fsspec==2025.3.0 30 | google-auth==2.38.0 31 | googleapis-common-protos==1.69.1 32 | grpcio==1.71.0 33 | h11==0.14.0 34 | httpcore==1.0.7 35 | httptools==0.6.4 36 | httpx==0.28.1 37 | httpx-sse==0.4.0 38 | huggingface-hub==0.29.3 39 | humanfriendly==10.0 40 | idna==3.10 41 | importlib-metadata==8.6.1 42 | importlib-resources==6.5.2 43 | jiter==0.9.0 44 | jsonpatch==1.33 45 | jsonpointer==3.0.0 46 | kubernetes==32.0.1 47 | langchain==0.3.20 48 | langchain-chroma==0.2.2 49 | langchain-community==0.3.19 50 | langchain-core==0.3.45 51 | langchain-openai==0.3.8 52 | langchain-text-splitters==0.3.6 53 | langgraph==0.3.11 54 | langgraph-checkpoint==2.0.20 55 | langgraph-prebuilt==0.1.3 56 | langgraph-sdk==0.1.57 57 | langsmith==0.3.15 58 | markdown-it-py==3.0.0 59 | marshmallow==3.26.1 60 | mcp==1.4.1 61 | mdurl==0.1.2 62 | mmh3==5.1.0 63 | monotonic==1.6 64 | mpmath==1.3.0 65 | msgpack==1.1.0 66 | multidict==6.1.0 67 | mypy-extensions==1.0.0 68 | numpy==1.26.4 69 | oauthlib==3.2.2 70 | omegaconf==2.3.0 71 | onnxruntime==1.21.0 72 | openai==1.66.3 73 | opentelemetry-api==1.31.0 74 | opentelemetry-exporter-otlp-proto-common==1.31.0 75 | opentelemetry-exporter-otlp-proto-grpc==1.31.0 76 | opentelemetry-instrumentation==0.52b0 77 | opentelemetry-instrumentation-asgi==0.52b0 78 | opentelemetry-instrumentation-fastapi==0.52b0 79 | opentelemetry-proto==1.31.0 80 | opentelemetry-sdk==1.31.0 81 | opentelemetry-semantic-conventions==0.52b0 82 | opentelemetry-util-http==0.52b0 83 | orjson==3.10.15 84 | overrides==7.7.0 85 | packaging==24.2 86 | pdfminer-six==20231228 87 | pdfplumber==0.11.5 88 | pillow==11.1.0 89 | posthog==3.20.0 90 | propcache==0.3.0 91 | protobuf==5.29.3 92 | pyasn1==0.6.1 93 | pyasn1-modules==0.4.1 94 | pycparser==2.22 95 | pydantic==2.10.6 96 | pydantic-core==2.27.2 97 | pydantic-settings==2.8.1 98 | pygments==2.19.1 99 | pypdf==5.4.0 100 | pypdfium2==4.30.1 101 | pypika==0.48.9 102 | pyproject-hooks==1.2.0 103 | python-dateutil==2.9.0.post0 104 | python-dotenv==1.0.1 105 | pyyaml==6.0.2 106 | rank-bm25==0.2.2 107 | regex==2024.11.6 108 | requests==2.32.3 109 | requests-oauthlib==2.0.0 110 | requests-toolbelt==1.0.0 111 | rich==13.9.4 112 | rsa==4.9 113 | shellingham==1.5.4 114 | six==1.17.0 115 | sniffio==1.3.1 116 | sqlalchemy==2.0.39 117 | 
sse-starlette==2.2.1 118 | starlette==0.46.1 119 | sympy==1.13.3 120 | tavily-python==0.5.1 121 | tenacity==9.0.0 122 | tiktoken==0.9.0 123 | tokenizers==0.21.1 124 | tqdm==4.67.1 125 | typer==0.15.2 126 | typing-extensions==4.12.2 127 | typing-inspect==0.9.0 128 | urllib3==2.3.0 129 | uvicorn==0.34.0 130 | winloop==0.1.8 131 | watchfiles==1.0.4 132 | websocket-client==1.8.0 133 | websockets==15.0.1 134 | wrapt==1.17.2 135 | yarl==1.18.3 136 | zipp==3.21.0 137 | zstandard==0.23.0 138 | -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.6.1 2 | # via 3 | # -r requirements.txt 4 | # aiohttp 5 | aiohttp==3.11.14 6 | # via 7 | # -r requirements.txt 8 | # langchain-community 9 | aiosignal==1.3.2 10 | # via 11 | # -r requirements.txt 12 | # aiohttp 13 | annotated-types==0.7.0 14 | # via 15 | # -r requirements.txt 16 | # pydantic 17 | antlr4-python3-runtime==4.9.3 18 | # via 19 | # -r requirements.txt 20 | # omegaconf 21 | anyio==4.9.0 22 | # via 23 | # -r requirements.txt 24 | # httpx 25 | # mcp 26 | # openai 27 | # sse-starlette 28 | # starlette 29 | # watchfiles 30 | asgiref==3.8.1 31 | # via 32 | # -r requirements.txt 33 | # opentelemetry-instrumentation-asgi 34 | attrs==25.3.0 35 | # via 36 | # -r requirements.txt 37 | # aiohttp 38 | backoff==2.2.1 39 | # via 40 | # -r requirements.txt 41 | # posthog 42 | bcrypt==4.3.0 43 | # via 44 | # -r requirements.txt 45 | # chromadb 46 | build==1.2.2.post1 47 | # via 48 | # -r requirements.txt 49 | # chromadb 50 | cachetools==5.5.2 51 | # via 52 | # -r requirements.txt 53 | # google-auth 54 | certifi==2025.1.31 55 | # via 56 | # -r requirements.txt 57 | # httpcore 58 | # httpx 59 | # kubernetes 60 | # requests 61 | cffi==1.17.1 62 | # via 63 | # -r requirements.txt 64 | # cryptography 65 | charset-normalizer==3.4.1 66 | # via 67 | # -r requirements.txt 68 | # pdfminer-six 69 | # requests 70 | chroma-hnswlib==0.7.6 71 | # via 72 | # -r requirements.txt 73 | # chromadb 74 | chromadb==0.6.3 75 | # via 76 | # -r requirements.txt 77 | # langchain-chroma 78 | click==8.1.8 79 | # via 80 | # -r requirements.txt 81 | # typer 82 | # uvicorn 83 | coloredlogs==15.0.1 84 | # via 85 | # -r requirements.txt 86 | # onnxruntime 87 | cryptography==44.0.2 88 | # via 89 | # -r requirements.txt 90 | # pdfminer-six 91 | dataclasses-json==0.6.7 92 | # via 93 | # -r requirements.txt 94 | # langchain-community 95 | deprecated==1.2.18 96 | # via 97 | # -r requirements.txt 98 | # opentelemetry-api 99 | # opentelemetry-exporter-otlp-proto-grpc 100 | # opentelemetry-semantic-conventions 101 | distro==1.9.0 102 | # via 103 | # -r requirements.txt 104 | # openai 105 | # posthog 106 | durationpy==0.9 107 | # via 108 | # -r requirements.txt 109 | # kubernetes 110 | fastapi==0.115.11 111 | # via 112 | # -r requirements.txt 113 | # chromadb 114 | filelock==3.18.0 115 | # via 116 | # -r requirements.txt 117 | # huggingface-hub 118 | flatbuffers==25.2.10 119 | # via 120 | # -r requirements.txt 121 | # onnxruntime 122 | frozenlist==1.5.0 123 | # via 124 | # -r requirements.txt 125 | # aiohttp 126 | # aiosignal 127 | fsspec==2025.3.0 128 | # via 129 | # -r requirements.txt 130 | # huggingface-hub 131 | google-auth==2.38.0 132 | # via 133 | # -r requirements.txt 134 | # kubernetes 135 | googleapis-common-protos==1.69.1 136 | # via 137 | # -r requirements.txt 138 | # opentelemetry-exporter-otlp-proto-grpc 139 | grpcio==1.71.0 140 | # via 141 | # -r 
requirements.txt
142 |     #   chromadb
143 |     #   opentelemetry-exporter-otlp-proto-grpc
144 | h11==0.14.0
145 |     # via
146 |     #   -r requirements.txt
147 |     #   httpcore
148 |     #   uvicorn
149 | httpcore==1.0.7
150 |     # via
151 |     #   -r requirements.txt
152 |     #   httpx
153 | httptools==0.6.4
154 |     # via
155 |     #   -r requirements.txt
156 |     #   uvicorn
157 | httpx==0.28.1
158 |     # via
159 |     #   -r requirements.txt
160 |     #   chromadb
161 |     #   langgraph-sdk
162 |     #   langsmith
163 |     #   mcp
164 |     #   openai
165 |     #   tavily-python
166 | httpx-sse==0.4.0
167 |     # via
168 |     #   -r requirements.txt
169 |     #   langchain-community
170 |     #   mcp
171 | huggingface-hub==0.29.3
172 |     # via
173 |     #   -r requirements.txt
174 |     #   tokenizers
175 | humanfriendly==10.0
176 |     # via
177 |     #   -r requirements.txt
178 |     #   coloredlogs
179 | idna==3.10
180 |     # via
181 |     #   -r requirements.txt
182 |     #   anyio
183 |     #   httpx
184 |     #   requests
185 |     #   yarl
186 | importlib-metadata==8.6.1
187 |     # via
188 |     #   -r requirements.txt
189 |     #   opentelemetry-api
190 | importlib-resources==6.5.2
191 |     # via
192 |     #   -r requirements.txt
193 |     #   chromadb
194 | jiter==0.9.0
195 |     # via
196 |     #   -r requirements.txt
197 |     #   openai
198 | jsonpatch==1.33
199 |     # via
200 |     #   -r requirements.txt
201 |     #   langchain-core
202 | jsonpointer==3.0.0
203 |     # via
204 |     #   -r requirements.txt
205 |     #   jsonpatch
206 | kubernetes==32.0.1
207 |     # via
208 |     #   -r requirements.txt
209 |     #   chromadb
210 | langchain==0.3.20
211 |     # via
212 |     #   -r requirements.txt
213 |     #   langchain-community
214 | langchain-chroma==0.2.2
215 |     # via -r requirements.txt
216 | langchain-community==0.3.19
217 |     # via -r requirements.txt
218 | langchain-core==0.3.45
219 |     # via
220 |     #   -r requirements.txt
221 |     #   langchain
222 |     #   langchain-chroma
223 |     #   langchain-community
224 |     #   langchain-openai
225 |     #   langchain-text-splitters
226 |     #   langgraph
227 |     #   langgraph-checkpoint
228 |     #   langgraph-prebuilt
229 | langchain-openai==0.3.8
230 |     # via -r requirements.txt
231 | langchain-text-splitters==0.3.6
232 |     # via
233 |     #   -r requirements.txt
234 |     #   langchain
235 | langgraph==0.3.11
236 |     # via -r requirements.txt
237 | langgraph-checkpoint==2.0.20
238 |     # via
239 |     #   -r requirements.txt
240 |     #   langgraph
241 |     #   langgraph-prebuilt
242 | langgraph-prebuilt==0.1.3
243 |     # via
244 |     #   -r requirements.txt
245 |     #   langgraph
246 | langgraph-sdk==0.1.57
247 |     # via
248 |     #   -r requirements.txt
249 |     #   langgraph
250 | langsmith==0.3.15
251 |     # via
252 |     #   -r requirements.txt
253 |     #   langchain
254 |     #   langchain-community
255 |     #   langchain-core
256 | markdown-it-py==3.0.0
257 |     # via
258 |     #   -r requirements.txt
259 |     #   rich
260 | marshmallow==3.26.1
261 |     # via
262 |     #   -r requirements.txt
263 |     #   dataclasses-json
264 | mcp==1.4.1
265 |     # via -r requirements.txt
266 | mdurl==0.1.2
267 |     # via
268 |     #   -r requirements.txt
269 |     #   markdown-it-py
270 | mmh3==5.1.0
271 |     # via
272 |     #   -r requirements.txt
273 |     #   chromadb
274 | monotonic==1.6
275 |     # via
276 |     #   -r requirements.txt
277 |     #   posthog
278 | mpmath==1.3.0
279 |     # via
280 |     #   -r requirements.txt
281 |     #   sympy
282 | msgpack==1.1.0
283 |     # via
284 |     #   -r requirements.txt
285 |     #   langgraph-checkpoint
286 | multidict==6.1.0
287 |     # via
288 |     #   -r requirements.txt
289 |     #   aiohttp
290 |     #   yarl
291 | mypy-extensions==1.0.0
292 |     # via
293 |     #   -r requirements.txt
294 |     #   typing-inspect
295 | numpy==1.26.4
296 |     # via
297 |     #   -r requirements.txt
298 |     #   chroma-hnswlib
299 |     #   chromadb
300 |     #   langchain-chroma
301 |     #   langchain-community
302 |     #   onnxruntime
303 |     #   rank-bm25
304 | oauthlib==3.2.2
305 |     # via
306 |     #   -r requirements.txt
307 |     #   kubernetes
308 |     #   requests-oauthlib
309 | omegaconf==2.3.0
310 |     # via -r requirements.txt
311 | onnxruntime==1.21.0
312 |     # via
313 |     #   -r requirements.txt
314 |     #   chromadb
315 | openai==1.66.3
316 |     # via
317 |     #   -r requirements.txt
318 |     #   langchain-openai
319 | opentelemetry-api==1.31.0
320 |     # via
321 |     #   -r requirements.txt
322 |     #   chromadb
323 |     #   opentelemetry-exporter-otlp-proto-grpc
324 |     #   opentelemetry-instrumentation
325 |     #   opentelemetry-instrumentation-asgi
326 |     #   opentelemetry-instrumentation-fastapi
327 |     #   opentelemetry-sdk
328 |     #   opentelemetry-semantic-conventions
329 | opentelemetry-exporter-otlp-proto-common==1.31.0
330 |     # via
331 |     #   -r requirements.txt
332 |     #   opentelemetry-exporter-otlp-proto-grpc
333 | opentelemetry-exporter-otlp-proto-grpc==1.31.0
334 |     # via
335 |     #   -r requirements.txt
336 |     #   chromadb
337 | opentelemetry-instrumentation==0.52b0
338 |     # via
339 |     #   -r requirements.txt
340 |     #   opentelemetry-instrumentation-asgi
341 |     #   opentelemetry-instrumentation-fastapi
342 | opentelemetry-instrumentation-asgi==0.52b0
343 |     # via
344 |     #   -r requirements.txt
345 |     #   opentelemetry-instrumentation-fastapi
346 | opentelemetry-instrumentation-fastapi==0.52b0
347 |     # via
348 |     #   -r requirements.txt
349 |     #   chromadb
350 | opentelemetry-proto==1.31.0
351 |     # via
352 |     #   -r requirements.txt
353 |     #   opentelemetry-exporter-otlp-proto-common
354 |     #   opentelemetry-exporter-otlp-proto-grpc
355 | opentelemetry-sdk==1.31.0
356 |     # via
357 |     #   -r requirements.txt
358 |     #   chromadb
359 |     #   opentelemetry-exporter-otlp-proto-grpc
360 | opentelemetry-semantic-conventions==0.52b0
361 |     # via
362 |     #   -r requirements.txt
363 |     #   opentelemetry-instrumentation
364 |     #   opentelemetry-instrumentation-asgi
365 |     #   opentelemetry-instrumentation-fastapi
366 |     #   opentelemetry-sdk
367 | opentelemetry-util-http==0.52b0
368 |     # via
369 |     #   -r requirements.txt
370 |     #   opentelemetry-instrumentation-asgi
371 |     #   opentelemetry-instrumentation-fastapi
372 | orjson==3.10.15
373 |     # via
374 |     #   -r requirements.txt
375 |     #   chromadb
376 |     #   langgraph-sdk
377 |     #   langsmith
378 | overrides==7.7.0
379 |     # via
380 |     #   -r requirements.txt
381 |     #   chromadb
382 | packaging==24.2
383 |     # via
384 |     #   -r requirements.txt
385 |     #   build
386 |     #   huggingface-hub
387 |     #   langchain-core
388 |     #   langsmith
389 |     #   marshmallow
390 |     #   onnxruntime
391 |     #   opentelemetry-instrumentation
392 | pdfminer-six==20231228
393 |     # via
394 |     #   -r requirements.txt
395 |     #   pdfplumber
396 | pdfplumber==0.11.5
397 |     # via -r requirements.txt
398 | pillow==11.1.0
399 |     # via
400 |     #   -r requirements.txt
401 |     #   pdfplumber
402 | posthog==3.20.0
403 |     # via
404 |     #   -r requirements.txt
405 |     #   chromadb
406 | propcache==0.3.0
407 |     # via
408 |     #   -r requirements.txt
409 |     #   aiohttp
410 |     #   yarl
411 | protobuf==5.29.3
412 |     # via
413 |     #   -r requirements.txt
414 |     #   googleapis-common-protos
415 |     #   onnxruntime
416 |     #   opentelemetry-proto
417 | pyasn1==0.6.1
418 |     # via
419 |     #   -r requirements.txt
420 |     #   pyasn1-modules
421 |     #   rsa
422 | pyasn1-modules==0.4.1
423 |     # via
424 |     #   -r requirements.txt
425 |     #   google-auth
426 | pycparser==2.22
427 |     # via
428 |     #   -r requirements.txt
429 |     #   cffi
430 | pydantic==2.10.6
431 |     # via
432 |     #   -r requirements.txt
433 |     #   chromadb
434 |     #   fastapi
435 |     #   langchain
436 |     #   langchain-core
437 |     #   langsmith
438 |     #   mcp
439 |     #   openai
440 |     #   pydantic-settings
441 | pydantic-core==2.27.2
442 |     # via
443 |     #   -r requirements.txt
444 |     #   pydantic
445 | pydantic-settings==2.8.1
446 |     # via
447 |     #   -r requirements.txt
448 |     #   langchain-community
449 |     #   mcp
450 | pygments==2.19.1
451 |     # via
452 |     #   -r requirements.txt
453 |     #   rich
454 | pypdf==5.4.0
455 |     # via -r requirements.txt
456 | pypdfium2==4.30.1
457 |     # via
458 |     #   -r requirements.txt
459 |     #   pdfplumber
460 | pypika==0.48.9
461 |     # via
462 |     #   -r requirements.txt
463 |     #   chromadb
464 | pyproject-hooks==1.2.0
465 |     # via
466 |     #   -r requirements.txt
467 |     #   build
468 | python-dateutil==2.9.0.post0
469 |     # via
470 |     #   -r requirements.txt
471 |     #   kubernetes
472 |     #   posthog
473 | python-dotenv==1.0.1
474 |     # via
475 |     #   -r requirements.txt
476 |     #   pydantic-settings
477 |     #   uvicorn
478 | pyyaml==6.0.2
479 |     # via
480 |     #   -r requirements.txt
481 |     #   chromadb
482 |     #   huggingface-hub
483 |     #   kubernetes
484 |     #   langchain
485 |     #   langchain-community
486 |     #   langchain-core
487 |     #   omegaconf
488 |     #   uvicorn
489 | rank-bm25==0.2.2
490 |     # via -r requirements.txt
491 | regex==2024.11.6
492 |     # via
493 |     #   -r requirements.txt
494 |     #   tiktoken
495 | requests==2.32.3
496 |     # via
497 |     #   -r requirements.txt
498 |     #   huggingface-hub
499 |     #   kubernetes
500 |     #   langchain
501 |     #   langchain-community
502 |     #   langsmith
503 |     #   posthog
504 |     #   requests-oauthlib
505 |     #   requests-toolbelt
506 |     #   tavily-python
507 |     #   tiktoken
508 | requests-oauthlib==2.0.0
509 |     # via
510 |     #   -r requirements.txt
511 |     #   kubernetes
512 | requests-toolbelt==1.0.0
513 |     # via
514 |     #   -r requirements.txt
515 |     #   langsmith
516 | rich==13.9.4
517 |     # via
518 |     #   -r requirements.txt
519 |     #   chromadb
520 |     #   typer
521 | rsa==4.9
522 |     # via
523 |     #   -r requirements.txt
524 |     #   google-auth
525 | shellingham==1.5.4
526 |     # via
527 |     #   -r requirements.txt
528 |     #   typer
529 | six==1.17.0
530 |     # via
531 |     #   -r requirements.txt
532 |     #   kubernetes
533 |     #   posthog
534 |     #   python-dateutil
535 | sniffio==1.3.1
536 |     # via
537 |     #   -r requirements.txt
538 |     #   anyio
539 |     #   openai
540 | sqlalchemy==2.0.39
541 |     # via
542 |     #   -r requirements.txt
543 |     #   langchain
544 |     #   langchain-community
545 | sse-starlette==2.2.1
546 |     # via
547 |     #   -r requirements.txt
548 |     #   mcp
549 | starlette==0.46.1
550 |     # via
551 |     #   -r requirements.txt
552 |     #   fastapi
553 |     #   mcp
554 |     #   sse-starlette
555 | sympy==1.13.3
556 |     # via
557 |     #   -r requirements.txt
558 |     #   onnxruntime
559 | tavily-python==0.5.1
560 |     # via -r requirements.txt
561 | tenacity==9.0.0
562 |     # via
563 |     #   -r requirements.txt
564 |     #   chromadb
565 |     #   langchain-community
566 |     #   langchain-core
567 | tiktoken==0.9.0
568 |     # via
569 |     #   -r requirements.txt
570 |     #   langchain-openai
571 |     #   tavily-python
572 | tokenizers==0.21.1
573 |     # via
574 |     #   -r requirements.txt
575 |     #   chromadb
576 | tqdm==4.67.1
577 |     # via
578 |     #   -r requirements.txt
579 |     #   chromadb
580 |     #   huggingface-hub
581 |     #   openai
582 | typer==0.15.2
583 |     # via
584 |     #   -r requirements.txt
585 |     #   chromadb
586 | typing-extensions==4.12.2
587 |     # via
588 |     #   -r requirements.txt
589 |     #   anyio
590 |     #   chromadb
591 |     #   fastapi
592 |     #   huggingface-hub
593 |     #   langchain-core
594 |     #   openai
595 |     #   opentelemetry-sdk
596 |     #   pydantic
597 |     #   pydantic-core
598 |     #   sqlalchemy
599 |     #   typer
600 |     #   typing-inspect
601 | typing-inspect==0.9.0
602 |     # via
603 |     #   -r requirements.txt
604 |     #   dataclasses-json
605 | urllib3==2.3.0
606 |     # via
607 |     #   -r requirements.txt
608 |     #   kubernetes
609 |     #   requests
610 | uvicorn==0.34.0
611 |     # via
612 |     #   -r requirements.txt
613 |     #   chromadb
614 |     #   mcp
615 | uvloop==0.21.0
616 |     # via
617 |     #   -r requirements.txt
618 |     #   uvicorn
619 | watchfiles==1.0.4
620 |     # via
621 |     #   -r requirements.txt
622 |     #   uvicorn
623 | websocket-client==1.8.0
624 |     # via
625 |     #   -r requirements.txt
626 |     #   kubernetes
627 | websockets==15.0.1
628 |     # via
629 |     #   -r requirements.txt
630 |     #   uvicorn
631 | wrapt==1.17.2
632 |     # via
633 |     #   -r requirements.txt
634 |     #   deprecated
635 |     #   opentelemetry-instrumentation
636 | yarl==1.18.3
637 |     # via
638 |     #   -r requirements.txt
639 |     #   aiohttp
640 | zipp==3.21.0
641 |     # via
642 |     #   -r requirements.txt
643 |     #   importlib-metadata
644 | zstandard==0.23.0
645 |     # via
646 |     #   -r requirements.txt
647 |     #   langsmith
648 |
--------------------------------------------------------------------------------