├── .env.example ├── .gitignore ├── LICENSE.md ├── README.md ├── README_KR.md ├── case1 │ ├── auto_mcp_json.py │ ├── config.py │ ├── mcp_server.py │ └── rag │ │ ├── __init__.py │ │ ├── base.py │ │ └── pdf.py ├── case2 │ ├── auto_mcp_json.py │ ├── dify_ek_server.py │ └── mcp_server.py ├── case3 │ ├── auto_mcp_json.py │ └── mcp_server.py ├── case4 │ ├── auto_mcp_json.py │ └── mcp_server.py ├── data │ └── .gitkeep ├── docs │ ├── case1.md │ ├── case2.md │ ├── case3.md │ ├── case4.md │ └── installation.md ├── pyproject.toml ├── requirements.txt ├── requirements_windows.txt └── uv.lock /.env.example: -------------------------------------------------------------------------------- 1 | # case1: OpenAI API Setting 2 | OPENAI_API_KEY = "sk-" 3 | 4 | # case2: Dify External Knowledge API Setting example 5 | DIFY_API_ENDPOINT = "http://localhost:8000/retrieval" 6 | DIFY_API_KEY = "dify-external-knowledge-api-key" 7 | DIFY_KNOWLEDGE_ID = "test-knowledge-base" 8 | 9 | # case3: Dify Workflow API Setting example 10 | DIFY_BASE_URL = "https://api.dify.ai/v1" 11 | DIFY_APP_SK = "app-" 12 | 13 | # case4: Tavily API Setting example 14 | TAVILY_API_KEY = "tvly" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries.
101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | .python-version 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | 171 | # PyPI configuration file 172 | .pypirc 173 | 174 | # etc 175 | vector* 176 | chroma* 177 | .DS_Store 178 | *.pdf -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 TeddyNote 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quick-start Auto MCP : All in one Claude Desktop and Cursor 2 | 3 | [English](README.md) | [한국어](README_KR.md) 4 | 5 | ## Introduction 6 | 7 | **Quick-start Auto MCP** is a tool that helps you easily and quickly register Anthropic's Model Context Protocol (MCP) in Claude Desktop and Cursor. 8 | 9 | **Key advantages:** 10 | 1. **Quick Setup**: Add MCP functionality to Claude Desktop and Cursor simply by running a tool and copying/pasting the generated JSON file. 11 | 2. **Various Tools Provided**: We continuously update useful MCP tools. Stay up to date with your personalized toolkit by starring and following us. :) 12 | 13 | ## Table of Contents 14 | 15 | - [Features](#features) 16 | - [Project Structure](#project-structure) 17 | - [Requirements](#requirements) 18 | - [Installation](#installation) 19 | - [Configuration](#configuration) 20 | - [Usage](#usage) 21 | - [Troubleshooting](#troubleshooting) 22 | - [License](#license) 23 | - [Contributing](#contributing) 24 | - [Contact](#contact) 25 | - [Author](#author) 26 | 27 | ## Features 28 | 29 | - **RAG (Retrieval Augmented Generation)** - Keyword, semantic, and hybrid search functionality for PDF documents 30 | - **Dify External Knowledge API** - Document search functionality via Dify's external knowledge API 31 | - **Dify Workflow** - Execute and retrieve results from Dify Workflow 32 | - **Web Search** - Real-time web search using Tavily API 33 | - **Automatic JSON Generation** - Automatically generate MCP JSON files needed for Claude Desktop and Cursor 34 | 35 | ## Project Structure 36 | 37 | ``` 38 | . 39 | ├── case1 # RAG example 40 | ├── case2 # Dify External Knowledge API example 41 | ├── case3 # Dify Workflow example 42 | ├── case4 # Web Search example 43 | ├── data # Example data files 44 | ├── docs # Documentation folder 45 | │ ├── case1.md # case1 description 🚨 Includes tips for optimized tool invocation 46 | │ ├── case2.md # case2 description 47 | │ ├── case3.md # case3 description 48 | │ ├── case4.md # case4 description 49 | │ └── installation.md # Installation guide 50 | ├── .env.example # .env example format 51 | ├── pyproject.toml # Project settings 52 | ├── requirements.txt # Required packages list 53 | └── uv.lock # uv.lock 54 | ``` 55 | 56 | ## Requirements 57 | 58 | - Python >= 3.11 59 | - Claude Desktop or Cursor (a version that supports MCP) 60 | - uv (recommended) or pip 61 | 62 | ## Installation 63 | 64 | ### 1. Clone the repository 65 | 66 | ```bash 67 | git clone https://github.com/teddynote-lab/mcp.git 68 | cd mcp 69 | ``` 70 | 71 | ### 2. Set up virtual environment 72 | 73 | #### Using uv (recommended) 74 | ```bash 75 | # macOS/Linux 76 | uv venv 77 | uv pip install -r requirements.txt 78 | ``` 79 | 80 | ```bash 81 | # Windows 82 | uv venv 83 | uv pip install -r requirements_windows.txt 84 | ``` 85 | 86 | #### Using pip 87 | ```bash 88 | python -m venv .venv 89 | 90 | # Windows 91 | .venv\Scripts\activate 92 | pip install -r requirements_windows.txt 93 | 94 | # macOS/Linux 95 | source .venv/bin/activate 96 | 97 | pip install -r requirements.txt 98 | ``` 99 | 100 | ### 3. Preparing the PDF File 101 | 102 | Please prepare a PDF file required for RAG in the `./data` directory. This project uses *Recent Trends in the AI Industry* from the [March 2025 issue of the SPRi monthly AI report](https://spri.kr/posts/view/23827?code=AI-Brief&s_year=&data_page=1). 103 | 104 | ## Configuration 105 | 106 | In order to execute each case, a `.env` file is required.
107 | Please specify the necessary environment variables in the `.env.example` file located in the root directory, and rename it to `.env`. 108 | 109 | ### Sites for configuring required environment variables for each case 110 | - https://platform.openai.com/api-keys 111 | - https://dify.ai/ 112 | - https://app.tavily.com/home 113 | 114 | ## Usage 115 | 116 | ### 1. Generate JSON File 117 | 118 | Run the following command in each case directory to generate the necessary JSON file: 119 | 120 | ```bash 121 | # Activate virtual environment 122 | 123 | # Windows 124 | .venv\Scripts\activate 125 | 126 | # macOS/Linux 127 | source .venv/bin/activate 128 | 129 | # Navigate to example directory 130 | cd case1 131 | 132 | # Generate JSON file 133 | python auto_mcp_json.py 134 | ``` 135 | 136 | ### 2. Register MCP in Claude Desktop/Cursor 137 | 138 | 1. Launch Claude Desktop or Cursor 139 | 2. Open MCP settings menu 140 | 3. Copy and paste the generated JSON content 141 | 4. Save and `restart` (If you're using Windows, we recommend fully closing the process via Task Manager and then restarting the application.) 142 | 143 | > **Note**: When you run Claude Desktop or Cursor, the MCP server will automatically run with it. When you close the software, the MCP server will also terminate. 144 | 145 | ## Troubleshooting 146 | 147 | Common issues and solutions: 148 | 149 | - **MCP Server Connection Failure**: Check if the service is running properly and if there are no port conflicts. In particular, when applying case2, you must also run `dify_ek_server.py`. 150 | - **API Key Errors**: Verify that environment variables are set correctly. 151 | - **Virtual Environment Issues**: Ensure Python version is 3.11 or higher. 152 | 153 | ## License 154 | 155 | [MIT LICENSE](LICENSE.md) 156 | 157 | ## Contributing 158 | 159 | Contributions are always welcome! Please participate in the project through issue registration or pull requests. :) 160 | 161 | ## Contact 162 | 163 | If you have questions or need help, please register an issue or contact: 164 | dev@brain-crew.com 165 | 166 | ## Author 167 | [Hantaek Lim](https://github.com/LHANTAEK) -------------------------------------------------------------------------------- /README_KR.md: -------------------------------------------------------------------------------- 1 | # Quick-start Auto MCP : All in one Claude Desktop and Cursor 2 | 3 | [English](README.md) | [한국어](README_KR.md) 4 | 5 | ## 소개 6 | 7 | **Quick-start Auto MCP**는 Anthropic의 Model Context Protocol(MCP)을 Claude Desktop과 Cursor에 쉽고 빠르게 등록하여 사용할 수 있도록 도와주는 도구입니다. 8 | 9 | **주요 장점:** 10 | 1. **빠른 설정**: 간단한 도구 실행 및 JSON 파일 복사/붙여넣기만으로 Claude Desktop과 Cursor에 MCP 기능을 바로 추가할 수 있습니다. 11 | 2. **다양한 도구 제공**: 유용한 MCP 도구들을 지속적으로 업데이트합니다. Star와 Follow를 통해 나만의 도구를 꾸준히 업데이트 해보세요. :) 12 | 13 | ## 목차 14 | 15 | - [특징](#특징) 16 | - [프로젝트 구조](#프로젝트-구조) 17 | - [요구사항](#요구사항) 18 | - [설치](#설치) 19 | - [환경 변수 설정](#환경-변수-설정) 20 | - [사용법](#사용법) 21 | - [문제해결](#문제해결) 22 | - [라이센스](#라이센스) 23 | - [기여하기](#기여하기) 24 | - [문의하기](#문의하기) 25 | - [저자](#저자) 26 | 27 | ## 특징 28 | 29 | - **RAG(Retrieval Augmented Generation)** - PDF 문서를 대상으로 키워드, 시맨틱, 하이브리드 검색 기능 30 | - **Dify External Knowledge API** - Dify의 외부 지식 API를 통한 문서 검색 기능 31 | - **Dify Workflow** - Dify Workflow 실행 및 결과 검색 기능 32 | - **Web Search** - Tavily API를 활용한 실시간 웹 검색 기능 33 | - **자동 JSON 생성** - Claude Desktop과 Cursor에 필요한 MCP JSON 파일 자동 생성 34 | 35 | ## 프로젝트 구조 36 | 37 | ``` 38 | .
39 | ├── case1 # RAG 예제 40 | ├── case2 # Dify External Knowledge API 예제 41 | ├── case3 # Dify Workflow 예제 42 | ├── case4 # Web Search 예제 43 | ├── data # PDF 데이터 파일 44 | ├── docs # 문서 폴더 45 | │ ├── case1.md # case1 예제 설명 🚨 도구 호출 최적화 팁 포함 46 | │ ├── case2.md # case2 예제 설명 47 | │ ├── case3.md # case3 예제 설명 48 | │ ├── case4.md # case4 예제 설명 49 | │ └── installation.md # 설치 가이드 50 | ├── .env.example # .env 예시 51 | ├── pyproject.toml # 프로젝트 설정 52 | ├── requirements.txt # 필요 패키지 목록 53 | └── uv.lock # uv.lock 54 | ``` 55 | 56 | ## 요구사항 57 | 58 | - Python >= 3.11 59 | - Claude Desktop 또는 Cursor 60 | - uv (권장) 또는 pip 61 | 62 | ## 설치 63 | 64 | ### 1. 저장소 복제 65 | 66 | ```bash 67 | git clone https://github.com/teddynote-lab/mcp.git 68 | cd mcp 69 | ``` 70 | 71 | ### 2. 가상 환경 설정 72 | 73 | #### uv 사용 (권장) 74 | ```bash 75 | # macOS/Linux 76 | uv venv 77 | uv pip install -r requirements.txt 78 | ``` 79 | 80 | ```bash 81 | # Windows 82 | uv venv 83 | uv pip install -r requirements_windows.txt 84 | ``` 85 | 86 | #### pip 사용 87 | ```bash 88 | python -m venv .venv 89 | 90 | # Windows 91 | .venv\Scripts\activate 92 | pip install -r requirements_windows.txt 93 | 94 | # macOS/Linux 95 | source .venv/bin/activate 96 | 97 | pip install -r requirements.txt 98 | ``` 99 | 100 | ### 3. PDF 준비 101 | 102 | RAG에 필요한 PDF 파일을 `./data`에 넣어주세요. 103 | 104 | 본 프로젝트에서는 [소프트웨어정책연구소(SPRi)의 25년 3월호](https://spri.kr/posts/view/23827?code=AI-Brief&s_year=&data_page=1) `인공지능 산업의 최신 동향`을 사용했습니다. 105 | 106 | ## 환경 변수 설정 107 | 108 | 각 예제를 실행하기 위한 `.env` 파일이 필요합니다. 루트 디렉토리의 `.env.example`에 필요한 환경 변수를 설정하고 파일명을 `.env`로 변경해주세요. 109 | 110 | ### 예제별 필요한 환경 변수 설정 사이트 111 | - https://platform.openai.com/api-keys 112 | - https://dify.ai/ 113 | - https://app.tavily.com/home 114 | 115 | 116 | ## 사용법 117 | 118 | ### 1. JSON 파일 생성 119 | 120 | 각 예제 디렉토리에서 다음 명령을 실행하여 필요한 JSON 파일을 생성합니다: 121 | 122 | ```bash 123 | # 가상 환경 활성화 124 | 125 | # Windows 126 | .venv\Scripts\activate 127 | 128 | # macOS/Linux 129 | source .venv/bin/activate 130 | 131 | # 예제 디렉토리로 이동 132 | cd case1 133 | 134 | # JSON 파일 생성 135 | python auto_mcp_json.py 136 | ``` 137 | 138 | ### 2. Claude Desktop/Cursor에 MCP 등록 139 | 140 | 1. Claude Desktop 또는 Cursor 실행 141 | 2. MCP 설정 메뉴 열기 142 | 3. 생성된 JSON 내용을 복사하여 붙여넣기 143 | 4. 저장 및 `재시작` (윈도우 유저의 경우 작업관리자로 프로세스를 완전히 종료하고 재시작 해주시는 걸 권장합니다.) 144 | 145 | > **참고**: Claude Desktop 또는 Cursor를 실행하면 MCP 서버가 자동으로 함께 실행되며, 소프트웨어를 종료하면 MCP 서버도 함께 종료됩니다. 146 | 147 | ## Troubleshooting 148 | 149 | 일반적인 문제 및 해결 방법: 150 | 151 | - **MCP 서버 연결 실패**: 서비스가 올바르게 실행 중인지, 포트가 충돌하지 않는지 확인하세요. 특히, case2를 적용할 때는 `dify_ek_server.py`를 같이 실행시켜주셔야 합니다. 152 | - **API 키 오류**: 환경 변수가 올바르게 설정되었는지 확인하세요. 153 | - **가상 환경 문제**: Python 버전이 3.11 이상인지 확인하세요. 154 | 155 | ## 라이센스 156 | 157 | [MIT 라이센스](LICENSE.md) 158 | 159 | ## 기여하기 160 | 161 | 기여는 언제나 환영합니다! 이슈, 버그 또는 기능 추가에 대한 의견을 남겨주세요. :) 162 | 163 | ## 문의하기 164 | 165 | 질문이나 도움이 필요하시면 이슈를 등록하거나 다음 연락처로 문의해 주세요: 166 | dev@brain-crew.com 167 | 168 | ## 저자 169 | 170 | [임한택 Hantaek Lim](https://github.com/LHANTAEK) -------------------------------------------------------------------------------- /case1/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | from dotenv import load_dotenv 6 | 7 | 8 | def get_env_variables(): 9 | """ 10 | Load environment variables and return required variables as a dictionary. 
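Example of the returned dict (illustrative placeholder values, assuming only
OPENAI_API_KEY is set in .env):

    {"OPENAI_API_KEY": "sk-...", "DEFAULT_TOP_K": "5"}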
11 | 12 | Returns: 13 | dict: Dictionary containing environment variables and default configuration values 14 | """ 15 | 16 | load_dotenv() 17 | 18 | required_vars = [ 19 | "OPENAI_API_KEY", 20 | ] 21 | 22 | config_vars = {"DEFAULT_TOP_K": "5"} 23 | 24 | env_dict = {} 25 | 26 | for var in required_vars: 27 | value = os.getenv(var) 28 | if value: 29 | env_dict[var] = value 30 | 31 | env_dict.update(config_vars) 32 | 33 | return env_dict 34 | 35 | 36 | def create_mcp_json(): 37 | """ 38 | Create a Model Context Protocol (MCP) server configuration JSON file. 39 | 40 | This function generates a configuration file that defines how the MCP server 41 | should be launched, including the Python interpreter path, server script location, 42 | and necessary environment variables. 43 | 44 | Returns: 45 | str: Path to the created JSON configuration file 46 | """ 47 | 48 | project_root = Path(__file__).parent.absolute() 49 | 50 | # .venv python executable path 51 | if os.name == 'nt': # Windows 52 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 53 | else: # Mac, Ubuntu etc 54 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 55 | 56 | server_script = project_root / "mcp_server.py" 57 | 58 | env_vars = get_env_variables() 59 | 60 | config = { 61 | "mcpServers": { 62 | "rag-mcp": { 63 | "command": python_path, 64 | "args": [str(server_script)], 65 | "env": env_vars, 66 | } 67 | } 68 | } 69 | 70 | json_path = project_root / "mcp_config.json" 71 | 72 | with open(json_path, "w", encoding="utf-8") as f: 73 | json.dump(config, f, indent=2) 74 | 75 | print(f"MCP configuration file has been created: {json_path}") 76 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 77 | 78 | return str(json_path) 79 | 80 | 81 | if __name__ == "__main__": 82 | create_mcp_json() 83 | -------------------------------------------------------------------------------- /case1/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | # Path settings 4 | DATA_DIR = Path(__file__).parent.parent / "data" 5 | VECTOR_DIR = Path(__file__).parent / "vector_db" 6 | 7 | # Default settings 8 | DEFAULT_CHUNK_SIZE = 600 9 | DEFAULT_CHUNK_OVERLAP = 50 10 | DEFAULT_TOP_K = 5 11 | DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small" 12 | DEFAULT_LLM_MODEL = "gpt-4o-mini" -------------------------------------------------------------------------------- /case1/mcp_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from typing import List 4 | 5 | from dotenv import load_dotenv 6 | from langchain_core.documents import Document 7 | from mcp.server.fastmcp import FastMCP 8 | 9 | from rag import PDFRetrievalChain 10 | import config 11 | 12 | load_dotenv() 13 | 14 | DATA_DIR = Path(os.getenv("DATA_DIR", config.DATA_DIR)) 15 | pdf_files = list(DATA_DIR.glob("*.pdf")) 16 | pdf_paths = [str(path) for path in pdf_files] 17 | 18 | VECTOR_DIR = Path(os.getenv("VECTOR_DIR", config.VECTOR_DIR)) 19 | 20 | rag_chain = PDFRetrievalChain( 21 | source_uri = pdf_paths, 22 | persist_directory = str(VECTOR_DIR), 23 | k = config.DEFAULT_TOP_K, 24 | embedding_model = config.DEFAULT_EMBEDDING_MODEL, 25 | llm_model = config.DEFAULT_LLM_MODEL 26 | ).initialize() 27 | 28 | mcp = FastMCP( 29 | name="RAG", 30 | version="0.0.1", 31 | description="RAG Search(keyword, semantic, hybrid)" 32 | ) 33 | 34 | def format_search_results(docs: List[Document]) -> str: 35 
| """ 36 | Format search results as markdown. 37 | 38 | Args: 39 | docs: List of documents to format 40 | 41 | Returns: 42 | Markdown formatted search results 43 | 44 | """ 45 | 46 | if not docs: 47 | return "No relevant information found." 48 | 49 | markdown_results = "## Search Results\n\n" 50 | 51 | for i, doc in enumerate(docs, 1): 52 | source = doc.metadata.get("source", "Unknown source") 53 | page = doc.metadata.get("page", None) 54 | page_info = f" (Page: {page+1})" if page is not None else "" 55 | 56 | markdown_results += f"### Result {i}{page_info}\n\n" 57 | markdown_results += f"{doc.page_content}\n\n" 58 | markdown_results += f"Source: {source}\n\n" 59 | markdown_results += "---\n\n" 60 | 61 | return markdown_results 62 | 63 | @mcp.tool() 64 | async def keyword_search(query: str, top_k: int = 5) -> str: 65 | """ 66 | Performs keyword-based search on PDF documents. 67 | Returns the most relevant results based on exact word/phrase matches. 68 | Ideal for finding specific terms, definitions, or exact phrases in documents. 69 | 70 | Parameters: 71 | query: Search query 72 | top_k: Number of results to return 73 | 74 | """ 75 | 76 | try: 77 | results = rag_chain.search_keyword(query, top_k) 78 | return format_search_results(results) 79 | except Exception as e: 80 | return f"An error occurred during search: {str(e)}" 81 | 82 | @mcp.tool() 83 | async def semantic_search(query: str, top_k: int = 5) -> str: 84 | """ 85 | Performs semantic search on PDF documents. 86 | Finds content semantically similar to the query, delivering relevant information even without exact word matches. 87 | Best for conceptual questions, understanding themes, or when you need information related to a topic. 88 | 89 | Parameters: 90 | query: Search query 91 | top_k: Number of results to return 92 | 93 | """ 94 | 95 | try: 96 | results = rag_chain.search_semantic(query, top_k) 97 | return format_search_results(results) 98 | except Exception as e: 99 | return f"An error occurred during search: {str(e)}" 100 | 101 | @mcp.tool() 102 | async def hybrid_search(query: str, top_k: int = 5) -> str: 103 | """ 104 | Performs hybrid search (keyword + semantic) on PDF documents. 105 | Combines exact keyword matching and semantic similarity to deliver optimal results. 106 | The most versatile search option for general questions or when unsure which search type is best. 
107 | 108 | Parameters: 109 | query: Search query 110 | top_k: Number of results to return 111 | 112 | """ 113 | 114 | try: 115 | results = rag_chain.search_hybrid(query, top_k) 116 | return format_search_results(results) 117 | except Exception as e: 118 | return f"An error occurred during search: {str(e)}" 119 | 120 | if __name__ == "__main__": 121 | mcp.run() -------------------------------------------------------------------------------- /case1/rag/__init__.py: -------------------------------------------------------------------------------- 1 | from rag.base import RetrievalChain 2 | from rag.pdf import PDFRetrievalChain 3 | 4 | __all__ = [ 5 | 'RetrievalChain', 6 | 'PDFRetrievalChain', 7 | ] -------------------------------------------------------------------------------- /case1/rag/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List, Dict, Any, Optional 3 | from pathlib import Path 4 | 5 | from langchain.retrievers.ensemble import EnsembleRetriever 6 | from langchain_community.retrievers import BM25Retriever 7 | from langchain_core.documents import Document 8 | from langchain_core.retrievers import BaseRetriever 9 | from langchain_openai import OpenAIEmbeddings 10 | 11 | class RetrievalChain(ABC): 12 | """ 13 | Abstract base class for RAG search implementations. 14 | 15 | This class provides a template for different document retrieval chains, 16 | allowing for customization of document loading, splitting, vectorization, 17 | and various search methods. 18 | """ 19 | 20 | def __init__(self, **kwargs) -> None: 21 | """ 22 | Initialize a RetrievalChain with configuration parameters. 23 | 24 | Args: 25 | **kwargs: Keyword arguments including: 26 | source_uri: Paths to source documents 27 | k: Number of results to return (default: 5) 28 | embedding_model: Model name for embeddings (default: OpenAI "text-embedding-3-small") 29 | persist_directory: Directory to persist vector store 30 | """ 31 | 32 | self.source_uri = kwargs.get("source_uri", []) 33 | self.k = kwargs.get("k", 5) 34 | self.embedding_model = kwargs.get("embedding_model", "text-embedding-3-small") 35 | self.persist_directory = kwargs.get("persist_directory", None) 36 | self.embeddings = None 37 | self.vectorstore = None 38 | self.retrievers = None 39 | self.split_docs = None 40 | 41 | @abstractmethod 42 | def load_documents(self, source_uris: List[str]) -> List[Document]: 43 | """ 44 | Load documents from source URIs. 45 | 46 | Args: 47 | source_uris: List of file paths or URIs to load documents from 48 | 49 | Returns: 50 | List of loaded documents 51 | """ 52 | 53 | pass 54 | 55 | @abstractmethod 56 | def create_text_splitter(self) -> Any: 57 | """ 58 | Create a text splitter appropriate for the document type. 59 | 60 | Returns: 61 | A text splitter instance 62 | """ 63 | 64 | pass 65 | 66 | def split_documents(self, docs: List[Document], text_splitter: Any) -> List[Document]: 67 | """ 68 | Split documents into chunks using the provided text splitter. 69 | 70 | Args: 71 | docs: Documents to split 72 | text_splitter: Text splitter instance 73 | 74 | Returns: 75 | Split document chunks 76 | """ 77 | 78 | return text_splitter.split_documents(docs) 79 | 80 | def create_embedding(self) -> Any: 81 | """ 82 | Create an embedding model instance. 
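The model name comes from self.embedding_model, set at construction time
(default: "text-embedding-3-small").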
83 | 84 | Returns: 85 | An embeddings model instance 86 | """ 87 | 88 | return OpenAIEmbeddings(model=self.embedding_model) 89 | 90 | @abstractmethod 91 | def create_vectorstore(self, split_docs: List[Document]) -> Any: 92 | """ 93 | Create a vector store from split documents. 94 | 95 | Args: 96 | split_docs: Split document chunks 97 | 98 | Returns: 99 | A vector store instance 100 | """ 101 | 102 | pass 103 | 104 | def create_semantic_retriever(self, vectorstore: Any) -> BaseRetriever: 105 | """ 106 | Create a semantic search retriever. 107 | 108 | Args: 109 | vectorstore: Vector store instance 110 | 111 | Returns: 112 | A semantic search retriever 113 | """ 114 | 115 | return vectorstore.as_retriever( 116 | search_kwargs={"k": self.k} 117 | ) 118 | 119 | def create_keyword_retriever(self, split_docs: List[Document]) -> BaseRetriever: 120 | """ 121 | Create a keyword-based search retriever. 122 | 123 | Args: 124 | split_docs: Split document chunks 125 | 126 | Returns: 127 | A keyword search retriever 128 | """ 129 | 130 | return BM25Retriever.from_documents(split_docs, k=self.k) 131 | 132 | def create_hybrid_retriever(self, split_docs: List[Document], vectorstore: Any) -> BaseRetriever: 133 | """ 134 | Create a hybrid search retriever combining keyword and semantic search. 135 | 136 | Args: 137 | split_docs: Split document chunks 138 | vectorstore: Vector store instance 139 | 140 | Returns: 141 | A hybrid search retriever 142 | """ 143 | 144 | bm25_retriever = self.create_keyword_retriever(split_docs) 145 | dense_retriever = self.create_semantic_retriever(vectorstore) 146 | 147 | return EnsembleRetriever( 148 | retrievers=[bm25_retriever, dense_retriever], 149 | weights=[0.5, 0.5] 150 | ) 151 | 152 | def create_retrievers(self, split_docs: List[Document]) -> Dict[str, BaseRetriever]: 153 | """ 154 | Create all retriever types. 155 | 156 | Args: 157 | split_docs: Split document chunks 158 | 159 | Returns: 160 | Dictionary of retrievers by search type 161 | """ 162 | 163 | self.embeddings = self.create_embedding() 164 | self.vectorstore = self.create_vectorstore(split_docs) 165 | 166 | return { 167 | "semantic": self.create_semantic_retriever(self.vectorstore), 168 | "keyword": self.create_keyword_retriever(split_docs), 169 | "hybrid": self.create_hybrid_retriever(split_docs, self.vectorstore) 170 | } 171 | 172 | def initialize(self) -> "RetrievalChain": 173 | """ 174 | Initialize the retrieval chain by loading documents, splitting them, 175 | and creating retriever instances. 176 | 177 | Returns: 178 | The initialized retrieval chain instance 179 | """ 180 | 181 | docs = self.load_documents(self.source_uri) 182 | if not docs: 183 | print("No documents were loaded.") 184 | return self 185 | 186 | text_splitter = self.create_text_splitter() 187 | self.split_docs = self.split_documents(docs, text_splitter) 188 | 189 | self.retrievers = self.create_retrievers(self.split_docs) 190 | 191 | print(f"Initialization complete: {len(self.split_docs)} chunks created") 192 | return self 193 | 194 | def search_semantic(self, query: str, k: Optional[int] = None) -> List[Document]: 195 | """ 196 | Perform semantic search on the loaded documents. 197 | 198 | Args: 199 | query: Search query 200 | k: Number of results to return, overrides self.k 201 | 202 | Returns: 203 | Relevant documents 204 | 205 | Raises: 206 | ValueError: If the retrieval chain is not initialized 207 | """ 208 | 209 | if not hasattr(self, 'retrievers') or self.retrievers is None: 210 | raise ValueError("Initialization required. 
Call initialize() method first.") 211 | 212 | k = k or self.k 213 | retriever = self.retrievers["semantic"] 214 | retriever.search_kwargs["k"] = k 215 | 216 | return retriever.get_relevant_documents(query) 217 | 218 | def search_keyword(self, query: str, k: Optional[int] = None) -> List[Document]: 219 | """ 220 | Perform keyword-based search on the loaded documents. 221 | 222 | Args: 223 | query: Search query 224 | k: Number of results to return, overrides self.k 225 | 226 | Returns: 227 | Relevant documents 228 | 229 | Raises: 230 | ValueError: If the retrieval chain is not initialized 231 | """ 232 | 233 | if not hasattr(self, 'retrievers') or self.retrievers is None: 234 | raise ValueError("Initialization required. Call initialize() method first.") 235 | 236 | retriever = self.retrievers["keyword"] 237 | retriever.k = k or self.k # BM25Retriever exposes k as a mutable attribute 238 | return retriever.get_relevant_documents(query) 239 | 240 | def search_hybrid(self, query: str, k: Optional[int] = None) -> List[Document]: 241 | """ 242 | Perform hybrid search (keyword + semantic) on the loaded documents. 243 | 244 | Args: 245 | query: Search query 246 | k: Number of results to return (Note: EnsembleRetriever may not support dynamic k) 247 | 248 | Returns: 249 | Relevant documents 250 | 251 | Raises: 252 | ValueError: If the retrieval chain is not initialized 253 | """ 254 | 255 | if not hasattr(self, 'retrievers') or self.retrievers is None: 256 | raise ValueError("Initialization required. Call initialize() method first.") 257 | 258 | return self.retrievers["hybrid"].get_relevant_documents(query) 259 | 260 | def search(self, query: str, k: Optional[int] = None) -> List[Document]: 261 | """ 262 | Default search method that uses semantic search. 263 | 264 | Args: 265 | query: Search query 266 | k: Number of results to return, overrides self.k 267 | 268 | Returns: 269 | Relevant documents 270 | """ 271 | 272 | return self.search_semantic(query, k) 273 | -------------------------------------------------------------------------------- /case1/rag/pdf.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional, Any 2 | import os 3 | 4 | from langchain_community.document_loaders import PDFPlumberLoader 5 | from langchain_text_splitters import RecursiveCharacterTextSplitter 6 | from langchain_core.documents import Document 7 | from langchain_chroma import Chroma 8 | 9 | from rag.base import RetrievalChain 10 | 11 | class PDFRetrievalChain(RetrievalChain): 12 | """ 13 | PDF-specific implementation of the RetrievalChain. 14 | 15 | This class specializes in loading, splitting, and indexing PDF documents 16 | for retrieval. 17 | """ 18 | 19 | def __init__(self, 20 | source_uri: List[str], 21 | persist_directory: Optional[str] = None, 22 | **kwargs) -> None: 23 | """ 24 | Initialize a PDF retrieval chain. 25 | 26 | Args: 27 | source_uri: List of PDF file paths 28 | persist_directory: Directory to persist vector store 29 | **kwargs: Additional keyword arguments for the base RetrievalChain 30 | """ 31 | 32 | super().__init__(source_uri=source_uri, persist_directory=persist_directory, **kwargs) 33 | 34 | def load_documents(self, source_uris: List[str]) -> List[Document]: 35 | """ 36 | Load PDF documents from file paths.
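Paths that do not exist are skipped with a console warning rather than
raising, so one missing file does not abort loading of the rest.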
37 | 38 | Args: 39 | source_uris: List of PDF file paths 40 | 41 | Returns: 42 | List of loaded documents 43 | """ 44 | 45 | docs = [] 46 | for source_uri in source_uris: 47 | if not os.path.exists(source_uri): 48 | print(f"File not found: {source_uri}") 49 | continue 50 | 51 | print(f"Loading PDF: {source_uri}") 52 | loader = PDFPlumberLoader(source_uri) 53 | docs.extend(loader.load()) 54 | 55 | return docs 56 | 57 | def create_text_splitter(self) -> RecursiveCharacterTextSplitter: 58 | """ 59 | Create a text splitter optimized for PDF documents. 60 | 61 | Returns: 62 | A text splitter instance suitable for PDFs 63 | """ 64 | 65 | return RecursiveCharacterTextSplitter( 66 | chunk_size=600, 67 | chunk_overlap=50 68 | ) 69 | 70 | def create_vectorstore(self, split_docs: List[Document]) -> Any: 71 | """ 72 | Create a vector store from split PDF documents. 73 | 74 | Args: 75 | split_docs: Split document chunks 76 | 77 | Returns: 78 | A vector store instance 79 | 80 | Raises: 81 | ValueError: If there are no split documents 82 | """ 83 | 84 | if not split_docs: 85 | raise ValueError("No split documents available.") 86 | 87 | if self.persist_directory: 88 | os.makedirs(self.persist_directory, exist_ok=True) 89 | 90 | if os.path.exists(self.persist_directory) and any(os.listdir(self.persist_directory)): 91 | print(f"Loading existing vector store: {self.persist_directory}") 92 | 93 | return Chroma( 94 | persist_directory=self.persist_directory, 95 | embedding_function=self.create_embedding() 96 | ) 97 | 98 | print("Creating new vector store...") 99 | 100 | vectorstore = Chroma.from_documents( 101 | documents=split_docs, 102 | embedding=self.create_embedding(), 103 | persist_directory=self.persist_directory 104 | ) 105 | 106 | return vectorstore -------------------------------------------------------------------------------- /case2/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | 6 | from dotenv import load_dotenv 7 | 8 | 9 | def get_env_variables(): 10 | """Loads environment variables and returns them as a dictionary.""" 11 | 12 | load_dotenv() 13 | 14 | required_vars = ["DIFY_API_ENDPOINT", "DIFY_API_KEY", "DIFY_KNOWLEDGE_ID"] 15 | 16 | env_dict = {} 17 | 18 | for var in required_vars: 19 | value = os.getenv(var) 20 | if value: 21 | env_dict[var] = value 22 | 23 | return env_dict 24 | 25 | 26 | def create_mcp_json(): 27 | """Creates MCP server configuration JSON file.""" 28 | 29 | project_root = Path(__file__).parent.absolute() 30 | 31 | # .venv python executable path 32 | if os.name == 'nt': # Windows 33 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 34 | else: # Mac, Ubuntu etc 35 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 36 | 37 | server_script = project_root / "mcp_server.py" 38 | 39 | env_vars = get_env_variables() 40 | 41 | config = { 42 | "mcpServers": { 43 | "dify-ek-api": { 44 | "command": python_path, 45 | "args": [str(server_script)], 46 | "env": env_vars, 47 | } 48 | } 49 | } 50 | 51 | json_path = project_root / "mcp_config.json" 52 | 53 | with open(json_path, "w", encoding="utf-8") as f: 54 | json.dump(config, f, indent=2) 55 | 56 | print(f"MCP configuration file created: {json_path}") 57 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 58 | 59 | return str(json_path) 60 | 61 | 62 | if __name__ == "__main__": 63 | create_mcp_json() 64 | 
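# For reference, the generated mcp_config.json has roughly the following shape;
# the paths and values below are illustrative placeholders, not real output:
#
# {
#   "mcpServers": {
#     "dify-ek-api": {
#       "command": "/path/to/mcp/.venv/bin/python",
#       "args": ["/path/to/mcp/case2/mcp_server.py"],
#       "env": {
#         "DIFY_API_ENDPOINT": "http://localhost:8000/retrieval",
#         "DIFY_API_KEY": "dify-external-knowledge-api-key",
#         "DIFY_KNOWLEDGE_ID": "test-knowledge-base"
#       }
#     }
#   }
# }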
-------------------------------------------------------------------------------- /case2/dify_ek_server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import time 4 | from typing import Annotated, Any, Dict, List, Optional, TypedDict 5 | from pathlib import Path 6 | 7 | import uvicorn 8 | from dotenv import load_dotenv 9 | from fastapi import Depends, FastAPI, HTTPException, Header 10 | from fastapi.security import APIKeyHeader 11 | from pydantic import BaseModel 12 | 13 | from langchain.retrievers.ensemble import EnsembleRetriever 14 | from langchain_chroma import Chroma 15 | from langchain_community.retrievers import BM25Retriever 16 | from langchain_community.document_loaders import PDFPlumberLoader 17 | from langchain_core.documents import Document 18 | from langchain_openai import OpenAIEmbeddings 19 | from langchain_text_splitters import RecursiveCharacterTextSplitter 20 | from langgraph.graph import END, START, StateGraph 21 | 22 | load_dotenv() 23 | 24 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') 25 | logger = logging.getLogger(__name__) 26 | 27 | # API key setup 28 | API_KEY = "dify-external-knowledge-api-key" 29 | api_key_header = APIKeyHeader(name="Authorization") 30 | 31 | # Directory setup 32 | BASE_DIR = Path(__file__).parent 33 | PROJECT_ROOT = BASE_DIR.parent 34 | DATA_DIR = PROJECT_ROOT / "data" 35 | CHROMA_DB_DIR = BASE_DIR / "chroma_db" 36 | 37 | # PDF file path (using project shared data folder) 38 | PDF_FILES = list(DATA_DIR.glob("*.pdf")) 39 | PDF_PATH = PDF_FILES[0] if PDF_FILES else DATA_DIR / "sample.pdf" 40 | 41 | app = FastAPI(title="Dify External Knowledge API - LangGraph Version") 42 | 43 | 44 | ###### STEP 1. State and Preprocessing Function Definition ###### 45 | 46 | class KnowledgeState(TypedDict): 47 | """ 48 | State definition used in LangGraph graph. 49 | 50 | Each field represents data passed between graph nodes. 51 | 52 | """ 53 | 54 | query: Annotated[str, "User's search query"] 55 | 56 | search_method: Annotated[str, "Search method"] 57 | 58 | top_k: Annotated[int, "Maximum number of results to return"] 59 | 60 | score_threshold: Annotated[float, "Minimum relevance score for inclusion (0.0-1.0)"] 61 | 62 | results: Annotated[List[Dict[str, Any]], "List of search results"] 63 | 64 | vector_db: Annotated[Optional[Any], "Chroma vector DB instance"] 65 | 66 | semantic_retriever: Annotated[Optional[Any], "Semantic search retriever"] 67 | keyword_retriever: Annotated[Optional[Any], "Keyword-based search retriever"] 68 | hybrid_retriever: Annotated[Optional[Any], "Hybrid search retriever"] 69 | 70 | 71 | ###### STEP 2. Node Definition ###### 72 | 73 | class DocumentProcessor: 74 | """ 75 | Loads PDF files, extracts text, splits into chunks, 76 | and stores in a vector database (ChromaDB). 77 | 78 | """ 79 | 80 | def __init__(self, knowledge_id="test-knowledge-base"): 81 | self.knowledge_id = knowledge_id 82 | 83 | def __call__(self, state: KnowledgeState) -> KnowledgeState: 84 | """ 85 | Process documents and set up vector storage. 
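If a usable Chroma collection already exists under chroma_db/ it is reused;
otherwise the PDF is loaded, split into 600-character chunks with 50-character
overlap, embedded with text-embedding-3-small, and persisted as a new collection.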
86 | 87 | Args: 88 | state: Current graph state 89 | 90 | Returns: 91 | Updated graph state 92 | 93 | """ 94 | 95 | os.makedirs(DATA_DIR, exist_ok=True) 96 | os.makedirs(CHROMA_DB_DIR, exist_ok=True) 97 | 98 | try: 99 | embedding = OpenAIEmbeddings(model='text-embedding-3-small') 100 | chroma_exists = (CHROMA_DB_DIR / "chroma.sqlite3").exists() 101 | 102 | if chroma_exists: 103 | try: 104 | vector_db = Chroma( 105 | collection_name=self.knowledge_id, 106 | embedding_function=embedding, 107 | persist_directory=str(CHROMA_DB_DIR) 108 | ) 109 | 110 | collection_data = vector_db.get() 111 | 112 | if not collection_data.get("documents", []): 113 | logger.warning("Existing collection is empty. Creating a new one.") 114 | raise ValueError("Empty collection") 115 | 116 | except Exception as e: 117 | logger.warning(f"Failed to load existing vector store: {str(e)}. Creating a new one.") 118 | chroma_exists = False 119 | 120 | if CHROMA_DB_DIR.exists(): 121 | backup_dir = f"{CHROMA_DB_DIR}_backup_{int(time.time())}" 122 | os.rename(CHROMA_DB_DIR, backup_dir) 123 | os.makedirs(CHROMA_DB_DIR, exist_ok=True) 124 | 125 | if not chroma_exists: 126 | loader = PDFPlumberLoader(str(PDF_PATH)) 127 | docs = loader.load() 128 | text_splitter = RecursiveCharacterTextSplitter( 129 | chunk_size=600, 130 | chunk_overlap=50 131 | ) 132 | split_docs = text_splitter.split_documents(docs) 133 | 134 | if not split_docs: 135 | logger.warning("No text chunks available. Using temporary data.") 136 | split_docs = [ 137 | Document( 138 | page_content="This is a test document chunk 1 for Dify external knowledge API.", 139 | metadata={ 140 | "path": str(PDF_PATH), 141 | "description": "Test PDF document", 142 | "title": PDF_PATH.name 143 | } 144 | ), 145 | Document( 146 | page_content="This is a test document chunk 2 about PDF processing and retrieval.", 147 | metadata={ 148 | "path": str(PDF_PATH), 149 | "description": "Test PDF document", 150 | "title": PDF_PATH.name 151 | } 152 | ), 153 | Document( 154 | page_content="This is a test document chunk 3 explaining external knowledge API implementation.", 155 | metadata={ 156 | "path": str(PDF_PATH), 157 | "description": "Test PDF document", 158 | "title": PDF_PATH.name 159 | } 160 | ) 161 | ] 162 | 163 | vector_db = Chroma.from_documents( 164 | documents=split_docs, 165 | embedding=embedding, 166 | persist_directory=str(CHROMA_DB_DIR), 167 | collection_name=self.knowledge_id 168 | ) 169 | 170 | state["vector_db"] = vector_db 171 | 172 | except Exception as e: 173 | logger.error(f"Error during vector store initialization: {str(e)}") 174 | raise 175 | 176 | return state 177 | 178 | class RetrieverSetup: 179 | """ 180 | Sets up semantic, keyword, and hybrid retrievers 181 | from the vector database. 182 | 183 | """ 184 | 185 | def __call__(self, state: KnowledgeState) -> KnowledgeState: 186 | """ 187 | Configure retrievers. 
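Builds three retrievers from the vector DB: semantic (Chroma similarity),
keyword (BM25 over the stored chunks), and hybrid (an EnsembleRetriever
weighting both 0.5/0.5). If BM25 setup fails, the semantic retriever is
reused as a fallback for all three.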
188 | 189 | Args: 190 | state: Current graph state 191 | 192 | Returns: 193 | Updated graph state with configured retrievers 194 | 195 | """ 196 | 197 | vector_db = state.get("vector_db") 198 | 199 | if vector_db is None: 200 | logger.error("Vector store not found in state.") 201 | raise ValueError("Vector store not found in state") 202 | 203 | top_k = state.get("top_k", 5) 204 | 205 | try: 206 | semantic_retriever = vector_db.as_retriever( 207 | search_kwargs={"k": top_k} 208 | ) 209 | state["semantic_retriever"] = semantic_retriever 210 | logger.info("Semantic retriever setup complete") 211 | 212 | try: 213 | result = vector_db.get() 214 | 215 | if "documents" in result and result["documents"]: 216 | docs = result["documents"] 217 | metadatas = result.get("metadatas", [None] * len(docs)) 218 | logger.info(f"Retrieved {len(docs)} documents from ChromaDB.") 219 | else: 220 | logger.warning("Could not retrieve documents from ChromaDB. Creating temporary documents.") 221 | docs = ["This is a temporary document for testing purposes."] 222 | metadatas = [None] 223 | 224 | doc_objects = [ 225 | Document( 226 | page_content=text, 227 | metadata=meta if meta else {} 228 | ) 229 | for text, meta in zip(docs, metadatas) 230 | ] 231 | 232 | keyword_retriever = BM25Retriever.from_documents(doc_objects) 233 | keyword_retriever.k = top_k 234 | state["keyword_retriever"] = keyword_retriever 235 | 236 | hybrid_retriever = EnsembleRetriever( 237 | retrievers=[keyword_retriever, semantic_retriever], 238 | weights=[0.5, 0.5] 239 | ) 240 | state["hybrid_retriever"] = hybrid_retriever 241 | 242 | except Exception as inner_e: 243 | logger.error(f"Error during BM25 retriever setup: {str(inner_e)}") 244 | logger.info("Using semantic retriever only.") 245 | state["keyword_retriever"] = semantic_retriever # Fallback 246 | state["hybrid_retriever"] = semantic_retriever # Fallback 247 | 248 | except Exception as e: 249 | logger.error(f"Error during retriever setup: {str(e)}") 250 | raise 251 | 252 | return state 253 | 254 | class PerformRetrieval: 255 | """ 256 | Performs search using the appropriate retriever based on user query. 257 | 258 | """ 259 | 260 | def __call__(self, state: KnowledgeState) -> KnowledgeState: 261 | """ 262 | Execute retrieval process. 
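Picks a retriever by search_method (semantic_search, keyword_search,
hybrid_search; full_text_search maps to the keyword retriever, and unknown
values fall back to hybrid). Result scores are synthetic (0.95 minus 0.1 per
rank, floored at score_threshold) because the underlying retrievers do not
return native relevance scores.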
263 | 264 | Args: 265 | state: Current graph state 266 | 267 | Returns: 268 | Updated graph state with search results 269 | 270 | """ 271 | 272 | query = state.get("query", "") 273 | search_method = state.get("search_method", "hybrid_search") 274 | top_k = state.get("top_k", 5) 275 | score_threshold = state.get("score_threshold", 0.5) 276 | logger.info(f"Performing search: query='{query}', method={search_method}, top_k={top_k}") 277 | 278 | retriever = None 279 | 280 | if search_method == "keyword_search": 281 | retriever = state.get("keyword_retriever") 282 | elif search_method == "semantic_search": 283 | retriever = state.get("semantic_retriever") 284 | elif search_method == "hybrid_search": 285 | retriever = state.get("hybrid_retriever") 286 | elif search_method == "full_text_search": 287 | retriever = state.get("keyword_retriever") 288 | else: 289 | retriever = state.get("hybrid_retriever") 290 | 291 | if not retriever: 292 | logger.error(f"Retriever not found: {search_method}") 293 | retriever = state.get("hybrid_retriever") 294 | if not retriever: 295 | raise ValueError(f"No retriever available in state") 296 | 297 | logger.warning(f"Could not find {search_method} retriever, using hybrid retriever instead.") 298 | 299 | try: 300 | docs = retriever.get_relevant_documents(query) 301 | docs = docs[:top_k] 302 | 303 | results = [] 304 | for i, doc in enumerate(docs): 305 | metadata = doc.metadata.copy() if hasattr(doc, 'metadata') and doc.metadata else {} 306 | score = max(0.95 - (i * 0.1), score_threshold) 307 | 308 | results.append({ 309 | "metadata": metadata, 310 | "score": score, 311 | "title": doc.metadata.get("Title", doc.metadata.get("title", "Document chunk")), 312 | "content": doc.page_content 313 | }) 314 | 315 | state["results"] = results 316 | 317 | if not results: 318 | logger.warning("No search results. Adding default response.") 319 | 320 | state["results"] = [{ 321 | "metadata": { 322 | "path": str(PDF_PATH), 323 | "description": "Default response" 324 | }, 325 | "score": 0.5, 326 | "title": "Default response", 327 | "content": f"No relevant documents found for query: '{query}'" 328 | }] 329 | 330 | except Exception as e: 331 | logger.error(f"Error during retrieval: {str(e)}") 332 | 333 | state["results"] = [{ 334 | "metadata": { 335 | "path": "error", 336 | "description": "Error occurred during retrieval" 337 | }, 338 | "score": 0.5, 339 | "title": "Error", 340 | "content": f"An error occurred during retrieval: {str(e)}" 341 | }] 342 | 343 | return state 344 | 345 | 346 | ###### STEP 3. Graph Creation and Compilation ###### 347 | 348 | def create_knowledge_graph(): 349 | """ 350 | Creates a LangGraph-based knowledge retrieval graph. 351 | 352 | Returns: 353 | Compiled graph instance 354 | 355 | """ 356 | 357 | graph_builder = StateGraph(KnowledgeState) 358 | 359 | graph_builder.add_node("document_processor", DocumentProcessor()) 360 | graph_builder.add_node("retriever_setup", RetrieverSetup()) 361 | graph_builder.add_node("perform_retrieval", PerformRetrieval()) 362 | 363 | graph_builder.add_edge(START, "document_processor") 364 | graph_builder.add_edge("document_processor", "retriever_setup") 365 | graph_builder.add_edge("retriever_setup", "perform_retrieval") 366 | graph_builder.add_edge("perform_retrieval", END) 367 | 368 | return graph_builder.compile() 369 | 370 | 371 | ###### STEP 4. 
Graph Instance Creation ###### 372 | 373 | try: 374 | knowledge_graph = create_knowledge_graph() 375 | logger.info("Knowledge graph instance creation complete") 376 | 377 | except Exception as e: 378 | logger.error(f"Error creating knowledge graph: {str(e)}") 379 | knowledge_graph = None 380 | 381 | 382 | ###### STEP 5. API Request and Response Class Definition ###### 383 | 384 | class RetrievalSetting(BaseModel): 385 | """Retrieval settings model""" 386 | 387 | top_k: Annotated[int, "Maximum number of results to return"] 388 | score_threshold: Annotated[float, "Minimum relevance score for inclusion (0.0-1.0)"] 389 | 390 | 391 | class ExternalKnowledgeRequest(BaseModel): 392 | """External knowledge API request model""" 393 | 394 | knowledge_id: Annotated[str, "ID of the knowledge base to search"] 395 | query: Annotated[str, "User search query"] 396 | search_method: Annotated[str, "Search method (semantic_search, keyword_search, hybrid_search)"] = "hybrid_search" 397 | retrieval_setting: Annotated[RetrievalSetting, "Retrieval settings"] 398 | 399 | 400 | ###### STEP 6. API Key Validation Function ###### 401 | 402 | async def verify_api_key(authorization: str = Header(...)): 403 | """API key validation function""" 404 | 405 | if not authorization.startswith("Bearer "): 406 | logger.warning("Invalid Authorization header format") 407 | 408 | raise HTTPException( 409 | status_code=403, 410 | detail={ 411 | "error_code": 1001, 412 | "error_msg": "Invalid Authorization header format. Expected 'Bearer <api-key>' format." 413 | } 414 | ) 415 | 416 | token = authorization.replace("Bearer ", "") 417 | 418 | if token != API_KEY: 419 | logger.warning("Authentication failed: Invalid API key") 420 | 421 | raise HTTPException( 422 | status_code=403, 423 | detail={ 424 | "error_code": 1002, 425 | "error_msg": "Authorization failed" 426 | } 427 | ) 428 | 429 | return token 430 | 431 | 432 | ###### STEP 7.
API Endpoint Definition ###### 433 | 434 | @app.post("/retrieval") 435 | async def retrieve_knowledge( 436 | request: ExternalKnowledgeRequest, 437 | token: str = Depends(verify_api_key)): 438 | """Document retrieval API endpoint""" 439 | 440 | logger.info(f"API request received: query='{request.query}'") 441 | 442 | if knowledge_graph is None: 443 | logger.error("Knowledge graph is not initialized.") 444 | 445 | raise HTTPException(status_code=500, detail="Knowledge graph is not initialized") 446 | 447 | initial_state = KnowledgeState( 448 | query=request.query, 449 | search_method=request.search_method, 450 | top_k=request.retrieval_setting.top_k, 451 | score_threshold=request.retrieval_setting.score_threshold, 452 | results=[], 453 | vector_db=None, 454 | semantic_retriever=None, 455 | keyword_retriever=None, 456 | hybrid_retriever=None 457 | ) 458 | 459 | try: 460 | final_state = knowledge_graph.invoke(initial_state) 461 | results = final_state.get("results", []) 462 | 463 | response_records = [] 464 | 465 | for r in results: 466 | metadata = r.get("metadata", {}) 467 | if not metadata: 468 | metadata = {"path": "unknown", "description": ""} 469 | 470 | response_records.append({ 471 | "metadata": metadata, 472 | "score": r.get("score", 0.5), 473 | "title": r.get("title", "Document"), 474 | "content": r.get("content", "No content") 475 | }) 476 | 477 | return {"records": response_records} 478 | 479 | except Exception as e: 480 | logger.error(f"Error during knowledge graph execution: {str(e)}") 481 | 482 | return {"records": [{ 483 | "metadata": { 484 | "path": "error", 485 | "description": "Error response" 486 | }, 487 | "score": 0.5, 488 | "title": "Error", 489 | "content": f"An error occurred: {str(e)}" 490 | }]} 491 | 492 | @app.get("/health") 493 | async def health_check(): 494 | """Server health check endpoint""" 495 | 496 | health_status = { 497 | "status": "healthy" if knowledge_graph is not None else "unhealthy", 498 | "knowledge_graph_initialized": knowledge_graph is not None, 499 | "openai_api_key_set": os.getenv("OPENAI_API_KEY") is not None, 500 | "data_directory_exists": DATA_DIR.exists(), 501 | "chroma_db_directory_exists": CHROMA_DB_DIR.exists(), 502 | "pdf_exists": PDF_PATH.exists() 503 | } 504 | 505 | return health_status 506 | 507 | if __name__ == "__main__": 508 | logger.info("Starting server...") 509 | uvicorn.run(app, host="0.0.0.0", port=8000) -------------------------------------------------------------------------------- /case2/mcp_server.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | import os 3 | import json 4 | from typing import Dict 5 | 6 | from mcp.server.fastmcp import FastMCP 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | API_ENDPOINT = os.getenv("DIFY_API_ENDPOINT", "http://localhost:8000/retrieval") 12 | API_KEY = os.getenv("DIFY_API_KEY", "dify-external-knowledge-api-key") 13 | KNOWLEDGE_ID = os.getenv("DIFY_KNOWLEDGE_ID", "test-knowledge-base") 14 | 15 | mcp = FastMCP( 16 | name="Dify External Knowledge API", 17 | version="0.0.1", 18 | description="Three search methods (semantic_search, keyword_search, hybrid_search) using the Dify External Knowledge API specification" 19 | ) 20 | 21 | def format_search_results(data: Dict) -> str: 22 | """Formats search results in a readable form.""" 23 | 24 | records = data.get("records", []) 25 | 26 | if not records: 27 | return "No search results found."
28 | 29 | formatted_results = "# Search Results\n\n" 30 | 31 | for i, record in enumerate(records): 32 | content = record.get("content", "") 33 | score = record.get("score", 0) 34 | title = record.get("title", f"Result {i+1}") 35 | metadata = record.get("metadata", {}) 36 | 37 | # Extract metadata if available 38 | source_info = [] 39 | if "title" in metadata: 40 | source_info.append(f"File: {os.path.basename(metadata['title'])}") 41 | elif "path" in metadata: 42 | source_info.append(f"File: {os.path.basename(metadata['path'])}") 43 | if "page" in metadata: 44 | source_info.append(f"Page: {metadata['page']}") 45 | 46 | source_text = " | ".join(source_info) if source_info else "No source information" 47 | 48 | formatted_results += f"## {title} (Relevance: {score:.2f})\n" 49 | formatted_results += f"{source_text}\n\n" 50 | formatted_results += f"{content}\n\n" 51 | formatted_results += "---\n\n" 52 | 53 | formatted_results += "This information was retrieved via the Dify External Knowledge API." 54 | return formatted_results 55 | 56 | @mcp.tool() 57 | async def dify_ek_search( 58 | query: str, 59 | top_k: int = 5, 60 | score_threshold: float = 0.5, 61 | search_method: str = "hybrid_search", 62 | ctx = None 63 | ) -> str: 64 | """ 65 | Searches for information in the Dify external knowledge base. 66 | Returns search results with document content, relevance scores, and source information. 67 | Use when you need to find specific information in enterprise documents, knowledge bases, or specialized content. 68 | 69 | Parameters: 70 | query: Search question or keywords 71 | top_k: Maximum number of results to return 72 | score_threshold: Minimum relevance score for inclusion (0.0-1.0) 73 | search_method: Search method (semantic_search, keyword_search, hybrid_search) 74 | 75 | """ 76 | 77 | if ctx: 78 | ctx.info(f"Search query: {query}") 79 | ctx.info(f"Maximum results: {top_k}") 80 | ctx.info(f"Minimum score: {score_threshold}") 81 | 82 | # Input validation 83 | if not query or not query.strip(): 84 | return "Error: Search query is empty." 85 | 86 | if top_k < 1: 87 | top_k = 1 88 | elif top_k > 20: 89 | top_k = 20 90 | 91 | if score_threshold < 0: 92 | score_threshold = 0 93 | elif score_threshold > 1: 94 | score_threshold = 1 95 | 96 | try: 97 | if ctx: 98 | ctx.info(f"Calling Dify API: {API_ENDPOINT}") 99 | 100 | request_data = { 101 | "knowledge_id": KNOWLEDGE_ID, 102 | "query": query, 103 | "search_method": search_method, 104 | "retrieval_setting": { 105 | "top_k": top_k, 106 | "score_threshold": score_threshold 107 | } 108 | } 109 | 110 | async with httpx.AsyncClient(timeout=30.0) as client: 111 | response = await client.post( 112 | API_ENDPOINT, 113 | headers={ 114 | "Content-Type": "application/json", 115 | "Authorization": f"Bearer {API_KEY}" 116 | }, 117 | json=request_data 118 | ) 119 | 120 | if response.status_code != 200: 121 | error_message = f"Dify API error: HTTP {response.status_code}" 122 | try: 123 | error_detail = response.json() 124 | if isinstance(error_detail, dict) and "error_msg" in error_detail: 125 | error_message += f" - {error_detail['error_msg']}" 126 | except Exception: 127 | error_message += f" - {response.text[:100]}" 128 | 129 | if ctx: 130 | ctx.error(error_message) 131 | return f"Search failed\n\n{error_message}" 132 | 133 | try: 134 | data = response.json() 135 | return format_search_results(data) 136 | 137 | except json.JSONDecodeError: 138 | if ctx: 139 | ctx.error("JSON parsing error") 140 | return "Search failed\n\nCould not parse API response."
141 | 142 | except httpx.RequestError as e: 143 | error_message = f"API request error: {str(e)}" 144 | if ctx: 145 | ctx.error(error_message) 146 | return f"Search failed\n\n{error_message}" 147 | 148 | except Exception as e: 149 | error_message = f"Unexpected error: {str(e)}" 150 | if ctx: 151 | ctx.error(error_message) 152 | return f"Search failed\n\n{error_message}" 153 | 154 | @mcp.prompt() 155 | def ai_trend_learning_guide( 156 | topic: str = "", 157 | learning_level: str = "beginner", 158 | time_horizon: str = "short-term" 159 | ) -> str: 160 | """ 161 | Creates customized AI learning guides based on SPRI monthly AI reports. 162 | Provides latest AI trends and learning roadmaps tailored to learner level and desired timeframe. 163 | Perfect for educational planning, career development in AI, or understanding current AI landscape. 164 | 165 | Parameters: 166 | topic: AI topic of interest (optional - e.g., "generative AI", "computer vision", "NLP") 167 | learning_level: Learner level ("beginner", "intermediate", "advanced") 168 | time_horizon: Learning plan duration ("short-term", "medium-term", "long-term") 169 | 170 | """ 171 | 172 | level_approaches = { 173 | "beginner": "Focuses on understanding basic concepts and principles, with practical learning paths.", 174 | "intermediate": "Focuses on advanced concepts and practical project implementation, with paths to improve application skills.", 175 | "advanced": "Focuses on latest research trends and advanced technology implementation, with innovative approaches and expertise enhancement." 176 | } 177 | 178 | time_plans = { 179 | "short-term": "Proposes intensive learning plans centered on core skills and knowledge that can be acquired within 1-3 months.", 180 | "medium-term": "Proposes step-by-step learning plans to systematically build capabilities over 3-6 months.", 181 | "long-term": "Proposes comprehensive learning plans to develop expertise from a long-term perspective of 6 months to 1 year." 182 | } 183 | 184 | level_approach = level_approaches.get(learning_level, level_approaches["beginner"]) 185 | time_plan = time_plans.get(time_horizon, time_plans["short-term"]) 186 | 187 | output_template = f""" 188 | # {topic if topic else 'AI Trends'} Learning Guide 189 | 190 | ## 1. Trend Analysis 191 | - Key trends 192 | - Technological changes 193 | - Industry impact 194 | 195 | ## 2. Core Knowledge Areas 196 | - Basic concepts 197 | - Core technologies 198 | - Key algorithms/methodologies 199 | 200 | ## 3. Learning Roadmap 201 | - Step-by-step learning plan 202 | - Recommended resources 203 | - Practical projects 204 | 205 | ## 4. Career and Application Opportunities 206 | - Related roles/positions 207 | - Industry use cases 208 | - Future outlook 209 | """ 210 | 211 | # Generate final prompt 212 | prompt = ( 213 | f"You are an AI learning guide expert who analyzes the latest AI trends based on SPRI monthly AI reports " 214 | f"and provides customized learning directions.\n\n" 215 | 216 | f"## Learner Profile\n" 217 | f"- Level: {learning_level} ({level_approach})\n" 218 | f"- Learning Plan: {time_horizon} ({time_plan})\n\n" 219 | 220 | f"## Analysis Target\n" 221 | f"Please analyze based on the March issue of the SPRI monthly AI report. " 222 | f"{'Please focus on ' + topic + '-related content in your analysis.' if topic else 'Please analyze overall AI trends.'}\n\n" 223 | 224 | f"## Information to Provide\n" 225 | f"1. Summary of latest AI trends and their importance\n" 226 | f"2. 
Core knowledge and technical elements in the field\n" 227 | f"3. Step-by-step learning plan and recommended resources\n" 228 | f"4. Practical application suggestions and career recommendations\n\n" 229 | 230 | f"Please structure your analysis results as follows:\n\n{output_template}\n\n" 231 | 232 | f"Search the report to provide practical and specific information. Suggest learning directions " 233 | f"that align with the latest trends, and create a practical guide that learners can easily follow." 234 | ) 235 | 236 | return prompt 237 | 238 | @mcp.resource("help: hantaek@brain-crew.com") 239 | def get_help() -> str: 240 | """Provides help for Dify knowledge search in Claude Desktop.""" 241 | 242 | return """ 243 | # Dify External Knowledge Search MCP Tool Usage Guide 244 | 245 | Enables Claude to search for information in documents using the Dify External Knowledge API. 246 | 247 | ## Available Tools 248 | 249 | 1. dify_ek_search - Search for information in the knowledge base 250 | - `query`: Search query 251 | - `top_k`: Maximum number of results to return (default: 5) 252 | - `score_threshold`: Minimum relevance score (default: 0.5) 253 | - `search_method`: Search method (semantic_search, keyword_search, hybrid_search) (default: hybrid_search) 254 | 255 | 2. ai_trend_learning_guide - Prompt template for a customized AI learning guide 256 | - `topic`: AI topic of interest (optional - e.g., "generative AI", "computer vision", "NLP") 257 | - `learning_level`: Learner level ("beginner", "intermediate", "advanced") 258 | - `time_horizon`: Learning plan duration ("short-term", "medium-term", "long-term") 259 | 260 | """ 261 | 262 | if __name__ == "__main__": 263 | mcp.run() -------------------------------------------------------------------------------- /case3/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | 6 | from dotenv import load_dotenv 7 | 8 | 9 | def get_env_variables(): 10 | """Loads environment variables and returns them as a dictionary.""" 11 | 12 | load_dotenv() 13 | 14 | required_vars = ["DIFY_BASE_URL", "DIFY_APP_SK"] 15 | 16 | env_dict = {} 17 | 18 | for var in required_vars: 19 | value = os.getenv(var) 20 | if value: 21 | env_dict[var] = value 22 | 23 | return env_dict 24 | 25 | 26 | def create_mcp_json(): 27 | """Creates MCP server configuration JSON file.""" 28 | 29 | project_root = Path(__file__).parent.absolute() 30 | 31 | # .venv python executable path 32 | if os.name == 'nt': # Windows 33 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 34 | else: # Mac, Ubuntu etc 35 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 36 | 37 | server_script = project_root / "mcp_server.py" 38 | 39 | env_vars = get_env_variables() 40 | 41 | config = { 42 | "mcpServers": { 43 | "dify-workflow": { 44 | "command": python_path, 45 | "args": [str(server_script)], 46 | "env": env_vars, 47 | } 48 | } 49 | } 50 | 51 | json_path = project_root / "mcp_config.json" 52 | 53 | with open(json_path, "w", encoding="utf-8") as f: 54 | json.dump(config, f, indent=2) 55 | 56 | print(f"MCP configuration file created: {json_path}") 57 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 58 | 59 | return str(json_path) 60 | 61 | 62 | if __name__ == "__main__": 63 | create_mcp_json() 64 | -------------------------------------------------------------------------------- /case3/mcp_server.py: -------------------------------------------------------------------------------- 1 | import os 2 |
import requests 3 | 4 | from dotenv import load_dotenv 5 | from mcp.server.fastmcp import FastMCP 6 | 7 | load_dotenv() 8 | 9 | mcp = FastMCP( 10 | name="Dify Workflow", 11 | version="0.0.1", 12 | description="Retrieve Dify Workflow execution results" 13 | ) 14 | 15 | @mcp.tool() 16 | async def dify_workflow(input: str) -> str: 17 | """ 18 | Executes a Dify workflow and returns the results. 19 | Automates complex AI tasks and provides immediate results. 20 | Useful for text analysis, content generation, or processing user inputs through Dify workflows. 21 | 22 | Parameters: 23 | input: Input text to process 24 | 25 | """ 26 | 27 | dify_base_url = os.getenv("DIFY_BASE_URL") 28 | dify_app_sk = os.getenv("DIFY_APP_SK") 29 | 30 | url = f"{dify_base_url}/workflows/run" 31 | headers = { 32 | "Authorization": f"Bearer {dify_app_sk}", 33 | "Content-Type": "application/json" 34 | } 35 | data = { 36 | "inputs": {"input": input}, 37 | "response_mode": "blocking", 38 | "user": "default_user", 39 | } 40 | 41 | response = requests.post(url, headers=headers, json=data, timeout=60)  # timeout so a stalled workflow call cannot hang the tool 42 | response.raise_for_status() 43 | result = response.json() 44 | 45 | outputs = {} 46 | 47 | if "outputs" in result.get("data", {}): 48 | outputs = result["data"]["outputs"] 49 | 50 | return next(iter(outputs.values()), "No output received from Dify workflow.") 51 | 52 | if __name__ == "__main__": 53 | mcp.run() -------------------------------------------------------------------------------- /case4/auto_mcp_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import sys 4 | from pathlib import Path 5 | 6 | from dotenv import load_dotenv 7 | 8 | 9 | def get_env_variables(): 10 | """Loads environment variables and returns them as a dictionary.""" 11 | 12 | load_dotenv() 13 | 14 | required_vars = [ 15 | "TAVILY_API_KEY", 16 | ] 17 | 18 | env_dict = {} 19 | 20 | for var in required_vars: 21 | value = os.getenv(var) 22 | if value: 23 | env_dict[var] = value 24 | 25 | return env_dict 26 | 27 | 28 | def create_mcp_json(): 29 | """Creates MCP server configuration JSON file.""" 30 | 31 | project_root = Path(__file__).parent.absolute() 32 | 33 | # .venv python executable path 34 | if os.name == 'nt': # Windows 35 | python_path = str(project_root.parent / ".venv" / "Scripts" / "python.exe") 36 | else: # Mac, Ubuntu etc 37 | python_path = str(project_root.parent / ".venv" / "bin" / "python") 38 | 39 | server_script = project_root / "mcp_server.py" 40 | 41 | env_vars = get_env_variables() 42 | 43 | config = { 44 | "mcpServers": { 45 | "tavily-web-search": { 46 | "command": python_path, 47 | "args": [str(server_script)], 48 | "env": env_vars, 49 | } 50 | } 51 | } 52 | 53 | json_path = project_root / "mcp_config.json" 54 | 55 | with open(json_path, "w", encoding="utf-8") as f: 56 | json.dump(config, f, indent=2) 57 | 58 | print(f"MCP configuration file created: {json_path}") 59 | print(f"Generated environment variables: {', '.join(env_vars.keys())}") 60 | 61 | return str(json_path) 62 | 63 | 64 | if __name__ == "__main__": 65 | create_mcp_json() 66 | -------------------------------------------------------------------------------- /case4/mcp_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from mcp.server.fastmcp import FastMCP 5 | from tavily import TavilyClient 6 | 7 | load_dotenv() 8 | 9 | tavily_api_key = os.getenv("TAVILY_API_KEY") 10 | tavily_client = TavilyClient(api_key=
tavily_api_key) 11 | 12 | websearch_config = { 13 | "parameters": { 14 | "default_num_results": 5, 15 | "include_domains": [] 16 | } 17 | } 18 | 19 | mcp = FastMCP( 20 | name="web_search", 21 | version="1.0.0", 22 | description="Web search capability using the Tavily API that provides real-time internet search results. Performs basic-depth searches with a configurable number of results and returns formatted results with titles, URLs, publication dates, and content summaries." 23 | ) 24 | 25 | def format_search_results(response): 26 | """ 27 | Converts search results to markdown format. 28 | 29 | Args: 30 | response: Tavily search response dictionary 31 | 32 | Returns: 33 | String in markdown format 34 | 35 | """ 36 | 37 | if not response.get("results"): 38 | return "No results found." 39 | 40 | markdown_results = "### Search Results:\n\n" 41 | 42 | for idx, result in enumerate(response.get("results", []), 1): 43 | title = result.get("title", "No title") 44 | url = result.get("url", "") 45 | published_date = result.get("published_date", "") 46 | content = result.get("content", "") 47 | score = result.get("score", 0) 48 | 49 | date_info = f" (Published: {published_date})" if published_date else "" 50 | 51 | markdown_results += f"**{idx}.** [{title}]({url}){date_info}\n" 52 | markdown_results += f"**Relevance Score:** {score:.2f}\n" 53 | 54 | if content: 55 | markdown_results += f"> **Content:** {content}\n\n" 56 | else: 57 | markdown_results += "\n" 58 | 59 | if response.get("answer"): 60 | markdown_results += f"\n### Answer:\n{response.get('answer')}\n\n" 61 | 62 | if response.get("response_time"): 63 | markdown_results += f"\n*Search completed in {response.get('response_time'):.2f} seconds*" 64 | 65 | return markdown_results 66 | 67 | @mcp.tool() 68 | async def search_web(query: str, num_results: int | None = None) -> str: 69 | """ 70 | Performs real-time web search using the Tavily API. 71 | Returns latest search results in markdown format including titles, URLs, and content summaries. 72 | Use when you need current information, recent events, or data not available in your training. 73 | 74 | 75 | Parameters: 76 | query: Search query 77 | num_results: Number of results to return (default: 5) 78 | 79 | """ 80 | 81 | try: 82 | search_args = { 83 | "max_results": num_results or websearch_config["parameters"]["default_num_results"], 84 | "search_depth": "basic" 85 | } 86 | 87 | search_results = tavily_client.search( 88 | query=query, 89 | **search_args 90 | ) 91 | 92 | return format_search_results(search_results) 93 | except Exception as e: 94 | return f"Error occurred during Tavily search: {e}" 95 | 96 | @mcp.resource("help: dev@brain-crew.com") 97 | def get_search_help() -> str: 98 | """Provides help for web search tools.""" 99 | 100 | return """ 101 | # Web Search Tool Usage Guide 102 | 103 | Provides Claude with real-time web search capability through the Tavily API. 104 | 105 | ## Web Search 106 | The `search_web` tool performs simple web searches.
107 | - Parameters: 108 | - query: Search query 109 | - num_results: Number of results to return (optional, default: 5) 110 | 111 | ## Examples 112 | - Web search: "I'm curious about the latest AI development trends" 113 | 114 | """ 115 | 116 | if __name__ == "__main__": 117 | mcp.run() -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teddynote-lab/mcp-usecase/89e147c81802ba5e8c4511aa4d111681161410eb/data/.gitkeep -------------------------------------------------------------------------------- /docs/case1.md: -------------------------------------------------------------------------------- 1 | # RAG(Retrieval Augmented Generation) 예제 2 | 3 | [English](#rag-retrieval-augmented-generation-example) | [한국어](#rag-retrieval-augmented-generation-예제-1) 4 | 5 | ## RAG (Retrieval Augmented Generation) 예제 6 | 7 | 이 예제에서는 PDF 문서를 대상으로 키워드 검색, 시맨틱 검색, 하이브리드 검색 기능을 구현한 MCP 서버를 제공합니다. 8 | 9 | ### 기능 10 | 11 | - **키워드 검색**: 문서 내에서 특정 키워드와 정확히 일치하는 내용을 검색합니다. 12 | - **시맨틱 검색**: 임베딩 모델을 사용하여 의미적으로 유사한 내용을 검색합니다. 13 | - **하이브리드 검색**: 키워드 검색과 시맨틱 검색을 결합하여 보다 정확한 결과를 제공합니다. 14 | 15 | ### 설정 16 | 17 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 18 | 19 | ``` 20 | OPENAI_API_KEY = "sk-" 21 | ``` 22 | 23 | ### 사용 방법 24 | 25 | 1. 환경 설정 확인 26 | ```bash 27 | # case1 디렉토리로 이동 28 | cd case1 29 | 30 | # 필요한 환경 변수 설정 확인 31 | # .env 파일이 올바르게 구성되었는지 확인하세요 32 | ``` 33 | 34 | 2. JSON 파일 생성 35 | ```bash 36 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 37 | source ../.venv/bin/activate # macOS/Linux 38 | ..\.venv\Scripts\activate # Windows 39 | 40 | # JSON 파일 생성 41 | python auto_mcp_json.py 42 | ``` 43 | 44 | 3. Claude Desktop 또는 Cursor에 적용 45 | - 생성된 JSON 내용을 복사 46 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 47 | - 설정 저장 및 적용 48 | 49 | ### 사용 예시 50 | 51 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다. 52 | 53 | - **키워드 검색**: "키워드 검색으로 AI에 대한 정의를 찾아줄래?" 54 | - **시맨틱 검색**: "시멘틱 검색을 해서 인공지능의 최근 발전 동향을 알려줘." 55 | - **하이브리드 검색**: "하이브리드 검색을 해서 최근 LLM의 활용 사례를 알려줘." 56 | 57 | ### 구현 세부사항 58 | 59 | `case1/mcp_server.py` 파일은 다음과 같은 주요 구성 요소로 이루어져 있습니다. 60 | 61 | 1. PDF 파일 경로 설정 및 RAG 체인 초기화 62 | 2. 검색 결과 포맷팅 함수 63 | 3. 키워드, 시맨틱, 하이브리드 검색 도구 정의 64 | 65 | ### 🚨 도구 Docstring 최적화 66 | 67 | 이 예제에서 제공하는 도구들이 Claude와 같은 AI 에이전트에 의해 효과적으로 사용되기 위해서는 명확하고 맥락적인 docstring을 작성하는 것이 중요합니다. 68 | 69 | #### 왜 Docstring이 중요한가요? 70 | 71 | `@mcp.tool()`로 도구를 정의할 때, 제공하는 docstring은 Claude가 도구를 이해하고 사용하는 방식에 직접적인 영향을 미칩니다. Claude는 다음과 같은 목적으로 docstring을 읽습니다. 72 | 73 | 1. **도구의 목적 이해**: Claude는 docstring을 분석하여 도구가 무엇을 하는지 파악합니다. 74 | 2. **사용 시점 결정**: Claude는 해당 도구를 사용해야 할 상황을 판단합니다. 75 | 3. **매개변수 형식 파악**: Claude는 필수 및 선택적 매개변수를 학습합니다. 76 | 77 | 사용자가 명시적으로 도구 이름을 언급하지 않더라도, 잘 작성된 docstring을 통해 Claude가 상황에 맞게 적절한 도구를 선택할 수 있습니다. 이는 더 자연스러운 대화 흐름과 최상의 결과를 얻는 데 필수적입니다. 78 | 79 | #### 효과적인 Docstring 구조 80 | 81 | 최적의 결과를 위해 docstring을 다음과 같이 구성하세요: 82 | 83 | ```python 84 | @mcp.tool() 85 | async def your_tool_name(param1: str, param2: int = 5) -> str: 86 | """ 87 | 도구가 하는 일에 대한 짧은 설명 (1줄). 88 | 결과 또는 출력 형식에 대한 자세한 내용 (1줄). 89 | 이 도구를 사용해야 하는 상황에 대한 맥락적 힌트 (1줄). 90 | 91 | Parameters: 92 | param1: 첫 번째 매개변수 설명 93 | param2: 기본값이 있는 두 번째 매개변수 설명 94 | """ 95 | # 함수 구현... 96 | ``` 97 | 98 | 99 | 100 | 이러한 docstring을 통해 Claude는 다음과 같은 상황에서 지능적으로 도구를 선택할 수 있습니다. 
101 | - 사용자가 "문서에서 X의 정의는 무엇인가요?"라고 물으면 **keyword_search** 선택 102 | - 사용자가 "문서에서 X 개념에 대해 설명해주세요"라고 물으면 **semantic_search** 선택 103 | - 사용자가 "문서에서 X에 대해 무엇이라고 하나요?"라고 물으면 **hybrid_search** 선택 104 | 105 | 이처럼 사용자가 명시적으로 도구 이름을 언급하지 않더라도, 맥락적 힌트를 통해 Claude가 올바른 도구를 선택할 수 있습니다. 106 | 107 | --- 108 | 109 | ## RAG (Retrieval Augmented Generation) Example 110 | 111 | This example provides an MCP server that implements keyword search, semantic search, and hybrid search functionality for PDF documents. 112 | 113 | ### Features 114 | 115 | - **Keyword Search**: Searches for content that exactly matches specific keywords in documents. 116 | - **Semantic Search**: Uses embedding models to search for semantically similar content. 117 | - **Hybrid Search**: Combines keyword and semantic search to provide more accurate results. 118 | 119 | ### Configuration 120 | 121 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 122 | 123 | ``` 124 | OPENAI_API_KEY = "sk-" 125 | ``` 126 | 127 | ### Usage Instructions 128 | 129 | 1. Check environment configuration 130 | ```bash 131 | # Navigate to case1 directory 132 | cd case1 133 | 134 | # Check the required environment variables 135 | # Make sure the .env file is properly configured 136 | ``` 137 | 138 | 2. Generate JSON file 139 | ```bash 140 | # Activate virtual environment (if not already activated) 141 | source ../.venv/bin/activate # macOS/Linux 142 | ..\.venv\Scripts\activate # Windows 143 | 144 | # Generate JSON file 145 | python auto_mcp_json.py 146 | ``` 147 | 148 | 3. Apply to Claude Desktop or Cursor 149 | - Copy the generated JSON content 150 | - Paste it into the MCP settings of Claude Desktop or Cursor 151 | - Save and apply settings 152 | 153 | ### Usage Examples 154 | 155 | You can use it in Claude Desktop or Cursor as follows: 156 | 157 | - **Keyword Search**: "Can you find the definition of AI using keyword search?" 158 | - **Semantic Search**: "Could you use semantic search to tell me about recent developments in AI?" 159 | - **Hybrid Search**: "Tell me about use cases of LLM using hybrid search." 160 | 161 | ### Implementation Details 162 | 163 | The `case1/mcp_server.py` file consists of the following main components: 164 | 165 | 1. PDF file path setup and RAG chain initialization 166 | 2. Search results formatting function 167 | 3. Definition of keyword, semantic, and hybrid search tools 168 | 169 | The vector database uses Chroma DB to efficiently store and search the contents of PDF documents. 170 | 171 | ### 🚨 Optimizing Tool Docstrings 172 | 173 | For the tools in this example to be effectively used by AI agents like Claude, it's important to write clear and contextual docstrings. 174 | 175 | #### Why Docstrings Matter 176 | 177 | When defining tools with `@mcp.tool()`, the docstring you provide directly influences how Claude understands and uses the tool. Claude reads these docstrings to: 178 | 179 | 1. **Understand the tool's purpose**: Claude analyzes the docstring to know what the tool does 180 | 2. **Decide when to use it**: Claude determines which situations call for this specific tool 181 | 3. **Know how to format parameters**: Claude learns the required and optional parameters 182 | 183 | Even when users don't explicitly mention the tool name, well-written docstrings allow Claude to select the appropriate tool based on the context. This is essential for a more natural conversation flow and optimal results. 
184 | 185 | #### Effective Docstring Structure 186 | 187 | For optimal results, structure your docstrings like this: 188 | 189 | ```python 190 | @mcp.tool() 191 | async def your_tool_name(param1: str, param2: int = 5) -> str: 192 | """ 193 | Short description of what the tool does (1 line). 194 | More details about the results or output format (1 line). 195 | Contextual hints about when to use this tool (1 line). 196 | 197 | Parameters: 198 | param1: Description of first parameter 199 | param2: Description of second parameter with default value 200 | """ 201 | # Function implementation... 202 | ``` 203 | 204 | With these docstrings, Claude can intelligently choose: 205 | - **keyword_search** when a user asks "What is the definition of X in the document?" 206 | - **semantic_search** when a user asks "Tell me about the concept of X from the document" 207 | - **hybrid_search** when a user asks "What does the document say about X?" 208 | 209 | This way, even if users don't explicitly name the tool, Claude can select the right one through contextual hints. -------------------------------------------------------------------------------- /docs/case2.md: -------------------------------------------------------------------------------- 1 | # Dify External Knowledge API 예제 2 | 3 | [English](#dify-external-knowledge-api-example) | [한국어](#dify-external-knowledge-api-예제-1) 4 | 5 | ## Dify 외부지식 6 | 7 | 이 예제에서는 Dify 외부지식 API 형식과 동일한 문서 검색 도구를 통해 MCP 서버를 제공합니다. 또한 SPRI 월간 AI 보고서를 기반으로 맞춤형 학습 가이드를 생성하는 `프롬프트 템플릿`도 포함되어 있습니다. `Dify에 등록된 외부지식에 직접 요청을 하는 것이 아니기 때문에` case2를 시도하기 위해서는 반드시 로컬에서 `dify_ek_server.py`를 실행시켜주셔야 합니다. 8 | 9 | Dify에 외부지식을 등록하는 방법이 궁금하신 분들은 [이곳을 클릭해주세요.](https://ballistic-hedgehog-95e.notion.site/How-to-register-External-Knowledge-in-Dify-1bfbeae069358056a878c60c82b4ad0d?pvs=4) -> 아직 익숙하지 않으셔도 괜찮습니다. 10 | 11 | ### 기능 12 | 13 | - **다양한 검색 방법**: 시맨틱 검색, 키워드 검색, 하이브리드 검색을 지원합니다. 14 | - **검색 결과 포맷팅**: 검색 결과를 가독성 있는 형태로 제공합니다. 15 | - **AI 트렌드 학습 가이드**: SPRI 월간 AI 보고서를 기반으로 맞춤형 학습 가이드를 생성합니다. 16 | 17 | ### 설정 18 | 19 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 20 | 21 | ``` 22 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 23 | DIFY_API_KEY = your-dify-api-key 24 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 25 | ``` 26 | 27 | - `DIFY_API_ENDPOINT`: Dify API 엔드포인트 URL 28 | - `DIFY_API_KEY`: Dify API 키 29 | - `DIFY_KNOWLEDGE_ID`: 검색할 지식 베이스 ID 30 | 31 | ### 사용 방법 32 | 33 | 1. 환경 설정 확인 34 | ```bash 35 | # case2 디렉토리로 이동 36 | cd case2 37 | 38 | # 필요한 환경 변수 설정 확인 39 | # .env 파일이 올바르게 구성되었는지 확인하세요 40 | ``` 41 | 42 | 2. Dify 외부지식 로컬서버 실행 43 | ```bash 44 | # 로컬서버를 실행하기 전에 data 폴더의 pdf 문서를 확인해주세요. 45 | python dify_ek_server.py 46 | ``` 47 | 48 | 3. JSON 파일 생성 49 | ```bash 50 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 51 | source ../.venv/bin/activate # macOS/Linux 52 | ..\.venv\Scripts\activate # Windows 53 | 54 | # JSON 파일 생성 55 | python auto_mcp_json.py 56 | ``` 57 | 58 | 4. Claude Desktop 또는 Cursor에 적용 59 | - 생성된 JSON 내용을 복사 60 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 61 | - 설정 저장 및 적용 62 | 63 | ### 사용 예시 64 | 65 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다. 66 | 67 | #### 1. Dify 외부지식 검색 68 | ```bash 69 | # ex 70 | "외부지식을 사용해서 최근 생성형 AI 기술 동향에 대해 검색해줘." 71 | ``` 72 | 73 | #### 2. AI 트렌드 학습 가이드 생성 74 | 클로드 데스크탑에서 프롬프트 템플릿을 클릭해주세요. 75 | ```bash 76 | # ex 77 | " Topic: LLM " 78 | " Learning_level: 초급 " 79 | " Time_horizon: 중기 " 80 | ``` 81 | 82 | ### 구현 세부사항 83 | 84 | `case2/mcp_server.py` 파일에는 다음과 같은 주요 구성 요소가 포함되어 있습니다: 85 | 86 | 1. 문서 검색 도구 87 | 2. AI 트렌드 학습 가이드 프롬프트 템플릿 88 | 3. 
도움말 리소스 89 | 90 | --- 91 | 92 | ## Dify External Knowledge API Example 93 | 94 | In this example, we provide an MCP server that follows the same document retrieval format as Dify's External Knowledge API. It also includes a prompt template that generates a customized AI learning guide based on SPRI monthly AI reports. 95 | 96 | Since this example does not send requests directly to the external knowledge registered in Dify, you must run `dify_ek_server.py` locally in order to try case2. 97 | 98 | To learn how to register external knowledge in Dify, [click here.](https://ballistic-hedgehog-95e.notion.site/How-to-register-External-Knowledge-in-Dify-1bfbeae069358056a878c60c82b4ad0d) 99 | -> No worries if you’re not familiar with this yet. 100 | 101 | ### Features 102 | 103 | - **Various Search Methods**: Supports semantic search, keyword search, and hybrid search. 104 | - **Search Results Formatting**: Provides search results in a readable format. 105 | - **AI Trends Learning Guide**: Generates customized learning guides based on SPRI monthly AI reports. 106 | 107 | ### Configuration 108 | 109 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 110 | 111 | ``` 112 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 113 | DIFY_API_KEY = your-dify-api-key 114 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 115 | ``` 116 | 117 | - `DIFY_API_ENDPOINT`: Dify API endpoint URL 118 | - `DIFY_API_KEY`: Dify API key 119 | - `DIFY_KNOWLEDGE_ID`: Knowledge base ID to search 120 | 121 | ### Usage Instructions 122 | 123 | 1. Check environment configuration 124 | ```bash 125 | # Navigate to case2 directory 126 | cd case2 127 | 128 | # Check the required environment variables 129 | # Make sure the .env file is properly configured 130 | ``` 131 | 132 | 2. Run Dify external knowledge local server 133 | ```bash 134 | # Please check the PDF documents in the data folder before running the local server. 135 | python dify_ek_server.py 136 | ``` 137 | 138 | 3. Generate JSON file 139 | ```bash 140 | # Activate virtual environment (if not already activated) 141 | source ../.venv/bin/activate # macOS/Linux 142 | ..\.venv\Scripts\activate # Windows 143 | 144 | # Generate JSON file 145 | python auto_mcp_json.py 146 | ``` 147 | 148 | 4. Apply to Claude Desktop or Cursor 149 | - Copy the generated JSON content 150 | - Paste it into the MCP settings of Claude Desktop or Cursor 151 | - Save and apply settings 152 | 153 | ### Usage Examples 154 | 155 | You can use it in Claude Desktop or Cursor as follows: 156 | 157 | #### External Knowledge Search with Dify 158 | ``` 159 | Use external knowledge to search for recent trends in LLM. 160 | ``` 161 | 162 | #### AI Trends Learning Guide Generation 163 | Click the prompt template in Claude Desktop. 164 | ``` 165 | Topic: LLM 166 | Learning_level: Beginner 167 | Time_horizon: Mid-term 168 | ``` 169 | 170 | ### Implementation Details 171 | 172 | The `case2/mcp_server.py` file includes the following main components: 173 | 174 | 1. Document search tool 175 | 2. AI trends learning guide prompt template 176 | 3. Help resource -------------------------------------------------------------------------------- /docs/case3.md: -------------------------------------------------------------------------------- 1 | # Dify Workflow 예제 2 | 3 | [English](#dify-workflow-example) | [한국어](#dify-workflow-예제-1) 4 | 5 | ## Dify Workflow 예제 6 | 7 | 이 예제에서는 Dify Workflow API를 사용하여 워크플로우를 실행하고 결과를 가져오는 MCP 서버를 제공합니다. Dify의 워크플로우 기능을 통해 복잡한 AI 작업을 자동화하고 그 결과를 Claude에서 활용할 수 있습니다.
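참고로 `response_mode`가 `blocking`일 때 Dify 워크플로우 API(`POST /workflows/run`)의 응답은 대략 아래와 같은 형태입니다. 필드 구성과 값은 예시이며 실제 응답에는 추가 필드가 포함될 수 있고, `case3/mcp_server.py`는 이 중 `data.outputs`의 첫 번째 값을 그대로 반환합니다.

```json
{
  "data": {
    "status": "succeeded",
    "outputs": {
      "output": "워크플로우 실행 결과 텍스트"
    }
  }
}
```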
8 | 9 | ### 기능 10 | 11 | - **Dify Workflow 실행**: 사용자 입력을 받아 Dify의 워크플로우를 실행합니다. 12 | - **결과 반환**: 워크플로우 실행 결과를 Claude에 반환합니다. 13 | - **에러 처리**: API 요청 및 응답 처리 과정에서 발생할 수 있는 에러를 적절하게 처리합니다. 14 | 15 | ### 설정 16 | 17 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 18 | 19 | ``` 20 | DIFY_BASE_URL = https://api.dify.ai/v1 21 | DIFY_APP_SK = your-dify-app-sk 22 | ``` 23 | 24 | - `DIFY_BASE_URL`: Dify API의 기본 URL 25 | - `DIFY_APP_SK`: Dify 애플리케이션 시크릿 키 26 | 27 | ### 사용 방법 28 | 29 | 1. 환경 설정 확인 30 | ```bash 31 | # case3 디렉토리로 이동 32 | cd case3 33 | 34 | # 필요한 환경 변수 설정 확인 35 | # .env 파일이 올바르게 구성되었는지 확인하세요 36 | ``` 37 | 38 | 2. JSON 파일 생성 39 | ```bash 40 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 41 | source ../.venv/bin/activate # macOS/Linux 42 | ..\.venv\Scripts\activate # Windows 43 | 44 | # JSON 파일 생성 45 | python auto_mcp_json.py 46 | ``` 47 | 48 | 3. Claude Desktop 또는 Cursor에 적용 49 | - 생성된 JSON 내용을 복사 50 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 51 | - 설정 저장 및 적용 52 | 53 | ### 사용 예시 54 | 55 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다: 56 | 57 | ``` 58 | Dify 워크플로우를 실행해서 "인공지능의 윤리적 문제"에 대한 분석을 해줘. 59 | ``` 60 | 61 | 이 요청은 Dify 워크플로우에 "인공지능의 윤리적 문제"라는 입력을 전달하고, 워크플로우가 실행된 후 결과를 Claude에 표시합니다. 62 | 63 | ### 구현 세부사항 64 | 65 | `case3/mcp_server.py` 파일에는 다음과 같은 주요 구성 요소가 포함되어 있습니다. 66 | 67 | 1. 환경 변수 로드 68 | 2. FastMCP 서버 초기화 69 | 3. Dify 워크플로우 실행 도구 정의 70 | 4. HTTP 요청 및 응답 처리 71 | 5. 에러 처리 72 | 73 | 이 구현은 Dify의 워크플로우 API를 사용하여 사용자 입력을 처리하고 결과를 반환합니다. 워크플로우는 Dify 플랫폼에서 미리 구성되어야 하며, 해당 워크플로우에 접근할 수 있는 권한이 있어야 합니다. 74 | 75 | --- 76 | 77 | ## Dify Workflow Example 78 | 79 | This example provides an MCP server that executes workflows using the Dify Workflow API and retrieves the results. Through Dify's workflow functionality, you can automate complex AI tasks and utilize the results in Claude. 80 | 81 | ### Features 82 | 83 | - **Execute Dify Workflow**: Executes Dify's workflow with user input. 84 | - **Return Results**: Returns workflow execution results to Claude. 85 | - **Error Handling**: Properly handles errors that may occur during API request and response processing. 86 | 87 | ### Configuration 88 | 89 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 90 | 91 | ``` 92 | DIFY_BASE_URL = https://api.dify.ai/v1 93 | DIFY_APP_SK = your-dify-app-sk 94 | ``` 95 | 96 | - `DIFY_BASE_URL`: Base URL for the Dify API 97 | - `DIFY_APP_SK`: Secret key for your Dify application 98 | 99 | ### Usage Instructions 100 | 101 | 1. Check environment configuration 102 | ```bash 103 | # Navigate to case3 directory 104 | cd case3 105 | 106 | # Check the required environment variables 107 | # Make sure the .env file is properly configured 108 | ``` 109 | 110 | 2. Generate JSON file 111 | ```bash 112 | # Activate virtual environment (if not already activated) 113 | source ../.venv/bin/activate # macOS/Linux 114 | ..\.venv\Scripts\activate # Windows 115 | 116 | # Generate JSON file 117 | python auto_mcp_json.py 118 | ``` 119 | 120 | 3. Apply to Claude Desktop or Cursor 121 | - Copy the generated JSON content 122 | - Paste it into the MCP settings of Claude Desktop or Cursor 123 | - Save and apply settings 124 | 125 | ### Usage Examples 126 | 127 | You can use it in Claude Desktop or Cursor as follows: 128 | 129 | ``` 130 | Execute a Dify workflow to analyze "ethical issues in artificial intelligence". 
131 | ``` 132 | 133 | This request passes "ethical issues in artificial intelligence" as input to the Dify workflow, and after the workflow is executed, the results are displayed in Claude. 134 | 135 | ### Implementation Details 136 | 137 | The `case3/mcp_server.py` file includes the following main components. 138 | 139 | 1. Load environment variables 140 | 2. Initialize FastMCP server 141 | 3. Define Dify workflow execution tool 142 | 4. Handle HTTP requests and responses 143 | 5. Error handling 144 | 145 | This implementation uses Dify's workflow API to process user input and return results. The workflow must be pre-configured on the Dify platform, and you must have permission to access that workflow. -------------------------------------------------------------------------------- /docs/case4.md: -------------------------------------------------------------------------------- 1 | # Web Search 예제 2 | 3 | [English](#web-search-example) | [한국어](#web-search-예제-1) 4 | 5 | ## Web Search 예제 6 | 7 | 이 예제에서는 Tavily API를 활용하여 실시간 웹 검색 기능을 제공하는 MCP 서버를 구현합니다. 간단하고 효율적인 웹 검색 기능을 Claude에 제공합니다. 8 | 9 | ### 기능 10 | 11 | - **웹 검색**: 간단한 쿼리로 웹 검색을 수행합니다. 12 | - **결과 포맷팅**: 제목, URL, 발행 날짜, 내용 요약이 포함된 형식으로 결과를 반환합니다. 13 | 14 | ### 설정 15 | 16 | 다음 환경 변수를 루트 디렉토리의 `.env` 파일에 설정해야 합니다. 17 | 18 | ``` 19 | TAVILY_API_KEY = your-tavily-api-key 20 | ``` 21 | 22 | - `TAVILY_API_KEY`: Tavily API 키 (https://tavily.com/ 에서 발급 가능) 23 | 24 | ### 사용 방법 25 | 26 | 1. 환경 설정 확인 27 | ```bash 28 | # case4 디렉토리로 이동 29 | cd case4 30 | 31 | # 필요한 환경 변수 설정 확인 32 | # .env 파일이 올바르게 구성되었는지 확인하세요 33 | ``` 34 | 35 | 2. JSON 파일 생성 36 | ```bash 37 | # 가상 환경 활성화 (아직 활성화하지 않은 경우) 38 | source ../.venv/bin/activate # macOS/Linux 39 | ..\.venv\Scripts\activate # Windows 40 | 41 | # JSON 파일 생성 42 | python auto_mcp_json.py 43 | ``` 44 | 45 | 3. Claude Desktop 또는 Cursor에 적용 46 | - 생성된 JSON 내용을 복사 47 | - Claude Desktop 또는 Cursor의 MCP 설정에 붙여넣기 48 | - 설정 저장 및 적용 49 | 50 | ### 사용 예시 51 | 52 | Claude Desktop 또는 Cursor에서 다음과 같이 사용할 수 있습니다: 53 | 54 | ``` 55 | 최근 생성형 AI 기술 동향에 대해 검색해줘 56 | ``` 57 | 58 | ``` 59 | 인공지능 윤리에 대한 최신 논의를 검색해서 알려줘 60 | ``` 61 | 62 | ### 구현 세부사항 63 | 64 | `case4/mcp_server.py` 파일에는 다음과 같은 주요 구성 요소가 포함되어 있습니다: 65 | 66 | 1. Tavily API 연결 설정 67 | 2. 웹 검색 설정 구성 68 | 3. 검색 결과 포맷팅 함수 69 | 4. 웹 검색 도구 70 | 5. 도움말 리소스 71 | 72 | 이 구현은 Tavily API를 사용하여 검색 결과를 가져오고, 내용 요약을 자동으로 생성합니다. 검색 결과는 마크다운 형식으로 포맷팅되어 Claude에 표시됩니다. 73 | 74 | --- 75 | 76 | ## Web Search Example 77 | 78 | This example implements an MCP server that provides real-time web search functionality using the Tavily API. It offers simple and efficient web search capabilities to Claude. 79 | 80 | ### Features 81 | 82 | - **Web Search**: Performs web searches with simple queries. 83 | - **Result Formatting**: Returns results in a format including titles, URLs, publication dates, and content summaries. 84 | 85 | ### Configuration 86 | 87 | Please ensure that the following environment variables are configured in the `.env` file at the root directory. 88 | 89 | ``` 90 | TAVILY_API_KEY=your-tavily-api-key 91 | ``` 92 | 93 | - `TAVILY_API_KEY`: Tavily API key (can be obtained from https://tavily.com/) 94 | 95 | ### Usage Instructions 96 | 97 | 1. Check environment configuration 98 | ```bash 99 | # Navigate to case4 directory 100 | cd case4 101 | 102 | # Check the required environment variables 103 | # Make sure the .env file is properly configured 104 | ``` 105 | 106 | 2. 
Generate JSON file 107 | ```bash 108 | # Activate virtual environment (if not already activated) 109 | source ../.venv/bin/activate # macOS/Linux 110 | ..\.venv\Scripts\activate # Windows 111 | 112 | # Generate JSON file 113 | python auto_mcp_json.py 114 | ``` 115 | 116 | 3. Apply to Claude Desktop or Cursor 117 | - Copy the generated JSON content 118 | - Paste it into the MCP settings of Claude Desktop or Cursor 119 | - Save and apply settings 120 | 121 | ### Usage Examples 122 | 123 | You can use it in Claude Desktop or Cursor as follows: 124 | 125 | ``` 126 | Search for recent trends in generative AI technology 127 | ``` 128 | 129 | ``` 130 | Find the latest discussions on AI ethics and provide the results 131 | ``` 132 | 133 | ### Implementation Details 134 | 135 | The `case4/mcp_server.py` file includes the following main components: 136 | 137 | 1. Tavily API connection setup 138 | 2. Web search configuration 139 | 3. Search results formatting function 140 | 4. Web search tool 141 | 5. Help resource 142 | 143 | This implementation uses the Tavily API to fetch search results and automatically generate content summaries. The search results are formatted in markdown and displayed in Claude. -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # 설치 가이드 (Installation Guide) 2 | 3 | [English](#installation-guide) | [한국어](#설치-가이드-1) 4 | 5 | ## 설치 가이드 6 | 7 | 이 문서에서는 Quick-start Auto MCP 프로젝트의 설치 방법에 대해 상세히 설명합니다. 8 | 9 | ### 시스템 요구사항 10 | 11 | - Python 3.11 이상 12 | - Claude Desktop 또는 Cursor (MCP 지원 버전) 13 | - Git (선택 사항) 14 | 15 | ### 설치 단계 16 | 17 | #### 1. 저장소 복제 18 | 19 | Git을 사용하여 저장소를 복제합니다. 20 | 21 | ```bash 22 | git clone https://github.com/teddynote-lab/mcp-usecase.git 23 | cd mcp-usecase 24 | ``` 25 | 26 | 또는 GitHub에서 ZIP 파일로 다운로드하여 압축을 풀 수도 있습니다. 27 | 28 | #### 2. 가상 환경 설정 29 | 30 | ##### uv 사용 (권장) 31 | 32 | [uv](https://github.com/astral-sh/uv)는 더 빠른 파이썬 패키지 설치 및 환경 관리 도구입니다. 아직 설치하지 않았다면 먼저 설치해 주세요. 33 | 34 | ```bash 35 | # uv 설치 (macOS/Linux) 36 | curl -LsSf https://astral.sh/uv/install.sh | sh 37 | 38 | # uv 설치 (Windows PowerShell) 39 | powershell -c "irm https://astral.sh/uv/install.ps1 | iex" 40 | ``` 41 | 42 | uv를 사용하여 가상 환경을 설정하고 패키지를 설치합니다. 43 | 44 | ```bash 45 | uv venv 46 | uv pip install -r requirements.txt 47 | ``` 48 | 49 | ##### pip 사용 50 | 51 | 기존 Python 도구를 사용하여 가상 환경을 설정할 수도 있습니다. 52 | 53 | ```bash 54 | # 가상 환경 생성 55 | python -m venv .venv 56 | 57 | # 가상 환경 활성화 (Windows) 58 | .venv\Scripts\activate 59 | 60 | # 가상 환경 활성화 (macOS/Linux) 61 | source .venv/bin/activate 62 | 63 | # 패키지 설치 64 | pip install -r requirements.txt 65 | ``` 66 | 67 | #### 3. 환경 변수 설정 68 | 69 | 루트 디렉토리의 `.env.example`에 필요한 환경 변수를 설정하고 파일명을 `.env`로 바꿔주세요. 70 | 71 | ##### case1 (RAG) 72 | ``` 73 | OPENAI_API_KEY = "your-openai-api-key" 74 | ``` 75 | 76 | ##### case2 (Dify External Knowledge API) 77 | ``` 78 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 79 | DIFY_API_KEY = your-dify-api-key 80 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 81 | ``` 82 | 83 | ##### case3 (Dify Workflow) 84 | ``` 85 | DIFY_BASE_URL = https://api.dify.ai/v1 86 | DIFY_APP_SK = your-dify-app-sk 87 | ``` 88 | 89 | ##### case4 (Web Search) 90 | ``` 91 | TAVILY_API_KEY = your-tavily-api-key 92 | ``` 93 | 94 | ### 설치 확인 95 | 96 | 설치가 올바르게 이루어졌는지 확인하기 위해 다음 명령을 실행할 수 있습니다.
97 | 98 | ```bash 99 | # 가상 환경이 활성화된 상태에서 100 | cd case1 101 | python auto_mcp_json.py 102 | ``` 103 | 104 | 성공적으로 JSON 파일이 생성되면 설치가 완료된 것입니다. 105 | 106 | --- 107 | 108 | ## Installation Guide 109 | 110 | This document provides detailed instructions for installing the Quick-start Auto MCP project. 111 | 112 | ### System Requirements 113 | 114 | - Python 3.11 or higher 115 | - Claude Desktop or Cursor (MCP supporting version) 116 | - Git (optional) 117 | 118 | ### Installation Steps 119 | 120 | #### 1. Clone the Repository 121 | 122 | Clone the repository using Git: 123 | 124 | ```bash 125 | git clone https://github.com/teddynote-lab/mcp-usecase.git 126 | cd mcp-usecase 127 | ``` 128 | 129 | Alternatively, you can download and extract the ZIP file from GitHub. 130 | 131 | #### 2. Set Up Virtual Environment 132 | 133 | ##### Using uv (recommended) 134 | 135 | [uv](https://github.com/astral-sh/uv) is a faster Python package installer and environment manager. If you haven't installed it yet, install it first. 136 | 137 | ```bash 138 | # Install uv (macOS/Linux) 139 | curl -LsSf https://astral.sh/uv/install.sh | sh 140 | 141 | # Install uv (Windows PowerShell) 142 | powershell -c "irm https://astral.sh/uv/install.ps1 | iex" 143 | ``` 144 | 145 | Set up a virtual environment and install packages using uv. 146 | 147 | ```bash 148 | uv venv 149 | uv pip install -r requirements.txt 150 | ``` 151 | 152 | ##### Using pip 153 | 154 | You can also set up a virtual environment using traditional Python tools. 155 | 156 | ```bash 157 | # Create virtual environment 158 | python -m venv .venv 159 | 160 | # Activate virtual environment (Windows) 161 | .venv\Scripts\activate 162 | 163 | # Activate virtual environment (macOS/Linux) 164 | source .venv/bin/activate 165 | 166 | # Install packages 167 | pip install -r requirements.txt 168 | ``` 169 | 170 | #### 3. Set Environment Variables for Each Case 171 | 172 | Set the necessary environment variables in the `.env.example` file found in the root directory, then rename the file to `.env`. 173 | 174 | ##### case1 (RAG) 175 | ``` 176 | OPENAI_API_KEY = "your-openai-api-key" 177 | ``` 178 | 179 | ##### case2 (Dify External Knowledge API) 180 | ``` 181 | DIFY_API_ENDPOINT = http://localhost:8000/retrieval 182 | DIFY_API_KEY = your-dify-api-key 183 | DIFY_KNOWLEDGE_ID = your-knowledge-base-id 184 | ``` 185 | 186 | ##### case3 (Dify Workflow) 187 | ``` 188 | DIFY_BASE_URL = https://api.dify.ai/v1 189 | DIFY_APP_SK = your-dify-app-sk 190 | ``` 191 | 192 | ##### case4 (Web Search) 193 | ``` 194 | TAVILY_API_KEY = your-tavily-api-key 195 | ``` 196 | 197 | ### Verify Installation 198 | 199 | To verify that the installation has been completed successfully, you can run the following command: 200 | 201 | ```bash 202 | # With the virtual environment activated 203 | cd case1 204 | python auto_mcp_json.py 205 | ``` 206 | 207 | If a JSON file is successfully generated, the installation is complete.
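For reference, the `mcp_config.json` generated by `auto_mcp_json.py` follows the standard MCP server configuration shape. A minimal sketch for case1 is shown below; the server name, paths, and key values here are illustrative and will differ on your machine:

```json
{
  "mcpServers": {
    "case1-rag": {
      "command": "/path/to/mcp-usecase/.venv/bin/python",
      "args": ["/path/to/mcp-usecase/case1/mcp_server.py"],
      "env": {
        "OPENAI_API_KEY": "sk-..."
      }
    }
  }
}
```

This is the content you copy into the MCP settings of Claude Desktop or Cursor.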
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "mcp" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [] 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.6.1 2 | aiohttp==3.11.14 3 | aiosignal==1.3.2 4 | annotated-types==0.7.0 5 | antlr4-python3-runtime==4.9.3 6 | anyio==4.9.0 7 | asgiref==3.8.1 8 | attrs==25.3.0 9 | backoff==2.2.1 10 | bcrypt==4.3.0 11 | build==1.2.2.post1 12 | cachetools==5.5.2 13 | certifi==2025.1.31 14 | cffi==1.17.1 15 | charset-normalizer==3.4.1 16 | chroma-hnswlib==0.7.6 17 | chromadb==0.6.3 18 | click==8.1.8 19 | coloredlogs==15.0.1 20 | cryptography==44.0.2 21 | dataclasses-json==0.6.7 22 | deprecated==1.2.18 23 | distro==1.9.0 24 | durationpy==0.9 25 | fastapi==0.115.11 26 | filelock==3.18.0 27 | flatbuffers==25.2.10 28 | frozenlist==1.5.0 29 | fsspec==2025.3.0 30 | google-auth==2.38.0 31 | googleapis-common-protos==1.69.1 32 | grpcio==1.71.0 33 | h11==0.14.0 34 | httpcore==1.0.7 35 | httptools==0.6.4 36 | httpx==0.28.1 37 | httpx-sse==0.4.0 38 | huggingface-hub==0.29.3 39 | humanfriendly==10.0 40 | idna==3.10 41 | importlib-metadata==8.6.1 42 | importlib-resources==6.5.2 43 | jiter==0.9.0 44 | jsonpatch==1.33 45 | jsonpointer==3.0.0 46 | kubernetes==32.0.1 47 | langchain==0.3.20 48 | langchain-chroma==0.2.2 49 | langchain-community==0.3.19 50 | langchain-core==0.3.45 51 | langchain-openai==0.3.8 52 | langchain-text-splitters==0.3.6 53 | langgraph==0.3.11 54 | langgraph-checkpoint==2.0.20 55 | langgraph-prebuilt==0.1.3 56 | langgraph-sdk==0.1.57 57 | langsmith==0.3.15 58 | markdown-it-py==3.0.0 59 | marshmallow==3.26.1 60 | mcp==1.4.1 61 | mdurl==0.1.2 62 | mmh3==5.1.0 63 | monotonic==1.6 64 | mpmath==1.3.0 65 | msgpack==1.1.0 66 | multidict==6.1.0 67 | mypy-extensions==1.0.0 68 | numpy==1.26.4 69 | oauthlib==3.2.2 70 | omegaconf==2.3.0 71 | onnxruntime==1.21.0 72 | openai==1.66.3 73 | opentelemetry-api==1.31.0 74 | opentelemetry-exporter-otlp-proto-common==1.31.0 75 | opentelemetry-exporter-otlp-proto-grpc==1.31.0 76 | opentelemetry-instrumentation==0.52b0 77 | opentelemetry-instrumentation-asgi==0.52b0 78 | opentelemetry-instrumentation-fastapi==0.52b0 79 | opentelemetry-proto==1.31.0 80 | opentelemetry-sdk==1.31.0 81 | opentelemetry-semantic-conventions==0.52b0 82 | opentelemetry-util-http==0.52b0 83 | orjson==3.10.15 84 | overrides==7.7.0 85 | packaging==24.2 86 | pdfminer-six==20231228 87 | pdfplumber==0.11.5 88 | pillow==11.1.0 89 | posthog==3.20.0 90 | propcache==0.3.0 91 | protobuf==5.29.3 92 | pyasn1==0.6.1 93 | pyasn1-modules==0.4.1 94 | pycparser==2.22 95 | pydantic==2.10.6 96 | pydantic-core==2.27.2 97 | pydantic-settings==2.8.1 98 | pygments==2.19.1 99 | pypdf==5.4.0 100 | pypdfium2==4.30.1 101 | pypika==0.48.9 102 | pyproject-hooks==1.2.0 103 | python-dateutil==2.9.0.post0 104 | python-dotenv==1.0.1 105 | pyyaml==6.0.2 106 | rank-bm25==0.2.2 107 | regex==2024.11.6 108 | requests==2.32.3 109 | requests-oauthlib==2.0.0 110 | requests-toolbelt==1.0.0 111 | rich==13.9.4 112 | rsa==4.9 113 | shellingham==1.5.4 114 | six==1.17.0 115 | sniffio==1.3.1 116 | sqlalchemy==2.0.39 117 | sse-starlette==2.2.1 118 | starlette==0.46.1 119 | sympy==1.13.3 120 | 
tavily-python==0.5.1 121 | tenacity==9.0.0 122 | tiktoken==0.9.0 123 | tokenizers==0.21.1 124 | tqdm==4.67.1 125 | typer==0.15.2 126 | typing-extensions==4.12.2 127 | typing-inspect==0.9.0 128 | urllib3==2.3.0 129 | uvicorn==0.34.0 130 | uvloop==0.21.0 131 | watchfiles==1.0.4 132 | websocket-client==1.8.0 133 | websockets==15.0.1 134 | wrapt==1.17.2 135 | yarl==1.18.3 136 | zipp==3.21.0 137 | zstandard==0.23.0 138 | -------------------------------------------------------------------------------- /requirements_windows.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.6.1 2 | aiohttp==3.11.14 3 | aiosignal==1.3.2 4 | annotated-types==0.7.0 5 | antlr4-python3-runtime==4.9.3 6 | anyio==4.9.0 7 | asgiref==3.8.1 8 | attrs==25.3.0 9 | backoff==2.2.1 10 | bcrypt==4.3.0 11 | build==1.2.2.post1 12 | cachetools==5.5.2 13 | certifi==2025.1.31 14 | cffi==1.17.1 15 | charset-normalizer==3.4.1 16 | chroma-hnswlib==0.7.6 17 | chromadb==0.6.3 18 | click==8.1.8 19 | coloredlogs==15.0.1 20 | cryptography==44.0.2 21 | dataclasses-json==0.6.7 22 | deprecated==1.2.18 23 | distro==1.9.0 24 | durationpy==0.9 25 | fastapi==0.115.11 26 | filelock==3.18.0 27 | flatbuffers==25.2.10 28 | frozenlist==1.5.0 29 | fsspec==2025.3.0 30 | google-auth==2.38.0 31 | googleapis-common-protos==1.69.1 32 | grpcio==1.71.0 33 | h11==0.14.0 34 | httpcore==1.0.7 35 | httptools==0.6.4 36 | httpx==0.28.1 37 | httpx-sse==0.4.0 38 | huggingface-hub==0.29.3 39 | humanfriendly==10.0 40 | idna==3.10 41 | importlib-metadata==8.6.1 42 | importlib-resources==6.5.2 43 | jiter==0.9.0 44 | jsonpatch==1.33 45 | jsonpointer==3.0.0 46 | kubernetes==32.0.1 47 | langchain==0.3.20 48 | langchain-chroma==0.2.2 49 | langchain-community==0.3.19 50 | langchain-core==0.3.45 51 | langchain-openai==0.3.8 52 | langchain-text-splitters==0.3.6 53 | langgraph==0.3.11 54 | langgraph-checkpoint==2.0.20 55 | langgraph-prebuilt==0.1.3 56 | langgraph-sdk==0.1.57 57 | langsmith==0.3.15 58 | markdown-it-py==3.0.0 59 | marshmallow==3.26.1 60 | mcp==1.4.1 61 | mdurl==0.1.2 62 | mmh3==5.1.0 63 | monotonic==1.6 64 | mpmath==1.3.0 65 | msgpack==1.1.0 66 | multidict==6.1.0 67 | mypy-extensions==1.0.0 68 | numpy==1.26.4 69 | oauthlib==3.2.2 70 | omegaconf==2.3.0 71 | onnxruntime==1.21.0 72 | openai==1.66.3 73 | opentelemetry-api==1.31.0 74 | opentelemetry-exporter-otlp-proto-common==1.31.0 75 | opentelemetry-exporter-otlp-proto-grpc==1.31.0 76 | opentelemetry-instrumentation==0.52b0 77 | opentelemetry-instrumentation-asgi==0.52b0 78 | opentelemetry-instrumentation-fastapi==0.52b0 79 | opentelemetry-proto==1.31.0 80 | opentelemetry-sdk==1.31.0 81 | opentelemetry-semantic-conventions==0.52b0 82 | opentelemetry-util-http==0.52b0 83 | orjson==3.10.15 84 | overrides==7.7.0 85 | packaging==24.2 86 | pdfminer-six==20231228 87 | pdfplumber==0.11.5 88 | pillow==11.1.0 89 | posthog==3.20.0 90 | propcache==0.3.0 91 | protobuf==5.29.3 92 | pyasn1==0.6.1 93 | pyasn1-modules==0.4.1 94 | pycparser==2.22 95 | pydantic==2.10.6 96 | pydantic-core==2.27.2 97 | pydantic-settings==2.8.1 98 | pygments==2.19.1 99 | pypdf==5.4.0 100 | pypdfium2==4.30.1 101 | pypika==0.48.9 102 | pyproject-hooks==1.2.0 103 | python-dateutil==2.9.0.post0 104 | python-dotenv==1.0.1 105 | pyyaml==6.0.2 106 | rank-bm25==0.2.2 107 | regex==2024.11.6 108 | requests==2.32.3 109 | requests-oauthlib==2.0.0 110 | requests-toolbelt==1.0.0 111 | rich==13.9.4 112 | rsa==4.9 113 | shellingham==1.5.4 114 | six==1.17.0 115 | sniffio==1.3.1 116 | sqlalchemy==2.0.39 117 | 
sse-starlette==2.2.1 118 | starlette==0.46.1 119 | sympy==1.13.3 120 | tavily-python==0.5.1 121 | tenacity==9.0.0 122 | tiktoken==0.9.0 123 | tokenizers==0.21.1 124 | tqdm==4.67.1 125 | typer==0.15.2 126 | typing-extensions==4.12.2 127 | typing-inspect==0.9.0 128 | urllib3==2.3.0 129 | uvicorn==0.34.0 130 | winloop==0.1.8 131 | watchfiles==1.0.4 132 | websocket-client==1.8.0 133 | websockets==15.0.1 134 | wrapt==1.17.2 135 | yarl==1.18.3 136 | zipp==3.21.0 137 | zstandard==0.23.0 138 | -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.6.1 2 | # via 3 | # -r requirements.txt 4 | # aiohttp 5 | aiohttp==3.11.14 6 | # via 7 | # -r requirements.txt 8 | # langchain-community 9 | aiosignal==1.3.2 10 | # via 11 | # -r requirements.txt 12 | # aiohttp 13 | annotated-types==0.7.0 14 | # via 15 | # -r requirements.txt 16 | # pydantic 17 | antlr4-python3-runtime==4.9.3 18 | # via 19 | # -r requirements.txt 20 | # omegaconf 21 | anyio==4.9.0 22 | # via 23 | # -r requirements.txt 24 | # httpx 25 | # mcp 26 | # openai 27 | # sse-starlette 28 | # starlette 29 | # watchfiles 30 | asgiref==3.8.1 31 | # via 32 | # -r requirements.txt 33 | # opentelemetry-instrumentation-asgi 34 | attrs==25.3.0 35 | # via 36 | # -r requirements.txt 37 | # aiohttp 38 | backoff==2.2.1 39 | # via 40 | # -r requirements.txt 41 | # posthog 42 | bcrypt==4.3.0 43 | # via 44 | # -r requirements.txt 45 | # chromadb 46 | build==1.2.2.post1 47 | # via 48 | # -r requirements.txt 49 | # chromadb 50 | cachetools==5.5.2 51 | # via 52 | # -r requirements.txt 53 | # google-auth 54 | certifi==2025.1.31 55 | # via 56 | # -r requirements.txt 57 | # httpcore 58 | # httpx 59 | # kubernetes 60 | # requests 61 | cffi==1.17.1 62 | # via 63 | # -r requirements.txt 64 | # cryptography 65 | charset-normalizer==3.4.1 66 | # via 67 | # -r requirements.txt 68 | # pdfminer-six 69 | # requests 70 | chroma-hnswlib==0.7.6 71 | # via 72 | # -r requirements.txt 73 | # chromadb 74 | chromadb==0.6.3 75 | # via 76 | # -r requirements.txt 77 | # langchain-chroma 78 | click==8.1.8 79 | # via 80 | # -r requirements.txt 81 | # typer 82 | # uvicorn 83 | coloredlogs==15.0.1 84 | # via 85 | # -r requirements.txt 86 | # onnxruntime 87 | cryptography==44.0.2 88 | # via 89 | # -r requirements.txt 90 | # pdfminer-six 91 | dataclasses-json==0.6.7 92 | # via 93 | # -r requirements.txt 94 | # langchain-community 95 | deprecated==1.2.18 96 | # via 97 | # -r requirements.txt 98 | # opentelemetry-api 99 | # opentelemetry-exporter-otlp-proto-grpc 100 | # opentelemetry-semantic-conventions 101 | distro==1.9.0 102 | # via 103 | # -r requirements.txt 104 | # openai 105 | # posthog 106 | durationpy==0.9 107 | # via 108 | # -r requirements.txt 109 | # kubernetes 110 | fastapi==0.115.11 111 | # via 112 | # -r requirements.txt 113 | # chromadb 114 | filelock==3.18.0 115 | # via 116 | # -r requirements.txt 117 | # huggingface-hub 118 | flatbuffers==25.2.10 119 | # via 120 | # -r requirements.txt 121 | # onnxruntime 122 | frozenlist==1.5.0 123 | # via 124 | # -r requirements.txt 125 | # aiohttp 126 | # aiosignal 127 | fsspec==2025.3.0 128 | # via 129 | # -r requirements.txt 130 | # huggingface-hub 131 | google-auth==2.38.0 132 | # via 133 | # -r requirements.txt 134 | # kubernetes 135 | googleapis-common-protos==1.69.1 136 | # via 137 | # -r requirements.txt 138 | # opentelemetry-exporter-otlp-proto-grpc 139 | grpcio==1.71.0 140 | # via 141 | # -r 
requirements.txt
142 |     #   chromadb
143 |     #   opentelemetry-exporter-otlp-proto-grpc
144 | h11==0.14.0
145 |     # via
146 |     #   -r requirements.txt
147 |     #   httpcore
148 |     #   uvicorn
149 | httpcore==1.0.7
150 |     # via
151 |     #   -r requirements.txt
152 |     #   httpx
153 | httptools==0.6.4
154 |     # via
155 |     #   -r requirements.txt
156 |     #   uvicorn
157 | httpx==0.28.1
158 |     # via
159 |     #   -r requirements.txt
160 |     #   chromadb
161 |     #   langgraph-sdk
162 |     #   langsmith
163 |     #   mcp
164 |     #   openai
165 |     #   tavily-python
166 | httpx-sse==0.4.0
167 |     # via
168 |     #   -r requirements.txt
169 |     #   langchain-community
170 |     #   mcp
171 | huggingface-hub==0.29.3
172 |     # via
173 |     #   -r requirements.txt
174 |     #   tokenizers
175 | humanfriendly==10.0
176 |     # via
177 |     #   -r requirements.txt
178 |     #   coloredlogs
179 | idna==3.10
180 |     # via
181 |     #   -r requirements.txt
182 |     #   anyio
183 |     #   httpx
184 |     #   requests
185 |     #   yarl
186 | importlib-metadata==8.6.1
187 |     # via
188 |     #   -r requirements.txt
189 |     #   opentelemetry-api
190 | importlib-resources==6.5.2
191 |     # via
192 |     #   -r requirements.txt
193 |     #   chromadb
194 | jiter==0.9.0
195 |     # via
196 |     #   -r requirements.txt
197 |     #   openai
198 | jsonpatch==1.33
199 |     # via
200 |     #   -r requirements.txt
201 |     #   langchain-core
202 | jsonpointer==3.0.0
203 |     # via
204 |     #   -r requirements.txt
205 |     #   jsonpatch
206 | kubernetes==32.0.1
207 |     # via
208 |     #   -r requirements.txt
209 |     #   chromadb
210 | langchain==0.3.20
211 |     # via
212 |     #   -r requirements.txt
213 |     #   langchain-community
214 | langchain-chroma==0.2.2
215 |     # via -r requirements.txt
216 | langchain-community==0.3.19
217 |     # via -r requirements.txt
218 | langchain-core==0.3.45
219 |     # via
220 |     #   -r requirements.txt
221 |     #   langchain
222 |     #   langchain-chroma
223 |     #   langchain-community
224 |     #   langchain-openai
225 |     #   langchain-text-splitters
226 |     #   langgraph
227 |     #   langgraph-checkpoint
228 |     #   langgraph-prebuilt
229 | langchain-openai==0.3.8
230 |     # via -r requirements.txt
231 | langchain-text-splitters==0.3.6
232 |     # via
233 |     #   -r requirements.txt
234 |     #   langchain
235 | langgraph==0.3.11
236 |     # via -r requirements.txt
237 | langgraph-checkpoint==2.0.20
238 |     # via
239 |     #   -r requirements.txt
240 |     #   langgraph
241 |     #   langgraph-prebuilt
242 | langgraph-prebuilt==0.1.3
243 |     # via
244 |     #   -r requirements.txt
245 |     #   langgraph
246 | langgraph-sdk==0.1.57
247 |     # via
248 |     #   -r requirements.txt
249 |     #   langgraph
250 | langsmith==0.3.15
251 |     # via
252 |     #   -r requirements.txt
253 |     #   langchain
254 |     #   langchain-community
255 |     #   langchain-core
256 | markdown-it-py==3.0.0
257 |     # via
258 |     #   -r requirements.txt
259 |     #   rich
260 | marshmallow==3.26.1
261 |     # via
262 |     #   -r requirements.txt
263 |     #   dataclasses-json
264 | mcp==1.4.1
265 |     # via -r requirements.txt
266 | mdurl==0.1.2
267 |     # via
268 |     #   -r requirements.txt
269 |     #   markdown-it-py
270 | mmh3==5.1.0
271 |     # via
272 |     #   -r requirements.txt
273 |     #   chromadb
274 | monotonic==1.6
275 |     # via
276 |     #   -r requirements.txt
277 |     #   posthog
278 | mpmath==1.3.0
279 |     # via
280 |     #   -r requirements.txt
281 |     #   sympy
282 | msgpack==1.1.0
283 |     # via
284 |     #   -r requirements.txt
285 |     #   langgraph-checkpoint
286 | multidict==6.1.0
287 |     # via
288 |     #   -r requirements.txt
289 |     #   aiohttp
290 |     #   yarl
291 | mypy-extensions==1.0.0
292 |     # via
293 |     #   -r requirements.txt
294 |     #   typing-inspect
295 | numpy==1.26.4
296 |     # via
297 |     #   -r requirements.txt
298 |     #   chroma-hnswlib
299 |     #   chromadb
300 |     #   langchain-chroma
301 |     #   langchain-community
302 |     #   onnxruntime
303 |     #   rank-bm25
304 | oauthlib==3.2.2
305 |     # via
306 |     #   -r requirements.txt
307 |     #   kubernetes
308 |     #   requests-oauthlib
309 | omegaconf==2.3.0
310 |     # via -r requirements.txt
311 | onnxruntime==1.21.0
312 |     # via
313 |     #   -r requirements.txt
314 |     #   chromadb
315 | openai==1.66.3
316 |     # via
317 |     #   -r requirements.txt
318 |     #   langchain-openai
319 | opentelemetry-api==1.31.0
320 |     # via
321 |     #   -r requirements.txt
322 |     #   chromadb
323 |     #   opentelemetry-exporter-otlp-proto-grpc
324 |     #   opentelemetry-instrumentation
325 |     #   opentelemetry-instrumentation-asgi
326 |     #   opentelemetry-instrumentation-fastapi
327 |     #   opentelemetry-sdk
328 |     #   opentelemetry-semantic-conventions
329 | opentelemetry-exporter-otlp-proto-common==1.31.0
330 |     # via
331 |     #   -r requirements.txt
332 |     #   opentelemetry-exporter-otlp-proto-grpc
333 | opentelemetry-exporter-otlp-proto-grpc==1.31.0
334 |     # via
335 |     #   -r requirements.txt
336 |     #   chromadb
337 | opentelemetry-instrumentation==0.52b0
338 |     # via
339 |     #   -r requirements.txt
340 |     #   opentelemetry-instrumentation-asgi
341 |     #   opentelemetry-instrumentation-fastapi
342 | opentelemetry-instrumentation-asgi==0.52b0
343 |     # via
344 |     #   -r requirements.txt
345 |     #   opentelemetry-instrumentation-fastapi
346 | opentelemetry-instrumentation-fastapi==0.52b0
347 |     # via
348 |     #   -r requirements.txt
349 |     #   chromadb
350 | opentelemetry-proto==1.31.0
351 |     # via
352 |     #   -r requirements.txt
353 |     #   opentelemetry-exporter-otlp-proto-common
354 |     #   opentelemetry-exporter-otlp-proto-grpc
355 | opentelemetry-sdk==1.31.0
356 |     # via
357 |     #   -r requirements.txt
358 |     #   chromadb
359 |     #   opentelemetry-exporter-otlp-proto-grpc
360 | opentelemetry-semantic-conventions==0.52b0
361 |     # via
362 |     #   -r requirements.txt
363 |     #   opentelemetry-instrumentation
364 |     #   opentelemetry-instrumentation-asgi
365 |     #   opentelemetry-instrumentation-fastapi
366 |     #   opentelemetry-sdk
367 | opentelemetry-util-http==0.52b0
368 |     # via
369 |     #   -r requirements.txt
370 |     #   opentelemetry-instrumentation-asgi
371 |     #   opentelemetry-instrumentation-fastapi
372 | orjson==3.10.15
373 |     # via
374 |     #   -r requirements.txt
375 |     #   chromadb
376 |     #   langgraph-sdk
377 |     #   langsmith
378 | overrides==7.7.0
379 |     # via
380 |     #   -r requirements.txt
381 |     #   chromadb
382 | packaging==24.2
383 |     # via
384 |     #   -r requirements.txt
385 |     #   build
386 |     #   huggingface-hub
387 |     #   langchain-core
388 |     #   langsmith
389 |     #   marshmallow
390 |     #   onnxruntime
391 |     #   opentelemetry-instrumentation
392 | pdfminer-six==20231228
393 |     # via
394 |     #   -r requirements.txt
395 |     #   pdfplumber
396 | pdfplumber==0.11.5
397 |     # via -r requirements.txt
398 | pillow==11.1.0
399 |     # via
400 |     #   -r requirements.txt
401 |     #   pdfplumber
402 | posthog==3.20.0
403 |     # via
404 |     #   -r requirements.txt
405 |     #   chromadb
406 | propcache==0.3.0
407 |     # via
408 |     #   -r requirements.txt
409 |     #   aiohttp
410 |     #   yarl
411 | protobuf==5.29.3
412 |     # via
413 |     #   -r requirements.txt
414 |     #   googleapis-common-protos
415 |     #   onnxruntime
416 |     #   opentelemetry-proto
417 | pyasn1==0.6.1
418 |     # via
419 |     #   -r requirements.txt
420 |     #   pyasn1-modules
421 |     #   rsa
422 | pyasn1-modules==0.4.1
423 |     # via
424 |     #   -r requirements.txt
425 |     #   google-auth
426 | pycparser==2.22
427 |     # via
428 |     #   -r requirements.txt
429 |     #   cffi
430 | pydantic==2.10.6
431 |     # via
432 |     #   -r requirements.txt
433 |     #   chromadb
434 |     #   fastapi
435 |     #   langchain
436 |     #   langchain-core
437 |     #   langsmith
438 |     #   mcp
439 |     #   openai
440 |     #   pydantic-settings
441 | pydantic-core==2.27.2
442 |     # via
443 |     #   -r requirements.txt
444 |     #   pydantic
445 | pydantic-settings==2.8.1
446 |     # via
447 |     #   -r requirements.txt
448 |     #   langchain-community
449 |     #   mcp
450 | pygments==2.19.1
451 |     # via
452 |     #   -r requirements.txt
453 |     #   rich
454 | pypdf==5.4.0
455 |     # via -r requirements.txt
456 | pypdfium2==4.30.1
457 |     # via
458 |     #   -r requirements.txt
459 |     #   pdfplumber
460 | pypika==0.48.9
461 |     # via
462 |     #   -r requirements.txt
463 |     #   chromadb
464 | pyproject-hooks==1.2.0
465 |     # via
466 |     #   -r requirements.txt
467 |     #   build
468 | python-dateutil==2.9.0.post0
469 |     # via
470 |     #   -r requirements.txt
471 |     #   kubernetes
472 |     #   posthog
473 | python-dotenv==1.0.1
474 |     # via
475 |     #   -r requirements.txt
476 |     #   pydantic-settings
477 |     #   uvicorn
478 | pyyaml==6.0.2
479 |     # via
480 |     #   -r requirements.txt
481 |     #   chromadb
482 |     #   huggingface-hub
483 |     #   kubernetes
484 |     #   langchain
485 |     #   langchain-community
486 |     #   langchain-core
487 |     #   omegaconf
488 |     #   uvicorn
489 | rank-bm25==0.2.2
490 |     # via -r requirements.txt
491 | regex==2024.11.6
492 |     # via
493 |     #   -r requirements.txt
494 |     #   tiktoken
495 | requests==2.32.3
496 |     # via
497 |     #   -r requirements.txt
498 |     #   huggingface-hub
499 |     #   kubernetes
500 |     #   langchain
501 |     #   langchain-community
502 |     #   langsmith
503 |     #   posthog
504 |     #   requests-oauthlib
505 |     #   requests-toolbelt
506 |     #   tavily-python
507 |     #   tiktoken
508 | requests-oauthlib==2.0.0
509 |     # via
510 |     #   -r requirements.txt
511 |     #   kubernetes
512 | requests-toolbelt==1.0.0
513 |     # via
514 |     #   -r requirements.txt
515 |     #   langsmith
516 | rich==13.9.4
517 |     # via
518 |     #   -r requirements.txt
519 |     #   chromadb
520 |     #   typer
521 | rsa==4.9
522 |     # via
523 |     #   -r requirements.txt
524 |     #   google-auth
525 | shellingham==1.5.4
526 |     # via
527 |     #   -r requirements.txt
528 |     #   typer
529 | six==1.17.0
530 |     # via
531 |     #   -r requirements.txt
532 |     #   kubernetes
533 |     #   posthog
534 |     #   python-dateutil
535 | sniffio==1.3.1
536 |     # via
537 |     #   -r requirements.txt
538 |     #   anyio
539 |     #   openai
540 | sqlalchemy==2.0.39
541 |     # via
542 |     #   -r requirements.txt
543 |     #   langchain
544 |     #   langchain-community
545 | sse-starlette==2.2.1
546 |     # via
547 |     #   -r requirements.txt
548 |     #   mcp
549 | starlette==0.46.1
550 |     # via
551 |     #   -r requirements.txt
552 |     #   fastapi
553 |     #   mcp
554 |     #   sse-starlette
555 | sympy==1.13.3
556 |     # via
557 |     #   -r requirements.txt
558 |     #   onnxruntime
559 | tavily-python==0.5.1
560 |     # via -r requirements.txt
561 | tenacity==9.0.0
562 |     # via
563 |     #   -r requirements.txt
564 |     #   chromadb
565 |     #   langchain-community
566 |     #   langchain-core
567 | tiktoken==0.9.0
568 |     # via
569 |     #   -r requirements.txt
570 |     #   langchain-openai
571 |     #   tavily-python
572 | tokenizers==0.21.1
573 |     # via
574 |     #   -r requirements.txt
575 |     #   chromadb
576 | tqdm==4.67.1
577 |     # via
578 |     #   -r requirements.txt
579 |     #   chromadb
580 |     #   huggingface-hub
581 |     #   openai
582 | typer==0.15.2
583 |     # via
584 |     #   -r requirements.txt
585 |     #   chromadb
586 | typing-extensions==4.12.2
587 |     # via
588 |     #   -r requirements.txt
589 |     #   anyio
590 |     #   chromadb
591 |     #   fastapi
592 |     #   huggingface-hub
593 |     #   langchain-core
594 |     #   openai
595 |     #   opentelemetry-sdk
596 |     #   pydantic
597 |     #   pydantic-core
598 |     #   sqlalchemy
599 |     #   typer
600 |     #   typing-inspect
601 | typing-inspect==0.9.0
602 |     # via
603 |     #   -r requirements.txt
604 |     #   dataclasses-json
605 | urllib3==2.3.0
606 |     # via
607 |     #   -r requirements.txt
608 |     #   kubernetes
609 |     #   requests
610 | uvicorn==0.34.0
611 |     # via
612 |     #   -r requirements.txt
613 |     #   chromadb
614 |     #   mcp
615 | uvloop==0.21.0
616 |     # via
617 |     #   -r requirements.txt
618 |     #   uvicorn
619 | watchfiles==1.0.4
620 |     # via
621 |     #   -r requirements.txt
622 |     #   uvicorn
623 | websocket-client==1.8.0
624 |     # via
625 |     #   -r requirements.txt
626 |     #   kubernetes
627 | websockets==15.0.1
628 |     # via
629 |     #   -r requirements.txt
630 |     #   uvicorn
631 | wrapt==1.17.2
632 |     # via
633 |     #   -r requirements.txt
634 |     #   deprecated
635 |     #   opentelemetry-instrumentation
636 | yarl==1.18.3
637 |     # via
638 |     #   -r requirements.txt
639 |     #   aiohttp
640 | zipp==3.21.0
641 |     # via
642 |     #   -r requirements.txt
643 |     #   importlib-metadata
644 | zstandard==0.23.0
645 |     # via
646 |     #   -r requirements.txt
647 |     #   langsmith
648 |
--------------------------------------------------------------------------------