├── .github └── workflows │ └── python-publish.yml ├── Dockerfile ├── LICENSE.txt ├── README.md ├── README_ja.md ├── README_zh.md ├── imgs ├── Paper-Arxiv-orange.svg ├── architecture.png ├── claude_desktop.jpg ├── dinggroup_out.png ├── exp_mcpbench.png ├── goose.jpg ├── logo.png └── witsy.jpg ├── pyproject.toml ├── requirements.txt ├── setup.py ├── smithery.yaml └── src └── xiyan_mcp_server ├── __init__.py ├── __main__.py ├── config_demo.yml ├── database_env.py ├── local_model ├── README.md └── local_xiyan_server.py ├── server.py └── utils ├── common_util.py ├── db_config.py ├── db_mschema.py ├── db_source.py ├── db_util.py ├── file_util.py ├── llm_util.py └── logger_util.py /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package to PyPI when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | release-build: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.x" 28 | 29 | - name: Build release distributions 30 | run: | 31 | # NOTE: put your own distribution build steps here. 32 | python -m pip install build 33 | python -m build 34 | 35 | - name: Upload distributions 36 | uses: actions/upload-artifact@v4 37 | with: 38 | name: release-dists 39 | path: dist/ 40 | 41 | pypi-publish: 42 | runs-on: ubuntu-latest 43 | needs: 44 | - release-build 45 | permissions: 46 | # IMPORTANT: this permission is mandatory for trusted publishing 47 | id-token: write 48 | 49 | # Dedicated environments with protections for publishing are strongly recommended. 50 | # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules 51 | environment: 52 | name: pypi 53 | # OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status: 54 | # url: https://pypi.org/p/YOURPROJECT 55 | # 56 | # ALTERNATIVE: if your GitHub Release name is the PyPI project version string 57 | # ALTERNATIVE: exactly, uncomment the following line instead: 58 | # url: https://pypi.org/project/YOURPROJECT/${{ github.event.release.name }} 59 | 60 | steps: 61 | - name: Retrieve release distributions 62 | uses: actions/download-artifact@v4 63 | with: 64 | name: release-dists 65 | path: dist/ 66 | 67 | - name: Publish release distributions to PyPI 68 | uses: pypa/gh-action-pypi-publish@release/v1 69 | with: 70 | packages-dir: dist/ 71 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用Python 3.11作为基础镜像 2 | FROM python:3.11-slim 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | #COPY requirements.txt . 
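# The package is installed from PyPI in the next step, so copying requirements.txt is not needed here.
# Note (illustrative, not part of the original image): at runtime the server reads its YAML config from
# the YML environment variable, so a typical invocation mounts a config file and sets YML, e.g.
#   docker run -e YML=/app/config.yml -v /path/to/config.yml:/app/config.yml <image-name>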
8 | RUN pip install xiyan-mcp-server 9 | 10 | 11 | # 运行应用 12 | CMD ["python", "-m", "xiyan_mcp_server"] -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

XiYan MCP Server

3 |

4 | MCP Playwright 5 |

6 |

7 | A Model Context Protocol (MCP) server that enables natural language queries to databases
8 | powered by XiYan-SQL, the state-of-the-art (SOTA) model for text-to-SQL on open benchmarks 9 |

10 |

11 | 💻 XiYan-mcp-server | 12 | 🌐 XiYan-SQL | 13 | 📖 Arxiv | 14 | 📄 PapersWithCode 15 | 🤗 HuggingFace | 16 | 🤖 ModelScope | 17 | 🌕 析言GBI 18 |
19 | MCP Server 20 | 21 | 22 | License: Apache 2.0 23 | 24 | PyPI Downloads 25 | Smithery Installs 26 | 27 | GitHub stars 28 | 29 |
30 | English | 中文 | 日本語
31 | Ding Group 钉钉群 | 32 | Follow me on Weibo 33 |

34 | 35 | 36 | ## Table of Contents 37 | 38 | - [Features](#features) 39 | - [Preview](#preview) 40 | - [Architecture](#architecture) 41 | - [Best Practice](#best-practice) 42 | - [Tools Preview](#tools-preview) 43 | - [Installation](#installation) 44 | - [Installing from pip](#installing-from-pip) 45 | - [Installing from Smithery.ai](#installing-from-smitheryai) 46 | - [Configuration](#configuration) 47 | - [LLM Configuration](#llm-configuration) 48 | - [General LLMs](#general-llms) 49 | - [Text-to-SQL SOTA model](#text-to-sql-sota-model) 50 | - [Local Model](#local-model) 51 | - [Database Configuration](#database-configuration) 52 | - [MySQL](#mysql) 53 | - [PostgreSQL](#postgresql) 54 | - [Launch](#launch) 55 | - [Claude Desktop](#claude-desktop) 56 | - [Cline](#cline) 57 | - [Goose](#goose) 58 | - [Cursor](#cursor) 59 | - [It Does Not Work](#it-does-not-work) 60 | - [Citation](#citation) 61 | 62 | 63 | ## Features 64 | - 🌐 Fetch data by natural language through [XiYanSQL](https://github.com/XGenerationLab/XiYan-SQL) 65 | - 🤖 Support general LLMs (GPT,qwenmax), Text-to-SQL SOTA model 66 | - 💻 Support pure local mode (high security!) 67 | - 📝 Support MySQL and PostgreSQL. 68 | - 🖱️ List available tables as resources 69 | - 🔧 Read table contents 70 | 71 | ## Preview 72 | ### Architecture 73 | There are two ways to integrate this server in your project, as shown below: 74 | The left is remote mode, which is the default mode. It requires an API key to access the xiyanSQL-qwencoder-32B model from service provider (see [Configuration](#Configuration)). 75 | Another mode is local mode, which is more secure. It does not require the API key. 76 | 77 | ![architecture.png](imgs/architecture.png) 78 | ### Best practice and reports 79 | 80 | ["Build a local data assistant using MCP + Modelscope API-Inference without writing a single line of code"](https://mp.weixin.qq.com/s/tzDelu0W4w6t9C0_yYRbHA) 81 | 82 | ["Xiyan MCP on Modelscope"](https://modelscope.cn/headlines/article/1142) 83 | 84 | ### Evaluation on MCPBench 85 | The following figure illustrates the performance of the XiYan MCP server as measured by the MCPBench benchmark. The XiYan MCP server demonstrates superior performance compared to both the MySQL MCP server and the PostgreSQL MCP server, achieving a lead of 2-22 percentage points. The detailed experiment results can be found at [MCPBench](https://github.com/modelscope/MCPBench) and the report ["Evaluation Report on MCP Servers"](https://arxiv.org/abs/2504.11094). 86 | 87 | ![exp_mcpbench.png](imgs/exp_mcpbench.png) 88 | 89 | ### Tools Preview 90 | - The tool ``get_data`` provides a natural language interface for retrieving data from a database. This server will convert the input natural language into SQL using a built-in model and call the database to return the query results. 91 | 92 | - The ``{dialect}://{table_name}`` resource allows obtaining a portion of sample data from the database for model reference when a specific table_name is specified. 93 | - The ``{dialect}://`` resource will list the names of the current databases 94 | 95 | ## Installation 96 | ### Installing from pip 97 | 98 | Python 3.11+ is required. 
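If you are unsure which interpreter your MCP client will end up using, a quick check like the one below (a minimal sketch; run it with the same `python` you plan to configure) confirms the version requirement before installing:

```python
import sys

# xiyan-mcp-server requires Python 3.11 or newer; run this with the interpreter
# you intend to point your MCP client at.
if sys.version_info < (3, 11):
    raise SystemExit(f"Python 3.11+ is required, found {sys.version.split()[0]}")
print(f"OK: {sys.executable} runs Python {sys.version.split()[0]}")
```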
99 | You can install the server through pip, and it will install the latest version: 100 | 101 | ```shell 102 | pip install xiyan-mcp-server 103 | ``` 104 | 105 | If you want to install the development version from source, you can install from source code on github: 106 | ```shell 107 | pip install git+https://github.com/XGenerationLab/xiyan_mcp_server.git 108 | ``` 109 | 110 | ### Installing from Smithery.ai 111 | See [@XGenerationLab/xiyan_mcp_server](https://smithery.ai/server/@XGenerationLab/xiyan_mcp_server) 112 | 113 | Not fully tested. 114 | 115 | ## Configuration 116 | 117 | You need a YAML config file to configure the server. 118 | A default config file is provided in config_demo.yml which looks like this: 119 | 120 | ```yaml 121 | mcp: 122 | transport: "stdio" 123 | model: 124 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 125 | key: "" 126 | url: "https://api-inference.modelscope.cn/v1/" 127 | database: 128 | host: "localhost" 129 | port: 3306 130 | user: "root" 131 | password: "" 132 | database: "" 133 | ``` 134 | 135 | ### MCP Configuration 136 | You can set the transport protocol to ``stdio`` or ``sse``. 137 | #### STDIO 138 | For stdio protocol, you can set just like this: 139 | ```yaml 140 | mcp: 141 | transport: "stdio" 142 | ``` 143 | #### SSE 144 | For sse protocol, you can set mcp config as below: 145 | ```yaml 146 | mcp: 147 | transport: "sse" 148 | port: 8000 149 | log_level: "INFO" 150 | ``` 151 | The default port is `8000`. You can change the port if needed. 152 | The default log level is `ERROR`. We recommend to set log level to `INFO` for more detailed information. 153 | 154 | Other configurations like `debug`, `host`, `sse_path`, `message_path` can be customized as well, but normally you don't need to modify them. 155 | 156 | ### LLM Configuration 157 | ``Name`` is the name of the model to use, ``key`` is the API key of the model, ``url`` is the API url of the model. We support following models. 158 | 159 | | versions | general LLMs(GPT,qwenmax) | SOTA model by Modelscope | SOTA model by Dashscope | Local LLMs | 160 | |----------|-------------------------------|--------------------------------------------|-----------------------------------------------------------|-----------------------| 161 | | description| basic, easy to use | best performance, stable, recommand | best performance, for trial | slow, high-security | 162 | | name | the official model name (e.g. gpt-3.5-turbo,qwen-max) | XGenerationLab/XiYanSQL-QwenCoder-32B-2412 | xiyansql-qwencoder-32b | xiyansql-qwencoder-3b | 163 | | key | the API key of the service provider (e.g. OpenAI, Alibaba Cloud) | the API key of modelscope | the API key via email | "" | 164 | | url | the endpoint of the service provider (e.g."https://api.openai.com/v1") | https://api-inference.modelscope.cn/v1/ | https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql | http://localhost:5090 | 165 | 166 | #### General LLMs 167 | If you want to use the general LLMs, e.g. gpt3.5, you can directly config like this: 168 | ```yaml 169 | model: 170 | name: "gpt-3.5-turbo" 171 | key: "YOUR KEY " 172 | url: "https://api.openai.com/v1" 173 | database: 174 | ``` 175 | 176 | If you want to use Qwen from Alibaba, e.g. 
Qwen-max, you can use following config: 177 | ```yaml 178 | model: 179 | name: "qwen-max" 180 | key: "YOUR KEY " 181 | url: "https://dashscope.aliyuncs.com/compatible-mode/v1" 182 | database: 183 | ``` 184 | #### Text-to-SQL SOTA model 185 | We recommend the XiYanSQL-qwencoder-32B (https://github.com/XGenerationLab/XiYanSQL-QwenCoder), which is the SOTA model in text-to-sql, see [Bird benchmark](https://bird-bench.github.io/). 186 | There are two ways to use the model. You can use either of them. 187 | (1) [Modelscope](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412), (2) Alibaba Cloud DashScope. 188 | 189 | 190 | ##### (1) Modelscope version 191 | You need to apply a ``key`` of API-inference from Modelscope, https://www.modelscope.cn/docs/model-service/API-Inference/intro 192 | Then you can use the following config: 193 | ```yaml 194 | model: 195 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 196 | key: "" 197 | url: "https://api-inference.modelscope.cn/v1/" 198 | ``` 199 | 200 | Read our [model description](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412) for more details. 201 | 202 | ##### (2) Dashscope version 203 | 204 | We deployed the model on Alibaba Cloud DashScope, so you need to set the following environment variables: 205 | Send me your email to get the ``key``. ( godot.lzl@alibaba-inc.com ) 206 | In the email, please attach the following information: 207 | ```yaml 208 | name: "YOUR NAME", 209 | email: "YOUR EMAIL", 210 | organization: "your college or Company or Organization" 211 | ``` 212 | We will send you a ``key`` according to your email. And you can fill the ``key`` in the yml file. 213 | The ``key`` will be expired by 1 month or 200 queries or other legal restrictions. 214 | 215 | 216 | ```yaml 217 | model: 218 | name: "xiyansql-qwencoder-32b" 219 | key: "KEY" 220 | url: "https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql" 221 | ``` 222 | 223 | Note: this model service is just for trial, if you need to use it in production, please contact us. 224 | 225 | ##### (3) Local version 226 | Alternatively, you can also deploy the model [XiYanSQL-qwencoder-32B](https://github.com/XGenerationLab/XiYanSQL-QwenCoder) on your own server. 227 | See [Local Model](src/xiyan_mcp_server/local_model/README.md) for more details. 228 | 229 | 230 | ### Database Configuration 231 | ``host``, ``port``, ``user``, ``password``, ``database`` are the connection information of the database. 232 | 233 | You can use local or any remote databases. Now we support MySQL and PostgreSQL(more dialects soon). 234 | 235 | #### MySQL 236 | 237 | ```yaml 238 | database: 239 | host: "localhost" 240 | port: 3306 241 | user: "root" 242 | password: "" 243 | database: "" 244 | ``` 245 | #### PostgreSQL 246 | Step 1: Install Python packages 247 | ```bash 248 | pip install psycopg2 249 | ``` 250 | Step 2: prepare the config.yml like this: 251 | ```yaml 252 | database: 253 | dialect: "postgresql" 254 | host: "localhost" 255 | port: 5432 256 | user: "" 257 | password: "" 258 | database: "" 259 | ``` 260 | 261 | Note that ``dialect`` should be ``postgresql`` for postgresql. 262 | ## Launch 263 | 264 | ### Server Launch 265 | 266 | If you want to launch server with `sse`, you have to run the following command in a terminal: 267 | ```shell 268 | YML=path/to/yml python -m xiyan_mcp_server 269 | ``` 270 | Then you should see the information on http://localhost:8000/sse in your browser. 
(Defaultly, change if your mcp server runs on other host/port) 271 | 272 | Otherwise, if you use `stdio` transport protocol, you usually declare the mcp server command in specific mcp application instead of launching it in a terminal. 273 | However, you can still debug with this command if needed. 274 | 275 | ### Client Setting 276 | 277 | #### Claude Desktop 278 | Add this in your Claude Desktop config file, ref Claude Desktop config example 279 | ```json 280 | { 281 | "mcpServers": { 282 | "xiyan-mcp-server": { 283 | "command": "/xxx/python", 284 | "args": [ 285 | "-m", 286 | "xiyan_mcp_server" 287 | ], 288 | "env": { 289 | "YML": "PATH/TO/YML" 290 | } 291 | } 292 | } 293 | } 294 | ``` 295 | **Please note that the Python command here requires the complete path to the Python executable (`/xxx/python`); otherwise, the Python interpreter cannot be found. You can determine this path by using the command `which python`. The same applies to other applications as well.** 296 | 297 | Claude Desktop currently does not support the SSE transport protocol. 298 | 299 | #### Cline 300 | Prepare the config like [Claude Desktop](#claude-desktop) 301 | 302 | #### Goose 303 | If you use `stdio`, add following command in the config, ref Goose config example 304 | ```shell 305 | env YML=path/to/yml /xxx/python -m xiyan_mcp_server 306 | ``` 307 | Otherwise, if you use `sse`, change Type to `SSE` and set the endpoint to `http://127.0.0.1:8000/sse` 308 | #### Cursor 309 | Use the similar command as follows. 310 | 311 | For `stdio`: 312 | ```json 313 | { 314 | "mcpServers": { 315 | "xiyan-mcp-server": { 316 | "command": "/xxx/python", 317 | "args": [ 318 | "-m", 319 | "xiyan_mcp_server" 320 | ], 321 | "env": { 322 | "YML": "path/to/yml" 323 | } 324 | } 325 | } 326 | } 327 | ``` 328 | For `sse`: 329 | ```json 330 | { 331 | "mcpServers": { 332 | "xiyan_mcp_server_1": { 333 | "url": "http://localhost:8000/sse" 334 | } 335 | } 336 | } 337 | ``` 338 | 339 | 340 | #### Witsy 341 | Add following in command: 342 | ```shell 343 | /xxx/python -m xiyan_mcp_server 344 | ``` 345 | Add an env: key is YML and value is the path to your yml file. 346 | Ref Witsy config example 347 | 348 | 349 | ## It Does Not Work! 350 | Contact us: 351 | Ding Group钉钉群| 352 | Follow me on Weibo 353 | 354 | 355 | ## Other Related Links 356 | 357 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/xgenerationlab-xiyan-mcp-server-badge.png)](https://mseep.ai/app/xgenerationlab-xiyan-mcp-server) 358 | 359 | 360 | 361 | 362 | ## Citation 363 | If you find our work helpful, feel free to give us a cite. 364 | ```bib 365 | @article{xiyansql, 366 | title={A Preview of XiYan-SQL: A Multi-Generator Ensemble Framework for Text-to-SQL}, 367 | author={Yingqi Gao and Yifu Liu and Xiaoxia Li and Xiaorong Shi and Yin Zhu and Yiming Wang and Shiqi Li and Wei Li and Yuntao Hong and Zhiling Luo and Jinyang Gao and Liyu Mou and Yu Li}, 368 | year={2024}, 369 | journal={arXiv preprint arXiv:2411.08599}, 370 | url={https://arxiv.org/abs/2411.08599}, 371 | primaryClass={cs.AI} 372 | } 373 | ``` 374 | -------------------------------------------------------------------------------- /README_ja.md: -------------------------------------------------------------------------------- 1 | # XiYan MCP サーバー 2 | 3 |

4 | MCP Playwright 5 |

6 |

7 | 自然言語でデータベースにクエリを実行できるモデルコンテキストプロトコル(MCP)サーバー
8 | オープンベンチマークでのテキストからSQLへのSOTAを実現するXiYan-SQLによって提供されています 9 |

10 | 11 |

12 | 💻 XiYan-mcp-server | 13 | 🌐 XiYan-SQL | 14 | 📖 Arxiv | 15 | 📄 PapersWithCode 16 | 💻 HuggingFace | 17 | 🤖 ModelScope | 18 | 🌕 析言GBI 19 |
20 | MCP Server 21 | 22 | 23 | License: Apache 2.0 24 | 25 | PyPI Downloads 26 | Smithery Installs 27 | 28 | GitHub stars 29 | 30 |
31 | 英語 | 中国語
32 | Ding Group | 33 | Weiboでフォロー 34 |

35 | 36 | ## 目次 37 | 38 | - [特徴](#特徴) 39 | - [プレビュー](#プレビュー) 40 | - [アーキテクチャ](#アーキテクチャ) 41 | - [ベストプラクティス](#ベストプラクティス) 42 | - [ツールプレビュー](#ツールプレビュー) 43 | - [インストール](#インストール) 44 | - [pipからのインストール](#pipからのインストール) 45 | - [Smithery.aiからのインストール](#smitheryaiからのインストール) 46 | - [設定](#設定) 47 | - [LLM設定](#llm設定) 48 | - [一般的なLLM](#一般的なllm) 49 | - [テキストからSQLへのSOTAモデル](#テキストからsqlへのsotaモデル) 50 | - [ローカルモデル](#ローカルモデル) 51 | - [データベース設定](#データベース設定) 52 | - [MySQL](#mysql) 53 | - [PostgreSQL](#postgresql) 54 | - [起動](#起動) 55 | - [Claude Desktop](#claude-desktop) 56 | - [Cline](#cline) 57 | - [Goose](#goose) 58 | - [Cursor](#cursor) 59 | - [動作しない場合](#動作しない場合) 60 | - [引用](#引用) 61 | 62 | ## 特徴 63 | - 🌐 [XiYanSQL](https://github.com/XGenerationLab/XiYan-SQL)を通じて自然言語でデータを取得 64 | - 🤖 一般的なLLM(GPT, qwenmax)、テキストからSQLへのSOTAモデルをサポート 65 | - 💻 純粋なローカルモードをサポート(高セキュリティ!) 66 | - 📝 MySQLとPostgreSQLをサポート 67 | - 🖱️ 利用可能なテーブルをリソースとしてリスト 68 | - 🔧 テーブル内容を読み取る 69 | 70 | ## プレビュー 71 | ### アーキテクチャ 72 | このサーバーをプロジェクトに統合する方法は2つあります。以下の図に示されています: 73 | 左側はリモートモードで、デフォルトモードです。サービスプロバイダーからxiyanSQL-qwencoder-32BモデルにアクセスするためにAPIキーが必要です([設定](#設定)を参照)。 74 | もう一つのモードはローカルモードで、より安全です。APIキーは必要ありません。 75 | 76 | ![architecture.png](imgs/architecture.png) 77 | 78 | ### ベストプラクティスとレポート 79 | 80 | ["MCP + Modelscope API-Inferenceを使用して、コードを一行も書かずにローカルデータアシスタントを構築する"](https://mp.weixin.qq.com/s/tzDelu0W4w6t9C0_yYRbHA) 81 | 82 | ["Modelscope上のXiyan MCP"](https://modelscope.cn/headlines/article/1142) 83 | 84 | ### MCPBenchでの評価 85 | 以下の図は、MCPBenchベンチマークで測定されたXiYan MCPサーバーのパフォーマンスを示しています。XiYan MCPサーバーは、MySQL MCPサーバーおよびPostgreSQL MCPサーバーと比較して優れたパフォーマンスを示し、2〜22パーセントポイントのリードを達成しています。詳細な実験結果は[MCPBench](https://github.com/modelscope/MCPBench)およびレポート["MCPサーバーの評価レポート"](https://arxiv.org/abs/2504.11094)で確認できます。 86 | 87 | ![exp_mcpbench.png](imgs/exp_mcpbench.png) 88 | 89 | ### ツールプレビュー 90 | - ツール``get_data``は、データベースからデータを取得するための自然言語インターフェースを提供します。このサーバーは、入力された自然言語をSQLに変換し、データベースを呼び出してクエリ結果を返します。 91 | 92 | - ``{dialect}://{table_name}``リソースは、特定のtable_nameを指定した場合に、データベースからモデル参照用のサンプルデータの一部を取得することを許可します。 93 | - ``{dialect}://``リソースは、現在のデータベースの名前をリストします。 94 | 95 | ## インストール 96 | ### pipからのインストール 97 | 98 | Python 3.11以上が必要です。 99 | pipを通じてサーバーをインストールできます。最新バージョンがインストールされます: 100 | 101 | ```bash 102 | pip install xiyan-mcp-server 103 | ``` 104 | 105 | その後、以下のコマンドでサーバーを直接実行できます: 106 | ```bash 107 | python -m xiyan_mcp_server 108 | ``` 109 | ただし、以下の設定を完了するまで機能は提供されません。 110 | ymlファイルを取得します。その後、以下の方法でサーバーを実行できます: 111 | ```yaml 112 | env YML=path/to/yml python -m xiyan_mcp_server 113 | ``` 114 | 115 | ### Smithery.aiからのインストール 116 | [@XGenerationLab/xiyan_mcp_server](https://smithery.ai/server/@XGenerationLab/xiyan_mcp_server)を参照 117 | 118 | 完全にはテストされていません。 119 | 120 | ## 設定 121 | 122 | サーバーを設定するには、YAML設定ファイルが必要です。 123 | デフォルトの設定ファイルconfig_demo.ymlが提供されています。内容は以下の通りです: 124 | 125 | ```yaml 126 | model: 127 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 128 | key: "" 129 | url: "https://api-inference.modelscope.cn/v1/" 130 | 131 | database: 132 | host: "localhost" 133 | port: 3306 134 | user: "root" 135 | password: "" 136 | database: "" 137 | ``` 138 | 139 | ### LLM設定 140 | ``Name``は使用するモデルの名前、``key``はモデルのAPIキー、``url``はモデルのAPIアドレスです。以下のモデルをサポートしています。 141 | 142 | | バージョン | 一般的なLLM(GPT, qwenmax) | ModelscopeによるSOTAモデル | DashscopeによるSOTAモデル | ローカルLLM | 143 | |----------|-------------------------------|-----------------------------|----------------------------|----------------| 144 | | 説明 | 基本的で使いやすい | 最高のパフォーマンス、安定、推奨 | 最高のパフォーマンス、試用用 | 遅い、高セキュリティ | 145 | | 名前 | 
公式モデル名(例:gpt-3.5-turbo, qwen-max) | XGenerationLab/XiYanSQL-QwenCoder-32B-2412 | xiyansql-qwencoder-32b | xiyansql-qwencoder-3b | 146 | | キー | サービスプロバイダーのAPIキー(例:OpenAI, Alibaba Cloud) | ModelscopeのAPIキー | メールで取得するAPIキー | "" | 147 | | URL | サービスプロバイダーのエンドポイント(例:"https://api.openai.com/v1") | https://api-inference.modelscope.cn/v1/ | https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql | http://localhost:5090 | 148 | 149 | #### 一般的なLLM 150 | 一般的なLLMを使用する場合、例:gpt3.5、以下のように設定できます: 151 | ```yaml 152 | model: 153 | name: "gpt-3.5-turbo" 154 | key: "YOUR KEY" 155 | url: "https://api.openai.com/v1" 156 | database: 157 | ``` 158 | 159 | AlibabaのQwenを使用する場合、例:Qwen-max、以下の設定を使用できます: 160 | ```yaml 161 | model: 162 | name: "qwen-max" 163 | key: "YOUR KEY" 164 | url: "https://dashscope.aliyuncs.com/compatible-mode/v1" 165 | database: 166 | ``` 167 | #### テキストからSQLへのSOTAモデル 168 | テキストからSQLへのSOTAモデルであるXiYanSQL-qwencoder-32B(https://github.com/XGenerationLab/XiYanSQL-QwenCoder)を推奨します。以下の2つの方法でモデルを使用できます。 169 | (1) [Modelscope](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412)、(2) Alibaba Cloud DashScope。 170 | 171 | ##### (1) Modelscopeバージョン 172 | ModelscopeからAPI推論の``key``を申請する必要があります。https://www.modelscope.cn/docs/model-service/API-Inference/intro 173 | その後、以下の設定を使用できます: 174 | ```yaml 175 | model: 176 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 177 | key: "" 178 | url: "https://api-inference.modelscope.cn/v1/" 179 | ``` 180 | 181 | 詳細については、[モデルの説明](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412)を参照してください。 182 | 183 | ##### (2) Dashscopeバージョン 184 | 185 | Alibaba Cloud DashScopeにモデルをデプロイしましたので、以下の環境変数を設定する必要があります: 186 | ``key``を取得するためにメールを送信してください。(godot.lzl@alibaba-inc.com) 187 | メールには以下の情報を添付してください: 188 | ```yaml 189 | name: "YOUR NAME", 190 | email: "YOUR EMAIL", 191 | organization: "your college or Company or Organization" 192 | ``` 193 | メールに基づいて``key``を送信します。ymlファイルに``key``を記入できます。 194 | ``key``は1ヶ月または200クエリまたはその他の法的制限で期限切れになります。 195 | 196 | ```yaml 197 | model: 198 | name: "xiyansql-qwencoder-32b" 199 | key: "KEY" 200 | url: "https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql" 201 | database: 202 | ``` 203 | 204 | 注意:このモデルサービスは試用のみです。生産で使用する場合は、私たちに連絡してください。 205 | 206 | または、モデル[XiYanSQL-qwencoder-32B](https://github.com/XGenerationLab/XiYanSQL-QwenCoder)を自分のサーバーにデプロイすることもできます。 207 | 208 | #### ローカルモデル 209 | 注意:ローカルモデルは遅いです(私のMacBookではクエリごとに約12秒かかります)。 210 | 安定して高速なサービスが必要な場合は、Modelscopeバージョンを使用することをお勧めします。 211 | 212 | ローカルモードでxiyan_mcp_serverを実行するには、以下が必要です: 213 | 1)少なくとも16GBのRAMを持つPC/Mac 214 | 2)6GBのディスクスペース 215 | 216 | ステップ1:追加のPythonパッケージをインストール 217 | ```bash 218 | pip install flask modelscope torch==2.2.2 accelerate>=0.26.0 numpy=2.2.3 219 | ``` 220 | 221 | ステップ2:(オプション)モデルを手動でダウンロード 222 | [xiyansql-qwencoder-3b](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-3B-2502/)をお勧めします。 223 | 以下のコマンドでモデルを手動でダウンロードできます: 224 | ```bash 225 | modelscope download --model XGenerationLab/XiYanSQL-QwenCoder-3B-2502 226 | ``` 227 | 6GBのディスクスペースが必要です。 228 | 229 | ステップ3:スクリプトをダウンロードしてサーバーを実行します。ファイルsrc/xiyan_mcp_server/local_xiyan_server.py 230 | 231 | ```bash 232 | python local_xiyan_server.py 233 | ``` 234 | サーバーはhttp://localhost:5090/で実行されます。 235 | 236 | ステップ4:設定を準備してxiyan_mcp_serverを実行 237 | config.ymlは以下のようになります: 238 | ```yaml 239 | model: 240 | name: "xiyansql-qwencoder-3b" 241 | key: "KEY" 242 | url: "http://127.0.0.1:5090" 243 | ``` 244 | 245 | これでローカルモードの準備が整いました。 246 | 247 | ### データベース設定 248 | 
``host``、``port``、``user``、``password``、``database``はデータベースの接続情報です。 249 | 250 | ローカルまたは任意のリモートデータベースを使用できます。現在、MySQLとPostgreSQLをサポートしています(他の方言も近日中にサポート予定)。 251 | 252 | #### MySQL 253 | 254 | ```yaml 255 | database: 256 | host: "localhost" 257 | port: 3306 258 | user: "root" 259 | password: "" 260 | database: "" 261 | ``` 262 | #### PostgreSQL 263 | ステップ1:Pythonパッケージをインストール 264 | ```bash 265 | pip install psycopg2 266 | ``` 267 | ステップ2:config.ymlを以下のように準備します: 268 | ```yaml 269 | database: 270 | dialect: "postgresql" 271 | host: "localhost" 272 | port: 5432 273 | user: "" 274 | password: "" 275 | database: "" 276 | ``` 277 | 278 | 注意:PostgreSQLの場合、``dialect``は``postgresql``である必要があります。 279 | 280 | ## 起動 281 | ### Claude Desktop 282 | Claude Desktopの設定ファイルに以下を追加します。Claude Desktop設定例を参照 283 | ```json 284 | { 285 | "mcpServers": { 286 | "xiyan-mcp-server": { 287 | "command": "python", 288 | "args": [ 289 | "-m", 290 | "xiyan_mcp_server" 291 | ], 292 | "env": { 293 | "YML": "PATH/TO/YML" 294 | } 295 | } 296 | } 297 | } 298 | ``` 299 | ### Cline 300 | [Claude Desktop](#claude-desktop)の設定を準備 301 | 302 | ### Goose 303 | 設定に以下のコマンドを追加します。Goose設定例を参照 304 | 305 | ```yaml 306 | env YML=path/to/yml python -m xiyan_mcp_server 307 | ``` 308 | ### Cursor 309 | [Goose](#goose)と同じコマンドを使用 310 | 311 | ### Witsy 312 | コマンドに以下を追加: 313 | ```yaml 314 | python -m xiyan_mcp_server 315 | ``` 316 | 環境変数を追加:キーはYML、値はymlファイルのパス。 317 | Witsy設定例を参照 318 | 319 | ## 動作しない場合 320 | お問い合わせ: 321 | Ding Group | 322 | Weiboでフォロー 323 | 324 | ## 引用 325 | 私たちの仕事が役立つと思われる場合は、自由に引用してください。 326 | ```bib 327 | @article{xiyansql, 328 | title={A Preview of XiYan-SQL: A Multi-Generator Ensemble Framework for Text-to-SQL}, 329 | author={Yingqi Gao and Yifu Liu and Xiaoxia Li and Xiaorong Shi and Yin Zhu and Yiming Wang and Shiqi Li and Wei Li and Yuntao Hong and Zhiling Luo and Jinyang Gao and Liyu Mou and Yu Li}, 330 | year={2024}, 331 | journal={arXiv preprint arXiv:2411.08599}, 332 | url={https://arxiv.org/abs/2411.08599}, 333 | primaryClass={cs.AI} 334 | } 335 | ``` 336 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 | # XiYan MCP 服务器 2 | 3 |

4 | MCP Playwright 5 |

6 |

7 | 一种模型上下文协议(MCP)服务器,支持通过自然语言查询数据库
8 | XiYan-SQL提供技术支持,该项目在开放基准上实现了文本到SQL的最好性能 9 |

10 | 11 |

12 | 💻 XiYan-mcp-server | 13 | 🌐 XiYan-SQL | 14 | 📖 Arxiv | 15 | 📄 PapersWithCode 16 | 💻 HuggingFace | 17 | 🤖 ModelScope | 18 | 🌕 析言GBI 19 |
20 | MCP Server 21 | 22 | 23 | License: Apache 2.0 24 | 25 | PyPI Downloads 26 | Smithery Installs 27 | 28 | GitHub stars 29 | 30 |
31 | 英文 | 中文 | 日本語
32 | 钉钉群 | 33 | 关注我 34 |

35 | 36 | ## 目录 37 | 38 | - [特性](#特性) 39 | - [预览](#预览) 40 | - [架构](#架构) 41 | - [最佳实践](#最佳实践) 42 | - [工具预览](#工具预览) 43 | - [安装](#安装) 44 | - [从 pip 安装](#从-pip-安装) 45 | - [从 Smithery.ai 安装](#从-smitheryai-安装) 46 | - [配置](#配置) 47 | - [LLM 配置](#llm-配置) 48 | - [通用 LLMs](#通用-llms) 49 | - [Text-to-SQL 最新模型](#text-to-sql-最新模型) 50 | - [本地模型](#本地模型) 51 | - [数据库配置](#数据库配置) 52 | - [MySQL](#mysql) 53 | - [PostgreSQL](#postgresql) 54 | - [启动](#启动) 55 | - [Claude Desktop](#claude-desktop) 56 | - [Cline](#cline) 57 | - [Goose](#goose) 58 | - [Cursor](#cursor) 59 | - [它不起作用](#它不起作用) 60 | - [引用](#引用) 61 | 62 | ## 特性 63 | - 🌐 通过 [XiYanSQL](https://github.com/XGenerationLab/XiYan-SQL) 使用自然语言获取数据 64 | - 🤖 支持通用 LLMs(如 GPT, qwenmax),文本到 SQL 最新模型 65 | - 💻 支持纯本地模式(高安全性!) 66 | - 📝 支持 MySQL 和 PostgreSQL。 67 | - 🖱️ 列出可用表作为资源 68 | - 🔧 读取表内容 69 | 70 | ## 预览 71 | ### 架构 72 | 有两种方式可以将该服务器集成到您的项目中,如下图所示: 73 | 左侧是远程模式,这是默认模式。它需要 API 密钥来访问服务提供商的 xiyanSQL-qwencoder-32B 模型(请参阅[配置](#配置))。 74 | 另一种模式是本地模式,更加安全,不需要 API 密钥。 75 | 76 | ![architecture.png](imgs/architecture.png) 77 | 78 | ### 最佳实践和报告 79 | 80 | ["使用 MCP + Modelscope API 推理构建本地数据助手,无需编写一行代码"](https://mp.weixin.qq.com/s/tzDelu0W4w6t9C0_yYRbHA) 81 | 82 | ["Modelscope 上的 Xiyan MCP"](https://modelscope.cn/headlines/article/1142) 83 | 84 | ### 在 MCPBench 上的评估 85 | 下图展示了 XiYan MCP 服务在 MCPBench 基准测试中的表现。XiYan MCP 服务器的性能优于 MySQL MCP 服务和 PostgreSQL MCP 服务,领先 2-22 个百分点。详细的实验结果可以在 [MCPBench](https://github.com/modelscope/MCPBench) 和报告 ["MCP 服务器评估报告"](https://arxiv.org/abs/2504.11094) 中找到。 86 | 87 | ![exp_mcpbench.png](imgs/exp_mcpbench.png) 88 | 89 | ### 工具预览 90 | - 工具 ``get_data`` 提供了一个自然语言接口,用于从数据库中检索数据。该服务器将输入的自然语言转换为 SQL,并调用数据库返回查询结果。 91 | 92 | - ``{dialect}://{table_name}`` 资源允许在指定特定的 table_name 时从数据库中获取部分样本数据以供模型参考。 93 | - ``{dialect}://`` 资源将列出当前数据库的名称。 94 | 95 | ## 安装 96 | ### 从 pip 安装 97 | 98 | 要求 Python 3.11 或更高版本。 99 | 您可以通过 pip 安装服务器,它将安装最新版本: 100 | 101 | ```bash 102 | pip install xiyan-mcp-server 103 | ``` 104 | 105 | 安装后,您可以直接通过以下命令运行服务器: 106 | ```bash 107 | python -m xiyan_mcp_server 108 | ``` 109 | 但在您完成以下配置之前,它不会提供任何功能。 110 | 您将获得一个 yml 文件。然后您可以通过以下方式运行服务器: 111 | ```yaml 112 | env YML=path/to/yml python -m xiyan_mcp_server 113 | ``` 114 | 115 | ### 从 Smithery.ai 安装 116 | 请参见 [@XGenerationLab/xiyan_mcp_server](https://smithery.ai/server/@XGenerationLab/xiyan_mcp_server) 117 | 118 | 未进行全面测试。 119 | 120 | ## 配置 121 | 122 | 您需要一个 YAML 配置文件来配置服务器。 123 | 提供了一个默认配置文件 config_demo.yml,内容如下: 124 | 125 | ```yaml 126 | model: 127 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 128 | key: "" 129 | url: "https://api-inference.modelscope.cn/v1/" 130 | 131 | database: 132 | host: "localhost" 133 | port: 3306 134 | user: "root" 135 | password: "" 136 | database: "" 137 | ``` 138 | 139 | ### LLM 配置 140 | ``Name`` 是要使用的模型名称,``key`` 是模型的 API 密钥,``url`` 是模型的 API 地址。我们支持以下模型。 141 | 142 | | 版本 | 通用 LLMs (GPT, qwenmax) | Modelscope 最新模型 | Dashscope 最新模型 | 本地 LLMs | 143 | |----------|------------------------------------|-----------------------------|----------------------------------|----------------| 144 | | 描述 | 基础,易于使用 | 性能最好,稳定,推荐 | 性能最好,供试用 | 速度慢,高安全性 | 145 | | 名称 | 官方模型名称(例如 gpt-3.5-turbo, qwen-max) | XGenerationLab/XiYanSQL-QwenCoder-32B-2412 | xiyansql-qwencoder-32b | xiyansql-qwencoder-3b | 146 | | 密钥 | 服务提供商的 API 密钥(例如 OpenAI, 阿里云) | modelscope 的 API 密钥 | 通过电子邮件获取的 API 密钥 | "" | 147 | | URL | 服务提供商的端点(例如 "https://api.openai.com/v1") | https://api-inference.modelscope.cn/v1/ | https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql | http://localhost:5090 | 148 | 149 | #### 
通用 LLMs 150 | 如果您想使用通用 LLMs,如 gpt3.5,您可以直接像这样配置: 151 | ```yaml 152 | model: 153 | name: "gpt-3.5-turbo" 154 | key: "YOUR KEY " 155 | url: "https://api.openai.com/v1" 156 | database: 157 | ``` 158 | 159 | 如果您想使用来自阿里巴巴的 Qwen,比如 Qwen-max,您可以使用以下配置: 160 | ```yaml 161 | model: 162 | name: "qwen-max" 163 | key: "YOUR KEY " 164 | url: "https://dashscope.aliyuncs.com/compatible-mode/v1" 165 | database: 166 | ``` 167 | #### Text-to-SQL 最新模型 168 | 我们推荐 XiYanSQL-qwencoder-32B(https://github.com/XGenerationLab/XiYanSQL-QwenCoder),这是文本到 SQL 的最新模型,参见 [Bird benchmark](https://bird-bench.github.io/)。 169 | 您可以有两种方式使用该模型: 170 | (1) [Modelscope](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412), (2) 阿里云 DashScope。 171 | 172 | ##### (1) Modelscope 版本 173 | 您需要从 Modelscope 申请一个 API 推理的 ``key``,网址: https://www.modelscope.cn/docs/model-service/API-Inference/intro 174 | 然后您可以使用以下配置: 175 | ```yaml 176 | model: 177 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 178 | key: "" 179 | url: "https://api-inference.modelscope.cn/v1/" 180 | ``` 181 | 182 | 请阅读我们的 [模型描述](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412) 获取更多详细信息。 183 | 184 | ##### (2) Dashscope 版本 185 | 186 | 我们在阿里云 DashScope 上部署了模型,因此您需要设置以下环境变量: 187 | 请将您的电子邮件发送给我以获取 ``key``。 (godot.lzl@alibaba-inc.com) 188 | 在电子邮件中,请附上以下信息: 189 | ```yaml 190 | name: "YOUR NAME", 191 | email: "YOUR EMAIL", 192 | organization: "your college or Company or Organization" 193 | ``` 194 | 我们将根据您的电子邮件发送 ``key`` 给您。您可以在 yml 文件中填写该 ``key``。 195 | 该 ``key``将在 1 个月、200 次查询或其他法律限制后过期。 196 | 197 | ```yaml 198 | model: 199 | name: "xiyansql-qwencoder-32b" 200 | key: "KEY" 201 | url: "https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql" 202 | database: 203 | ``` 204 | 205 | 注意:该模型服务仅供试用,如果您需要在生产中使用,请与我们联系。 206 | 207 | 或者,您也可以在自己的服务器上自行部署模型 [XiYanSQL-qwencoder-32B](https://github.com/XGenerationLab/XiYanSQL-QwenCoder)。 208 | 209 | #### 本地模型 210 | 注意:本地模型速度较慢(在我的 MacBook 上每个查询约 12 秒)。 211 | 如果您需要稳定快速的服务,我们仍然推荐使用 Modelscope 版本。 212 | 213 | 要在本地模式下运行 xiyan_mcp_server,您需要: 214 | 1)一台至少具有 16GB 内存的 PC/Mac 215 | 2)6GB 硬盘空间 216 | 217 | 步骤 1:安装额外的 Python 包 218 | ```bash 219 | pip install flask modelscope torch==2.2.2 accelerate>=0.26.0 numpy=2.2.3 220 | ``` 221 | 222 | 步骤 2:(可选)手动下载模型 223 | 我们推荐 [xiyansql-qwencoder-3b](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-3B-2502/)。 224 | 您可以手动下载模型: 225 | ```bash 226 | modelscope download --model XGenerationLab/XiYanSQL-QwenCoder-3B-2502 227 | ``` 228 | 这将占用您 6GB 的磁盘空间。 229 | 230 | 步骤 3:下载脚本并运行服务器。文件 src/xiyan_mcp_server/local_xiyan_server.py 231 | 232 | ```bash 233 | python local_xiyan_server.py 234 | ``` 235 | 服务器将在 http://localhost:5090/ 上运行。 236 | 237 | 步骤 4:准备配置并运行 xiyan_mcp_server 238 | config.yml 应如下所示: 239 | ```yml 240 | model: 241 | name: "xiyansql-qwencoder-3b" 242 | key: "KEY" 243 | url: "http://127.0.0.1:5090" 244 | ``` 245 | 246 | 到目前为止,本地模式准备就绪。 247 | 248 | ### 数据库配置 249 | ``host``、``port``、``user``、``password``、``database`` 是数据库的连接信息。 250 | 251 | 您可以使用本地或任何远程数据库。现在我们支持 MySQL 和 PostgreSQL(很快支持更多方言)。 252 | 253 | #### MySQL 254 | 255 | ```yaml 256 | database: 257 | host: "localhost" 258 | port: 3306 259 | user: "root" 260 | password: "" 261 | database: "" 262 | ``` 263 | #### PostgreSQL 264 | 步骤 1:安装 Python 包 265 | ```bash 266 | pip install psycopg2 267 | ``` 268 | 步骤 2:准备 config.yml 如下: 269 | ```yaml 270 | database: 271 | dialect: "postgresql" 272 | host: "localhost" 273 | port: 5432 274 | user: "" 275 | password: "" 276 | database: "" 277 | ``` 278 | 279 
| 注意 ``dialect`` 应为 ``postgresql`` 以适用于 PostgreSQL。 280 | 281 | ## 启动 282 | ### Claude Desktop 283 | 在您的 Claude Desktop 配置文件中添加以下内容,参考 Claude Desktop 配置示例 284 | ```json 285 | { 286 | "mcpServers": { 287 | "xiyan-mcp-server": { 288 | "command": "/xxx/python", 289 | "args": [ 290 | "-m", 291 | "xiyan_mcp_server" 292 | ], 293 | "env": { 294 | "YML": "PATH/TO/YML" 295 | } 296 | } 297 | } 298 | } 299 | ``` 300 | **注意此处的python命令需要完整的python可执行文件路径(`/xxx/python`),否则会找不到python解释器,可以通过`which python`来确定此路径。使用其他非claude应用也是如此。** 301 | ### Cline 302 | 准备配置,参考 [Claude Desktop](#claude-desktop) 303 | 304 | ### Goose 305 | 在配置中添加以下命令,参考 Goose 配置示例 306 | 307 | ```yaml 308 | env YML=path/to/yml /xxx/python -m xiyan_mcp_server 309 | ``` 310 | ### Cursor 311 | 使用与 [Goose](#goose) 相同的命令。 312 | 313 | ### Witsy 314 | 在命令中添加以下内容: 315 | ```yaml 316 | /xxx/python -m xiyan_mcp_server 317 | ``` 318 | 添加一个环境变量:键为 YML,值为您 yml 文件的路径。 319 | 参考 Witsy 配置示例 320 | 321 | ## 它不起作用! 322 | 请联系我们: 323 | 钉钉群| 324 | 关注我 325 | 326 | ## 引用 327 | 如果您觉得我们的工作有帮助,可以随意引用。 328 | ```bib 329 | @article{xiyansql, 330 | title={A Preview of XiYan-SQL: A Multi-Generator Ensemble Framework for Text-to-SQL}, 331 | author={Yingqi Gao and Yifu Liu and Xiaoxia Li and Xiaorong Shi and Yin Zhu and Yiming Wang and Shiqi Li and Wei Li and Yuntao Hong and Zhiling Luo and Jinyang Gao and Liyu Mou and Yu Li}, 332 | year={2024}, 333 | journal={arXiv preprint arXiv:2411.08599}, 334 | url={https://arxiv.org/abs/2411.08599}, 335 | primaryClass={cs.AI} 336 | } 337 | ``` 338 | -------------------------------------------------------------------------------- /imgs/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /imgs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/architecture.png -------------------------------------------------------------------------------- /imgs/claude_desktop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/claude_desktop.jpg -------------------------------------------------------------------------------- /imgs/dinggroup_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/dinggroup_out.png -------------------------------------------------------------------------------- /imgs/exp_mcpbench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/exp_mcpbench.png -------------------------------------------------------------------------------- /imgs/goose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/goose.jpg -------------------------------------------------------------------------------- /imgs/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/logo.png -------------------------------------------------------------------------------- /imgs/witsy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/witsy.jpg -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "xiyan_mcp_server" 3 | version = "0.1.5.dev0" 4 | description = "A Model Context Protocol (MCP) server that utilizes XiyanSQL with databases. This server enables AI assistants to list tables, read data, and execute natural language queries" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "mcp>=1.0.0", 9 | "mysql-connector-python>=9.1.0", 10 | "llama_index", 11 | "sqlalchemy", 12 | "pymysql" 13 | ] 14 | [[project.authors]] 15 | name = "Zhiling Luo" 16 | email = "godot.lzl@alibaba-inc.com" 17 | 18 | [build-system] 19 | requires = ["hatchling"] 20 | build-backend = "hatchling.build" 21 | 22 | [project.scripts] 23 | mysql_mcp_server = "xiyan_mcp_server:main" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mcp 2 | mysql-connector-python>=9.1.0 3 | sqlalchemy 4 | llama_index 5 | yaml 6 | pandas 7 | pymysql -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='xiyan_mcp_server', # 包的名字 5 | version='0.1.5.dev0', # 版本号 6 | packages=find_packages(), # 自动找到项目中的包 7 | install_requires=[ # 必要的包依赖 8 | # 'numpy', # 示例:若有依赖包,将其列在此 9 | 'mcp', 10 | 'mysql-connector-python>=9.1.0', 11 | 'sqlalchemy', 12 | 'llama_index', 13 | 'yaml', 14 | 'pandas', 15 | 'pymysql' 16 | ], 17 | author='Bruce Luo', # 作者 18 | author_email='godot.lzl@alibaba-inc.com', # 作者邮箱 19 | description='A MCP server of natural language interface to Database', # 简短描述 20 | long_description=open('README.md').read(), # 从 README 文件读取详细描述 21 | long_description_content_type='text/markdown', # 描述内容类型 22 | url='https://github.com/XGenerationLab/xiyan_mcp_server', # 项目主页 23 | classifiers=[ 24 | 'Programming Language :: Python :: 3', 25 | 'License :: OSI Approved :: MIT License', 26 | 'Operating System :: OS Independent', 27 | ], 28 | python_requires='>=3.11', # 支持的 Python 版本 29 | ) 30 | -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery.ai configuration 2 | startCommand: 3 | type: stdio 4 | configSchema: 5 | # JSON Schema defining the configuration options for the MCP. 6 | { 7 | "YML":"src/xiyan_mcp_server/config_demo.yml" 8 | } 9 | commandFunction: 10 | # A function that produces the CLI command to start the MCP on stdio. 
11 | |- 12 | (config) => ({ 13 | "command": "python", 14 | "args": [ 15 | "-m", 16 | "xiyan_mcp_server" 17 | ] 18 | }) 19 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/src/xiyan_mcp_server/__init__.py -------------------------------------------------------------------------------- /src/xiyan_mcp_server/__main__.py: -------------------------------------------------------------------------------- 1 | 2 | from .server import mcp, global_config, mcp_config 3 | 4 | def main(): 5 | mcp.run(transport=mcp_config.get('transport', 'stdio')) 6 | 7 | 8 | 9 | if __name__ == "__main__": 10 | main() -------------------------------------------------------------------------------- /src/xiyan_mcp_server/config_demo.yml: -------------------------------------------------------------------------------- 1 | model: 2 | name: "pre-xiyan_multi_dialect_v3" 3 | key: "" 4 | url: "https://poc-dashscope.aliyuncs.com/compatible-mode/v1" 5 | 6 | database: 7 | host: "localhost" 8 | port: 3306 9 | user: "root" 10 | password: "" 11 | database: "" -------------------------------------------------------------------------------- /src/xiyan_mcp_server/database_env.py: -------------------------------------------------------------------------------- 1 | from .utils.db_source import HITLSQLDatabase 2 | 3 | class DataBaseEnv: 4 | def __init__(self, database: HITLSQLDatabase): 5 | self.database = database 6 | self.dialect = database.dialect 7 | self.mschema = database.mschema 8 | self.db_name = database.db_name 9 | self.mschema_str = self.mschema.to_mschema() -------------------------------------------------------------------------------- /src/xiyan_mcp_server/local_model/README.md: -------------------------------------------------------------------------------- 1 | ## Local Model Configuration 2 | Note: the local model is slow (about 12 seconds per query on my macbook). 3 | If you need a stable and fast service, we still recommend to use the modelscope version. 4 | 5 | To run xiyan_mcp_server in local mode, you need 6 | 1) a PC/Mac/Machine with at least 16GB RAM 7 | 2) 6GB disk space 8 | 9 | The above setting is for model of size 3B. You can adjust the settings to run a 32B model on a server. 10 | 11 | ### Step 1: Install additional Python packages 12 | ```bash 13 | pip install flask modelscope torch==2.2.2 accelerate>=0.26.0 numpy=2.2.3 14 | ``` 15 | 16 | ### Step 2: (optional) manually download the model 17 | We recommend [xiyansql-qwencoder-3b](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-3B-2502/). 18 | You can manually download the model by 19 | ```bash 20 | modelscope download --model XGenerationLab/XiYanSQL-QwenCoder-3B-2502 21 | ``` 22 | It will take you 6GB disk space. 23 | 24 | ### Step 3: download the script and run server. 25 | 26 | Script is located at `src/xiyan_mcp_server/local_model/local_xiyan_server.py` 27 | 28 | ```bash 29 | python local_xiyan_server.py 30 | ``` 31 | The server will be running on http://localhost:5090/ 32 | 33 | ### Step 4: prepare config and run xiyan_mcp_server 34 | the config.yml should be like: 35 | ```yml 36 | model: 37 | name: "xiyansql-qwencoder-3b" 38 | key: "KEY" 39 | url: "http://127.0.0.1:5090" 40 | ``` 41 | 42 | Till now the local model is ready. 
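Once the local server is up, you can sanity-check it before pointing xiyan_mcp_server at it. The sketch below is a minimal example: it assumes the default address `http://127.0.0.1:5090` from `local_xiyan_server.py` and uses the third-party `requests` package, which is not part of the project requirements.

```python
import requests

# Smoke test for the local model server started by local_xiyan_server.py.
# The endpoint accepts an OpenAI-style message list and returns choices[0].message.content.
payload = {
    "messages": [
        {"role": "user", "content": "Write a SQL query that selects the number 1."}
    ]
}
resp = requests.post("http://127.0.0.1:5090/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

The long timeout is deliberate: as noted above, the local 3B model can take on the order of ten seconds per query on a laptop CPU.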
-------------------------------------------------------------------------------- /src/xiyan_mcp_server/local_model/local_xiyan_server.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | from modelscope import AutoModelForCausalLM, AutoTokenizer 3 | import torch # require torch==2.2.2,accelerate>=0.26.0,numpy=2.2.3,modelscope 4 | 5 | 6 | model_name = 'XGenerationLab/XiYanSQL-QwenCoder-3B-2502' 7 | local_model = AutoModelForCausalLM.from_pretrained(model_name, device_map='cpu', 8 | torch_dtype=torch.float32) 9 | local_tokenizer = AutoTokenizer.from_pretrained(model_name) 10 | app = Flask(__name__) 11 | 12 | @app.route('/chat/completions', methods=['POST']) 13 | def chat_completions(): 14 | # 获取请求中的数据 15 | input_data = request.json 16 | 17 | # 提取提示(prompt) 18 | messages = input_data.get('messages', []) 19 | 20 | if not messages: 21 | return jsonify({'error': 'No messages provided'}) 22 | 23 | text = local_tokenizer.apply_chat_template( 24 | messages, 25 | tokenize=False, 26 | add_generation_prompt=True 27 | ) 28 | inputs = local_tokenizer([text], return_tensors="pt") 29 | 30 | # 编码输入并生成响应 31 | generated_ids = local_model.generate(inputs['input_ids'], max_new_tokens=1024, 32 | temperature=0.1, 33 | top_p=0.8, 34 | do_sample=True) 35 | 36 | generated_ids = [ 37 | output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, generated_ids) 38 | ] 39 | generated_text = local_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] 40 | 41 | 42 | # 生成响应格式 43 | response = { 44 | 'id': 'xiyan', 45 | 'object': 'chat.completion', 46 | 'created': 1234567890, 47 | 'model': model_name, 48 | 'choices': [{ 49 | 'index': 0, 50 | 'message': { 51 | "content":generated_text 52 | }, 53 | 'finish_reason': 'length' 54 | }] 55 | } 56 | print(generated_text) 57 | return jsonify(response) 58 | 59 | 60 | if __name__ == '__main__': 61 | # this flask server runs on http://localhost:5090 62 | app.run(host='0.0.0.0', port=5090) 63 | 64 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/server.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | from typing import Literal 5 | import yaml # 添加yaml库导入 6 | 7 | from mysql.connector import connect, Error 8 | from mcp.server import FastMCP 9 | from mcp.types import TextContent 10 | 11 | from .utils.db_config import DBConfig 12 | from .database_env import DataBaseEnv 13 | from .utils.db_source import HITLSQLDatabase 14 | from .utils.db_util import init_db_conn 15 | from .utils.file_util import extract_sql_from_qwen 16 | from .utils.llm_util import call_openai_sdk 17 | 18 | 19 | 20 | 21 | # Configure logging 22 | logging.basicConfig( 23 | level=logging.INFO, 24 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 25 | ) 26 | logger = logging.getLogger("xiyan_mcp_server") 27 | 28 | 29 | def get_yml_config(): 30 | config_path = os.getenv("YML", os.path.join(os.path.dirname(__file__), "config_demo.yml")) 31 | logger.info(f"Loading configuration from {config_path}") 32 | try: 33 | with open(config_path, 'r') as file: 34 | config = yaml.safe_load(file) 35 | return config 36 | except FileNotFoundError: 37 | logger.error(f"Configuration file {config_path} not found.") 38 | raise 39 | except yaml.YAMLError as exc: 40 | logger.error(f"Error parsing configuration file {config_path}: {exc}") 41 | raise 42 | 43 | 44 | 45 | def 
get_xiyan_config(db_config): 46 | dialect = db_config.get('dialect','mysql') 47 | xiyan_db_config = DBConfig(dialect=dialect,db_name=db_config['database'], user_name=db_config['user'], db_pwd=db_config['password'], db_host=db_config['host'], port=db_config['port']) 48 | return xiyan_db_config 49 | 50 | 51 | global_config = get_yml_config() 52 | mcp_config = global_config.get('mcp', {}) 53 | model_config = global_config['model'] 54 | global_db_config = global_config.get('database') 55 | global_xiyan_db_config = get_xiyan_config(global_db_config) 56 | dialect = global_db_config.get('dialect','mysql') 57 | 58 | 59 | 60 | mcp = FastMCP("xiyan", **mcp_config) 61 | 62 | 63 | @mcp.resource(dialect+'://'+global_db_config.get('database','')) 64 | async def read_resource() -> str: 65 | 66 | db_engine = init_db_conn(global_xiyan_db_config) 67 | db_source = HITLSQLDatabase(db_engine) 68 | return db_source.mschema.to_mschema() 69 | 70 | @mcp.resource(dialect+"://{table_name}") 71 | async def read_resource(table_name) -> str: 72 | """Read table contents.""" 73 | config = global_db_config 74 | try: 75 | with connect(**config) as conn: 76 | with conn.cursor() as cursor: 77 | cursor.execute(f"SELECT * FROM {table_name} LIMIT 100") 78 | columns = [desc[0] for desc in cursor.description] 79 | rows = cursor.fetchall() 80 | result = [",".join(map(str, row)) for row in rows] 81 | return "\n".join([",".join(columns)] + result) 82 | 83 | except Error as e: 84 | raise RuntimeError(f"Database error: {str(e)}") 85 | 86 | 87 | def sql_gen_and_execute(db_env: DataBaseEnv, query: str): 88 | """ 89 | Transfers the input natural language question to sql query (known as Text-to-sql) and executes it on the database. 90 | Args: 91 | query: natural language to query the database. e.g. 
查询在2024年每个月,卡宴的各经销商销量分别是多少 92 | """ 93 | 94 | #db_env = context_variables.get('db_env', None) 95 | prompt = f"""你现在是一名{db_env.dialect}数据分析专家,你的任务是根据参考的数据库schema和用户的问题,编写正确的SQL来回答用户的问题,生成的SQL用```sql 和```包围起来。 96 | 【数据库schema】 97 | {db_env.mschema_str} 98 | 99 | 【问题】 100 | {query} 101 | """ 102 | #logger.info(f"SQL generation prompt: {prompt}") 103 | 104 | messages = [ 105 | {"role": "system", "content": prompt}, 106 | {"role": "user", "content": f"用户的问题是: {query}"} 107 | ] 108 | param = {"model": model_config['name'], "messages": messages, "key": model_config['key'], "url": model_config['url']} 109 | 110 | try: 111 | response = call_openai_sdk(**param) 112 | content = response.choices[0].message.content 113 | sql_query = extract_sql_from_qwen(content) 114 | status, res = db_env.database.fetch(sql_query) 115 | if not status: 116 | for idx in range(3):  # retry up to 3 times, asking the LLM to repair the reported SQL error 117 | sql_query = sql_fix(db_env.dialect, db_env.mschema_str, query, sql_query, res) 118 | status, res = db_env.database.fetch(sql_query) 119 | if status: 120 | break 121 | 122 | sql_res = db_env.database.fetch_truncated(sql_query, max_rows=100) 123 | markdown_res = db_env.database.trunc_result_to_markdown(sql_res) 124 | logger.info(f"SQL query: {sql_query}\nSQL result: {sql_res}") 125 | return markdown_res.strip() 126 | 127 | except Exception as e: 128 | return str(e) 129 | 130 | 131 | def sql_fix(dialect: str, mschema: str, query: str, sql_query: str, error_info: str): 132 | system_prompt = '''现在你是一个{dialect}数据分析专家,需要阅读一个客户的问题,参考的数据库schema,该问题对应的待检查SQL,以及执行该SQL时数据库返回的语法错误,请你仅针对其中的语法错误进行修复,输出修复后的SQL。 133 | 注意: 134 | 1、仅修复语法错误,不允许改变SQL的逻辑。 135 | 2、生成的SQL用```sql 和```包围起来。 136 | 137 | 【数据库schema】 138 | {schema} 139 | '''.format(dialect=dialect, schema=mschema) 140 | user_prompt = '''【问题】 141 | {question} 142 | 143 | 【待检查SQL】 144 | {sql} 145 | 146 | 【错误信息】 147 | {sql_res}'''.format(question=query, sql=sql_query, sql_res=error_info) 148 | 149 | messages = [ 150 | {"role": "system", "content": system_prompt}, 151 | {"role": "user", "content": user_prompt} 152 | ] 153 | param = {"model": model_config['name'], "messages": messages, "key": model_config['key'], "url": model_config['url']} 154 | 155 | response = call_openai_sdk(**param) 156 | content = response.choices[0].message.content 157 | sql_query = extract_sql_from_qwen(content) 158 | 159 | return sql_query 160 | 161 | def call_xiyan(query: str) -> str: 162 | """Fetch the data from database through a natural language query 163 | 164 | Args: 165 | query: The query in natural language 166 | """ 167 | 168 | logger.info(f"Calling tool with arguments: {query}") 169 | try: 170 | db_engine = init_db_conn(global_xiyan_db_config) 171 | db_source = HITLSQLDatabase(db_engine) 172 | except Exception as e: 173 | 174 | return "数据库连接失败"+str(e) 175 | logger.info(f"Calling xiyan") 176 | env = DataBaseEnv(db_source) 177 | res = sql_gen_and_execute(env, query) 178 | 179 | return str(res) 180 | @mcp.tool() 181 | def get_data(query: str) -> list[TextContent]: 182 | """Fetch the data from database through a natural language query 183 | 184 | Args: 185 | query: The query in natural language 186 | """ 187 | 188 | res = call_xiyan(query) 189 | return [TextContent(type="text", text=res)] 190 | 191 | 192 | 193 | def main(): 194 | parser = argparse.ArgumentParser(description="Run MCP server.") 195 | parser.add_argument('transport', nargs='?', default='stdio', choices=['stdio', 'sse'], 196 | help='Transport type (stdio or sse)') 197 | args = parser.parse_args() 198 | mcp.run(transport=args.transport) 199 | 200 | if __name__ == "__main__": 
201 | main() 202 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/common_util.py: -------------------------------------------------------------------------------- 1 | from _datetime import datetime 2 | 3 | 4 | def get_timestamp() -> str: 5 | timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S") 6 | return timestamp 7 | 8 | 9 | def extract_llm_messages(messages: list) -> list: 10 | messages = [message for message in messages if message['role'] in ['system', 'assistant', 'user', 'tool']] 11 | return messages -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Optional 3 | 4 | from urllib.parse import quote_plus 5 | @dataclass 6 | class DBConfig: 7 | dialect: str = 'sqlite' 8 | db_path: Optional[str] = None # 仅用于 SQLite 9 | db_name: Optional[str] = None # MySQL/PostgreSQL 通用 10 | user_name: Optional[str] = None # MySQL/PostgreSQL 通用 11 | db_pwd: Optional[str] = None # MySQL/PostgreSQL 通用 12 | db_host: Optional[str] = None # MySQL/PostgreSQL 通用 13 | port: Optional[int] = None # MySQL/PostgreSQL 通用 14 | 15 | def __post_init__(self): 16 | if self.dialect == 'sqlite': 17 | self.db_path = self.db_path or 'book_1.sqlite' 18 | elif self.dialect in ['mysql', 'postgresql']: 19 | self.db_name = self.db_name or 'default_db' 20 | self.user_name = quote_plus(self.user_name) or 'default_user' 21 | self.db_pwd = quote_plus(self.db_pwd) or 'default_password' 22 | self.db_host = self.db_host or 'localhost' 23 | self.port = self.port or (3306 if self.dialect == 'mysql' else 5432) 24 | else: 25 | raise ValueError(f"Unsupported database dialect: {self.dialect}") -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_mschema.py: -------------------------------------------------------------------------------- 1 | import random 2 | from .file_util import read_json_file, write_json_to_file, save_raw_text 3 | from .db_util import examples_to_str 4 | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union 5 | 6 | 7 | class MSchema: 8 | def __init__(self, db_id: str = 'Anonymous', schema: Optional[str] = None): 9 | self.db_id = db_id 10 | self.schema = schema 11 | self.tables = {} 12 | self.foreign_keys = [] 13 | 14 | def add_table(self, name, fields={}, comment=None): 15 | self.tables[name] = {"fields": fields.copy(), 'examples': [], 'comment': comment} 16 | 17 | def add_field(self, table_name: str, field_name: str, field_type: str = "", 18 | primary_key: bool = False, nullable: bool = True, default: Any = None, 19 | autoincrement: bool = False, comment: str = "", examples: list = [], **kwargs): 20 | self.tables[table_name]["fields"][field_name] = { 21 | "type": field_type, 22 | "primary_key": primary_key, 23 | "nullable": nullable, 24 | "default": default if default is None else f'{default}', 25 | "autoincrement": autoincrement, 26 | "comment": comment, 27 | "examples": examples.copy(), 28 | **kwargs} 29 | 30 | def add_foreign_key(self, table_name, field_name, ref_schema, ref_table_name, ref_field_name): 31 | self.foreign_keys.append([table_name, field_name, ref_schema, ref_table_name, ref_field_name]) 32 | 33 | def get_field_type(self, field_type, simple_mode=True)->str: 34 | if not simple_mode: 35 | return field_type 36 | else: 37 | return field_type.split("(")[0] 38 
| 39 | def has_table(self, table_name: str) -> bool: 40 | if table_name in self.tables.keys(): 41 | return True 42 | else: 43 | return False 44 | 45 | def has_column(self, table_name: str, field_name: str) -> bool: 46 | if self.has_table(table_name): 47 | if field_name in self.tables[table_name]["fields"].keys(): 48 | return True 49 | else: 50 | return False 51 | else: 52 | return False 53 | 54 | def get_field_info(self, table_name: str, field_name: str) -> Dict: 55 | try: 56 | return self.tables[table_name]['fields'][field_name] 57 | except: 58 | return {} 59 | 60 | def single_table_mschema(self, table_name: str, selected_columns: List = None, 61 | example_num=3, show_type_detail=False, shuffle=True) -> str: 62 | table_info = self.tables.get(table_name, {}) 63 | output = [] 64 | table_comment = table_info.get('comment', '') 65 | if table_comment is not None and table_comment != 'None' and len(table_comment) > 0: 66 | if self.schema is not None and len(self.schema) > 0: 67 | output.append(f"# Table: {self.schema}.{table_name}, {table_comment}") 68 | else: 69 | output.append(f"# Table: {table_name}, {table_comment}") 70 | else: 71 | if self.schema is not None and len(self.schema) > 0: 72 | output.append(f"# Table: {self.schema}.{table_name}") 73 | else: 74 | output.append(f"# Table: {table_name}") 75 | 76 | field_lines = [] 77 | # 处理表中的每一个字段 78 | for field_name, field_info in table_info['fields'].items(): 79 | if selected_columns is not None and field_name.lower() not in selected_columns: 80 | continue 81 | 82 | raw_type = self.get_field_type(field_info['type'], not show_type_detail) 83 | field_line = f"({field_name}:{raw_type.upper()}" 84 | if field_info['comment'] != '': 85 | field_line += f", {field_info['comment'].strip()}" 86 | else: 87 | pass 88 | 89 | ## 打上主键标识 90 | is_primary_key = field_info.get('primary_key', False) 91 | if is_primary_key: 92 | field_line += f", Primary Key" 93 | 94 | # 如果有示例,添加上 95 | if len(field_info.get('examples', [])) > 0 and example_num > 0: 96 | examples = field_info['examples'] 97 | examples = [s for s in examples if s is not None] 98 | examples = examples_to_str(examples) 99 | if len(examples) > example_num: 100 | examples = examples[:example_num] 101 | 102 | if raw_type in ['DATE', 'TIME', 'DATETIME', 'TIMESTAMP']: 103 | examples = [examples[0]] 104 | elif len(examples) > 0 and max([len(s) for s in examples]) > 20: 105 | if max([len(s) for s in examples]) > 50: 106 | examples = [] 107 | else: 108 | examples = [examples[0]] 109 | else: 110 | pass 111 | if len(examples) > 0: 112 | example_str = ', '.join([str(example) for example in examples]) 113 | field_line += f", Examples: [{example_str}]" 114 | else: 115 | pass 116 | else: 117 | field_line += "" 118 | field_line += ")" 119 | 120 | field_lines.append(field_line) 121 | 122 | if shuffle: 123 | random.shuffle(field_lines) 124 | 125 | output.append('[') 126 | output.append(',\n'.join(field_lines)) 127 | output.append(']') 128 | 129 | return '\n'.join(output) 130 | 131 | def to_mschema(self, selected_tables: List = None, selected_columns: List = None, 132 | example_num=3, show_type_detail=False, shuffle=True) -> str: 133 | """ 134 | convert to a MSchema string. 
135 | selected_tables: 默认为None,表示选择所有的表 136 | selected_columns: 默认为None,表示所有列全选,格式['table_name.column_name'] 137 | """ 138 | output = [] 139 | 140 | if selected_tables is not None: 141 | selected_tables = [s.lower() for s in selected_tables] 142 | if selected_columns is not None: 143 | selected_columns = [s.lower() for s in selected_columns] 144 | selected_tables = [s.split('.')[0].lower() for s in selected_columns] 145 | 146 | # 依次处理每一个表 147 | for table_name, table_info in self.tables.items(): 148 | if selected_tables is None or table_name.lower() in selected_tables: 149 | cur_table_type = table_info.get('type', 'table') 150 | column_names = list(table_info['fields'].keys()) 151 | if selected_columns is not None: 152 | cur_selected_columns = [c for c in column_names if f"{table_name}.{c}".lower() in selected_columns] 153 | else: 154 | cur_selected_columns = selected_columns 155 | output.append(self.single_table_mschema(table_name, cur_selected_columns, example_num, show_type_detail, shuffle)) 156 | 157 | if shuffle: 158 | random.shuffle(output) 159 | 160 | output.insert(0, f"【DB_ID】 {self.db_id}") 161 | output.insert(1, f"【Schema】") 162 | 163 | # 添加外键信息,选择table_type为view时不展示外键 164 | if self.foreign_keys: 165 | output.append("【Foreign keys】") 166 | for fk in self.foreign_keys: 167 | ref_schema = fk[2] 168 | table1, column1, _, table2, column2 = fk 169 | if selected_tables is None or \ 170 | (table1.lower() in selected_tables and table2.lower() in selected_tables): 171 | if ref_schema == self.schema: 172 | output.append(f"{fk[0]}.{fk[1]}={fk[3]}.{fk[4]}") 173 | 174 | return '\n'.join(output) 175 | 176 | def dump(self): 177 | schema_dict = { 178 | "db_id": self.db_id, 179 | "schema": self.schema, 180 | "tables": self.tables, 181 | "foreign_keys": self.foreign_keys 182 | } 183 | return schema_dict 184 | 185 | def save(self, file_path: str): 186 | schema_dict = self.dump() 187 | write_json_to_file(file_path, schema_dict, is_json_line=False) 188 | 189 | def load(self, file_path: str): 190 | data = read_json_file(file_path) 191 | self.db_id = data.get("db_id", "Anonymous") 192 | self.schema = data.get("schema", None) 193 | self.tables = data.get("tables", {}) 194 | self.foreign_keys = data.get("foreign_keys", []) 195 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_source.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple 2 | 3 | from llama_index.core import SQLDatabase 4 | from sqlalchemy import MetaData, Table, select, text 5 | from sqlalchemy.engine import Engine 6 | 7 | from .db_mschema import MSchema 8 | from .db_util import examples_to_str, preprocess_sql_query 9 | 10 | 11 | class HITLSQLDatabase(SQLDatabase): 12 | def __init__(self, engine: Engine, schema: Optional[str] = None, metadata: Optional[MetaData] = None, 13 | ignore_tables: Optional[List[str]] = None, include_tables: Optional[List[str]] = None, 14 | sample_rows_in_table_info: int = 3, indexes_in_table_info: bool = False, 15 | custom_table_info: Optional[dict] = None, view_support: bool = False, max_string_length: int = 300, 16 | mschema: Optional[MSchema] = None, db_name: Optional[str] = ''): 17 | super().__init__(engine, schema, metadata, ignore_tables, include_tables, sample_rows_in_table_info, 18 | indexes_in_table_info, custom_table_info, view_support, max_string_length) 19 | 20 | self._db_name = db_name 21 | self._usable_tables = [table_name for table_name in 
self._usable_tables if self._inspector.has_table(table_name, schema)] 22 | self._dialect = engine.dialect.name 23 | if mschema is not None: 24 | self._mschema = mschema 25 | else: 26 | self._mschema = MSchema(db_id=db_name, schema=schema) 27 | self.init_mschema() 28 | 29 | @property 30 | def mschema(self) -> MSchema: 31 | """Return M-Schema""" 32 | return self._mschema 33 | 34 | @property 35 | def db_name(self) -> str: 36 | """Return db_name""" 37 | return self._db_name 38 | 39 | def get_pk_constraint(self, table_name: str) -> Dict: 40 | return self._inspector.get_pk_constraint(table_name, self._schema)['constrained_columns'] 41 | 42 | def get_table_comment(self, table_name: str): 43 | try: 44 | return self._inspector.get_table_comment(table_name, self._schema)['text'] 45 | except: # sqlite不支持添加注释 46 | return '' 47 | 48 | def default_schema_name(self) -> Optional[str]: 49 | return self._inspector.default_schema_name 50 | 51 | def get_schema_names(self) -> List[str]: 52 | return self._inspector.get_schema_names() 53 | 54 | def get_foreign_keys(self, table_name: str): 55 | return self._inspector.get_foreign_keys(table_name, self._schema) 56 | 57 | def get_unique_constraints(self, table_name: str): 58 | return self._inspector.get_unique_constraints(table_name, self._schema) 59 | 60 | def fectch_distinct_values(self, table_name: str, column_name: str, max_num: int = 5): 61 | table = Table(table_name, self.metadata_obj, autoload_with=self._engine) 62 | # 构建 SELECT DISTINCT 查询 63 | query = select(table.c[column_name]).distinct().limit(max_num) 64 | values = [] 65 | with self._engine.connect() as connection: 66 | result = connection.execute(query) 67 | distinct_values = result.fetchall() 68 | for value in distinct_values: 69 | if value[0] is not None and value[0] != '': 70 | values.append(value[0]) 71 | return values 72 | 73 | def fetch(self, sql_query: str): 74 | sql_query = preprocess_sql_query(sql_query) 75 | 76 | with self._engine.begin() as connection: 77 | try: 78 | cursor = connection.execute(text(sql_query)) 79 | records = cursor.fetchall() 80 | records = [tuple(row) for row in records] 81 | return True, records 82 | except Exception as e: 83 | # print("An exception occurred during SQL execution.\n", e) 84 | records = str(e) 85 | return False, records 86 | 87 | def fetch_with_column_name(self, sql_query: str): 88 | sql_query = preprocess_sql_query(sql_query) 89 | 90 | with self._engine.begin() as connection: 91 | try: 92 | cursor = connection.execute(text(sql_query)) 93 | columns = cursor.keys() 94 | records = cursor.fetchall() 95 | except Exception as e: 96 | # print("An exception occurred during SQL execution.\n", e) 97 | records = None 98 | columns = [] 99 | return records, columns 100 | 101 | def fetch_with_error_info(self, sql_query: str) -> Tuple[List, str]: 102 | info = '' 103 | sql_query = preprocess_sql_query(sql_query) 104 | with self._engine.begin() as connection: 105 | try: 106 | cursor = connection.execute(text(sql_query)) 107 | records = cursor.fetchall() 108 | except Exception as e: 109 | info = str(e) 110 | records = None 111 | return records, info 112 | 113 | def fetch_truncated(self, sql_query: str, max_rows: Optional[int] = None, max_str_len: int = 30) -> Dict: 114 | sql_query = preprocess_sql_query(sql_query) 115 | with self._engine.begin() as connection: 116 | try: 117 | cursor = connection.execute(text(sql_query)) 118 | result = cursor.fetchall() 119 | truncated_results = [] 120 | if max_rows: 121 | result = result[:max_rows] 122 | for row in result: 123 | 
truncated_row = tuple( 124 | self.truncate_word(column, length=max_str_len) 125 | for column in row 126 | ) 127 | truncated_results.append(truncated_row) 128 | return {"truncated_results": truncated_results, "fields": list(cursor.keys())} 129 | except Exception as e: 130 | # print("An exception occurred during SQL execution.\n", e) 131 | # records = None 132 | records = str(e) 133 | return {"truncated_results": records, "fields": []} 134 | 135 | def trunc_result_to_markdown(self, sql_res: Dict) -> str: 136 | """ 137 | 数据库查询结果转换成markdown格式 138 | """ 139 | truncated_results = sql_res.get("truncated_results", []) 140 | fields = sql_res.get("fields", []) 141 | 142 | if not isinstance(truncated_results, list): 143 | return str(truncated_results) 144 | 145 | header = "| " + " | ".join(fields) + " |" 146 | separator = "| " + " | ".join(["---"] * len(fields)) + " |" 147 | rows = [] 148 | for row in truncated_results: 149 | rows.append("| " + " | ".join(str(value) for value in row) + " |") 150 | markdown_table = "\n".join([header, separator] + rows) 151 | return markdown_table 152 | 153 | 154 | def execute(self, sql_query: str, timeout=5) -> Any: 155 | # import concurrent.futures 156 | sql_query = preprocess_sql_query(sql_query) 157 | 158 | with self._engine.begin() as connection: 159 | try: 160 | cursor = connection.execute(text(sql_query)) 161 | return True 162 | except Exception as e: 163 | info = str(e) 164 | print("SQL执行异常:", info) 165 | return None 166 | 167 | def init_mschema(self): 168 | for table_name in self._usable_tables: 169 | table_comment = self.get_table_comment(table_name) 170 | table_comment = '' if table_comment is None else table_comment.strip() 171 | self._mschema.add_table(table_name, fields={}, comment=table_comment) 172 | pks = self.get_pk_constraint(table_name) 173 | 174 | fks = self.get_foreign_keys(table_name) 175 | for fk in fks: 176 | referred_schema = fk['referred_schema'] 177 | for c, r in zip(fk['constrained_columns'], fk['referred_columns']): 178 | self._mschema.add_foreign_key(table_name, c, referred_schema, fk['referred_table'], r) 179 | 180 | fields = self._inspector.get_columns(table_name, schema=self._schema) 181 | for field in fields: 182 | field_type = f"{field['type']!s}" 183 | field_name = field['name'] 184 | if field_name in pks: 185 | primary_key = True 186 | else: 187 | primary_key = False 188 | 189 | field_comment = field.get("comment", None) 190 | field_comment = "" if field_comment is None else field_comment.strip() 191 | autoincrement = field.get('autoincrement', False) 192 | default = field.get('default', None) 193 | if default is not None: 194 | default = f'{default}' 195 | 196 | try: 197 | examples = self.fectch_distinct_values(table_name, field_name, 5) 198 | except: 199 | examples = [] 200 | examples = examples_to_str(examples) 201 | 202 | self._mschema.add_field(table_name, field_name, field_type=field_type, primary_key=primary_key, 203 | nullable=field['nullable'], default=default, autoincrement=autoincrement, 204 | comment=field_comment, examples=examples) 205 | 206 | def sync_to_local(self, local_engine: Engine): 207 | """同步数据到本地数据库""" 208 | from sqlalchemy.orm import sessionmaker 209 | 210 | local_metadata = MetaData() 211 | 212 | # # 连接到远程数据库 213 | remote_metadata = MetaData() 214 | remote_metadata.reflect(bind=self._engine) 215 | 216 | remote_metadata.create_all(bind=self._engine) 217 | 218 | print(remote_metadata.tables.keys()) 219 | # 同步表结构和数据 220 | for table_name in remote_metadata.tables: 221 | remote_table = Table(table_name, 
remote_metadata, autoload_with=self._engine) 222 | print(f"Syncing table {table_name}...") 223 | 224 | # 创建本地表 225 | remote_table.metadata = local_metadata 226 | local_metadata.drop_all(local_engine) 227 | local_metadata.create_all(local_engine, tables=[remote_table]) 228 | 229 | # 将数据同步到本地 230 | Session = sessionmaker(bind=self._engine) 231 | session = Session() 232 | with local_engine.begin() as local_connection: 233 | data = session.query(remote_table).all() 234 | columns = remote_table.columns.keys() 235 | insert_data = [dict(zip(columns, d)) for d in data] 236 | local_connection.execute(remote_table.insert(), insert_data) 237 | 238 | print("Sync complete.") 239 | 240 | 241 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_util.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import datetime, decimal 4 | from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, text 5 | from sqlalchemy.engine import Engine 6 | from .db_config import DBConfig 7 | 8 | 9 | def init_db_conn(db_config: DBConfig) -> Engine: 10 | if db_config.dialect.lower() == 'sqlite': 11 | return connect_to_sqlite(db_config.db_path) 12 | elif db_config.dialect.lower() == 'mysql': 13 | return connect_to_mysql(db_config.db_name, db_config.user_name, db_config.db_pwd, db_config.db_host, db_config.port) 14 | elif db_config.dialect.lower() == 'postgresql': 15 | return connect_to_pg(db_config.db_name, db_config.user_name, db_config.db_pwd, db_config.db_host, db_config.port) 16 | else: 17 | raise NotImplementedError 18 | 19 | 20 | def connect_to_sqlite(db_path: str) -> Engine: 21 | assert os.path.exists(db_path) 22 | db_engine = create_engine(f'sqlite:///{os.path.abspath(db_path)}') 23 | return db_engine 24 | 25 | 26 | def connect_to_mysql(db_name, user_name, db_pwd, db_host, port) -> Engine: 27 | db_engine = create_engine(f"mysql+pymysql://{user_name}:{db_pwd}@{db_host}:{port}/{db_name}") 28 | return db_engine 29 | 30 | 31 | def connect_to_pg(db_name, user_name, db_pwd, db_host, port) -> Engine: 32 | db_engine = create_engine(f"postgresql+psycopg2://{user_name}:{db_pwd}@{db_host}:{port}/{db_name}") 33 | return db_engine 34 | 35 | 36 | def remove_sql_comments(sql_query: str) -> str: 37 | # 正则表达式用于匹配 SQL 注释 38 | single_line_comment_pattern = r'--[^\n]*' 39 | multi_line_comment_pattern = r'/\*.*?\*/' 40 | 41 | # 删除单行注释 42 | sql_without_single_comments = re.sub(single_line_comment_pattern, '', sql_query) 43 | 44 | # 删除多行注释 45 | sql_without_comments = re.sub(multi_line_comment_pattern, '', sql_without_single_comments, flags=re.DOTALL) 46 | 47 | return sql_without_comments.strip() 48 | 49 | 50 | def preprocess_sql_query(sql_query: str) -> str: 51 | # 删除注释,加上分号 52 | sql_query = remove_sql_comments(sql_query) 53 | if not sql_query.strip().endswith(';'): 54 | sql_query += ';' 55 | return sql_query 56 | 57 | 58 | def is_email(string): 59 | pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$' 60 | match = re.match(pattern, string) 61 | if match: 62 | return True 63 | else: 64 | return False 65 | 66 | 67 | def examples_to_str(examples: list) -> list[str]: 68 | """ 69 | from examples to a list of str 70 | """ 71 | values = examples 72 | for i in range(len(values)): 73 | if isinstance(values[i], datetime.date): 74 | values = [values[i]] 75 | break 76 | elif isinstance(values[i], datetime.datetime): 77 | values = [values[i]] 78 | break 79 | elif isinstance(values[i], decimal.Decimal): 80 | values[i] 
= str(float(values[i])) 81 | elif is_email(str(values[i])): 82 | values = [] 83 | break 84 | elif 'http://' in str(values[i]) or 'https://' in str(values[i]): 85 | values = [] 86 | break 87 | elif values[i] is not None and not isinstance(values[i], str): 88 | pass 89 | elif values[i] is not None and '.com' in values[i]: 90 | pass 91 | 92 | return [str(v) for v in values if v is not None and len(str(v)) > 0] 93 | 94 | 95 | def sql_fetcher(db_engine: Engine, sql_query: str): 96 | sql_query = preprocess_sql_query(sql_query) 97 | with db_engine.begin() as connection: 98 | try: 99 | cursor = connection.execute(text(sql_query)) 100 | records = cursor.fetchall() 101 | except Exception as e: 102 | print("An exception occurred during SQL execution.\n", e) 103 | records = None 104 | return records -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/file_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import json 3 | import os 4 | import pandas as pd 5 | import re 6 | 7 | def extract_sql_from_qwen(qwen_result) -> str: 8 | sql = qwen_result 9 | pattern = r"```sql(.*?)```" 10 | 11 | # 使用re.DOTALL标志来使得点号(.)可以匹配包括换行符在内的任意字符 12 | sql_code_snippets = re.findall(pattern, qwen_result, re.DOTALL) 13 | 14 | if len(sql_code_snippets) > 0: 15 | sql = sql_code_snippets[-1].strip() 16 | 17 | return sql 18 | 19 | def read_text(filename)->list: 20 | data = [] 21 | with open(filename, 'r', encoding='utf-8') as file: 22 | for line in file.readlines(): 23 | line = line.strip() 24 | data.append(line) 25 | return data 26 | 27 | 28 | def save_raw_text(filename, content): 29 | with open(filename, 'w', encoding='utf-8') as file: 30 | file.write(content) 31 | 32 | 33 | def read_json_file(path, filter_func=None): 34 | if os.path.exists(path): 35 | with open(path, 'r', encoding='utf-8') as f: 36 | try: 37 | json_data = json.load(f) 38 | if filter_func is not None: 39 | json_data = list(filter(filter_func, json_data)) 40 | return json_data 41 | except Exception as e: 42 | f.seek(0) 43 | lines = f.readlines() 44 | json_list = [json.loads(line.strip( 45 | )) for line in lines if filter_func is None or filter_func(json.loads(line.strip()))] 46 | return json_list 47 | else: 48 | return None 49 | 50 | 51 | def write_json_to_file(path: str, data: list, is_json_line: bool = False) -> None: 52 | valid_path(path) 53 | with open(path, 'w', encoding='utf-8') as f: 54 | if is_json_line: 55 | for line in data: 56 | f.write(json.dumps(line, ensure_ascii=False) + '\n') 57 | else: 58 | f.write(json.dumps(data, ensure_ascii=False, indent=4)) 59 | 60 | 61 | def save_as_csv(path: str, data: list): 62 | valid_path(path) 63 | df = pd.DataFrame(data) 64 | df.to_csv(path, index=False, encoding='utf-8') 65 | 66 | 67 | def valid_path(path): 68 | dir = os.path.dirname(path) 69 | if not os.path.exists(dir): 70 | os.makedirs(dir) 71 | 72 | 73 | def find_lasest_timastamp_file(root_path): 74 | pass -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/llm_util.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | 3 | 4 | def call_openai_sdk(**args): 5 | key = args['key'] 6 | base_url = args['url'] 7 | client = OpenAI( 8 | api_key=key, 9 | base_url=base_url, 10 | ) 11 | del args['key'] 12 | del args['url'] 13 | completion = client.chat.completions.create( 14 | **args 15 | ) 16 | return completion 17 | 18 | 
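For reference, here is a minimal sketch of how `call_openai_sdk` is driven by `server.py` (the model name and URL are the defaults from `config_demo.yml`; the key is a placeholder):

```python
# Usage sketch for call_openai_sdk (illustration only, not part of the module).
# "key" and "url" are popped from the kwargs to build the OpenAI client; the
# remaining kwargs are forwarded to client.chat.completions.create().
from xiyan_mcp_server.utils.llm_util import call_openai_sdk

param = {
    "model": "pre-xiyan_multi_dialect_v3",  # default from config_demo.yml
    "key": "YOUR_API_KEY",  # placeholder
    "url": "https://poc-dashscope.aliyuncs.com/compatible-mode/v1",
    "messages": [
        {"role": "system", "content": "You are a MySQL data analyst."},
        {"role": "user", "content": "How many orders were placed per month?"},
    ],
}
completion = call_openai_sdk(**param)
print(completion.choices[0].message.content)
```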
-------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/logger_util.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | # Configure logger settings: a single file sink with a custom format at INFO level. 4 | # Note: logger.configure() replaces any previously registered handlers, so the 5 | # file sink is set up exactly once, by the call below. 6 | logger_format = "{time} - {level} - {message}" 7 | 8 | logger.configure(handlers=[{"sink": "xiyan_mcp_server.log", "format": logger_format, "level": "INFO"}]) 9 | 10 | # Other modules import this pre-configured instance: `from .logger_util import logger` 11 | --------------------------------------------------------------------------------
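A short sketch of how the shared logger is meant to be consumed by other modules (hypothetical call sites, shown only for illustration):

```python
# Hypothetical call sites: reuse the pre-configured loguru logger.
from xiyan_mcp_server.utils.logger_util import logger

logger.info("xiyan_mcp_server started")       # written to xiyan_mcp_server.log
logger.error("database connection failed")    # same sink, ERROR level
```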