├── .github └── workflows │ └── python-publish.yml ├── Dockerfile ├── LICENSE.txt ├── README.md ├── README_ja.md ├── README_zh.md ├── imgs ├── Paper-Arxiv-orange.svg ├── architecture.png ├── claude_desktop.jpg ├── dinggroup_out.png ├── exp_mcpbench.png ├── goose.jpg ├── logo.png └── witsy.jpg ├── pyproject.toml ├── requirements.txt ├── setup.py ├── smithery.yaml └── src └── xiyan_mcp_server ├── __init__.py ├── __main__.py ├── config_demo.yml ├── database_env.py ├── local_model ├── README.md └── local_xiyan_server.py ├── server.py └── utils ├── common_util.py ├── db_config.py ├── db_mschema.py ├── db_source.py ├── db_util.py ├── file_util.py ├── llm_util.py └── logger_util.py /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package to PyPI when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | release-build: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.x" 28 | 29 | - name: Build release distributions 30 | run: | 31 | # NOTE: put your own distribution build steps here. 32 | python -m pip install build 33 | python -m build 34 | 35 | - name: Upload distributions 36 | uses: actions/upload-artifact@v4 37 | with: 38 | name: release-dists 39 | path: dist/ 40 | 41 | pypi-publish: 42 | runs-on: ubuntu-latest 43 | needs: 44 | - release-build 45 | permissions: 46 | # IMPORTANT: this permission is mandatory for trusted publishing 47 | id-token: write 48 | 49 | # Dedicated environments with protections for publishing are strongly recommended. 50 | # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules 51 | environment: 52 | name: pypi 53 | # OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status: 54 | # url: https://pypi.org/p/YOURPROJECT 55 | # 56 | # ALTERNATIVE: if your GitHub Release name is the PyPI project version string 57 | # ALTERNATIVE: exactly, uncomment the following line instead: 58 | # url: https://pypi.org/project/YOURPROJECT/${{ github.event.release.name }} 59 | 60 | steps: 61 | - name: Retrieve release distributions 62 | uses: actions/download-artifact@v4 63 | with: 64 | name: release-dists 65 | path: dist/ 66 | 67 | - name: Publish release distributions to PyPI 68 | uses: pypa/gh-action-pypi-publish@release/v1 69 | with: 70 | packages-dir: dist/ 71 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # 使用Python 3.11作为基础镜像 2 | FROM python:3.11-slim 3 | 4 | # 设置工作目录 5 | WORKDIR /app 6 | 7 | #COPY requirements.txt . 
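# The package is installed from PyPI in the next step, so copying requirements.txt is not needed here.
# Note (illustrative, not part of the original image): at runtime the server reads its YAML config from
# the YML environment variable, so a typical invocation mounts a config file and sets YML, e.g.
#   docker run -e YML=/app/config.yml -v /path/to/config.yml:/app/config.yml <image-name>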
8 | RUN pip install xiyan-mcp-server 9 | 10 | 11 | # 运行应用 12 | CMD ["python", "-m", "xiyan_mcp_server"] -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

XiYan MCP Server

3 |

4 | MCP Playwright 5 |

6 |

7 | A Model Context Protocol (MCP) server that enables natural language queries to databases
8 | powered by XiYan-SQL, the state-of-the-art (SOTA) model for text-to-SQL on open benchmarks 9 |

10 |

11 | 💻 XiYan-mcp-server | 12 | 🌐 XiYan-SQL | 13 | 📖 Arxiv | 14 | 📄 PapersWithCode 15 | 🤗 HuggingFace | 16 | 🤖 ModelScope | 17 | 🌕 析言GBI 18 |
19 | MCP Server 20 | 21 | 22 | License: Apache 2.0 23 | 24 | PyPI Downloads 25 | Smithery Installs 26 | 27 | GitHub stars 28 | 29 |
30 | English | 中文 | 日本語
31 | Ding Group 钉钉群 | 32 | Follow me on Weibo 33 |

34 | 35 | 36 | ## Table of Contents 37 | 38 | - [Features](#features) 39 | - [Preview](#preview) 40 | - [Architecture](#architecture) 41 | - [Best Practice](#best-practice) 42 | - [Tools Preview](#tools-preview) 43 | - [Installation](#installation) 44 | - [Installing from pip](#installing-from-pip) 45 | - [Installing from Smithery.ai](#installing-from-smitheryai) 46 | - [Configuration](#configuration) 47 | - [LLM Configuration](#llm-configuration) 48 | - [General LLMs](#general-llms) 49 | - [Text-to-SQL SOTA model](#text-to-sql-sota-model) 50 | - [Local Model](#local-model) 51 | - [Database Configuration](#database-configuration) 52 | - [MySQL](#mysql) 53 | - [PostgreSQL](#postgresql) 54 | - [Launch](#launch) 55 | - [Claude Desktop](#claude-desktop) 56 | - [Cline](#cline) 57 | - [Goose](#goose) 58 | - [Cursor](#cursor) 59 | - [It Does Not Work](#it-does-not-work) 60 | - [Citation](#citation) 61 | 62 | 63 | ## Features 64 | - 🌐 Fetch data by natural language through [XiYanSQL](https://github.com/XGenerationLab/XiYan-SQL) 65 | - 🤖 Support general LLMs (GPT,qwenmax), Text-to-SQL SOTA model 66 | - 💻 Support pure local mode (high security!) 67 | - 📝 Support MySQL and PostgreSQL. 68 | - 🖱️ List available tables as resources 69 | - 🔧 Read table contents 70 | 71 | ## Preview 72 | ### Architecture 73 | There are two ways to integrate this server in your project, as shown below: 74 | The left is remote mode, which is the default mode. It requires an API key to access the xiyanSQL-qwencoder-32B model from service provider (see [Configuration](#Configuration)). 75 | Another mode is local mode, which is more secure. It does not require the API key. 76 | 77 | ![architecture.png](imgs/architecture.png) 78 | ### Best practice and reports 79 | 80 | ["Build a local data assistant using MCP + Modelscope API-Inference without writing a single line of code"](https://mp.weixin.qq.com/s/tzDelu0W4w6t9C0_yYRbHA) 81 | 82 | ["Xiyan MCP on Modelscope"](https://modelscope.cn/headlines/article/1142) 83 | 84 | ### Evaluation on MCPBench 85 | The following figure illustrates the performance of the XiYan MCP server as measured by the MCPBench benchmark. The XiYan MCP server demonstrates superior performance compared to both the MySQL MCP server and the PostgreSQL MCP server, achieving a lead of 2-22 percentage points. The detailed experiment results can be found at [MCPBench](https://github.com/modelscope/MCPBench) and the report ["Evaluation Report on MCP Servers"](https://arxiv.org/abs/2504.11094). 86 | 87 | ![exp_mcpbench.png](imgs/exp_mcpbench.png) 88 | 89 | ### Tools Preview 90 | - The tool ``get_data`` provides a natural language interface for retrieving data from a database. This server will convert the input natural language into SQL using a built-in model and call the database to return the query results. 91 | 92 | - The ``{dialect}://{table_name}`` resource allows obtaining a portion of sample data from the database for model reference when a specific table_name is specified. 93 | - The ``{dialect}://`` resource will list the names of the current databases 94 | 95 | ## Installation 96 | ### Installing from pip 97 | 98 | Python 3.11+ is required. 
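If you are unsure which interpreter your MCP client will end up using, a quick check like the one below (a minimal sketch; run it with the same `python` you plan to configure) confirms the version requirement before installing:

```python
import sys

# xiyan-mcp-server requires Python 3.11 or newer; run this with the interpreter
# you intend to point your MCP client at.
if sys.version_info < (3, 11):
    raise SystemExit(f"Python 3.11+ is required, found {sys.version.split()[0]}")
print(f"OK: {sys.executable} runs Python {sys.version.split()[0]}")
```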
99 | You can install the server through pip, and it will install the latest version: 100 | 101 | ```shell 102 | pip install xiyan-mcp-server 103 | ``` 104 | 105 | If you want to install the development version from source, you can install from source code on github: 106 | ```shell 107 | pip install git+https://github.com/XGenerationLab/xiyan_mcp_server.git 108 | ``` 109 | 110 | ### Installing from Smithery.ai 111 | See [@XGenerationLab/xiyan_mcp_server](https://smithery.ai/server/@XGenerationLab/xiyan_mcp_server) 112 | 113 | Not fully tested. 114 | 115 | ## Configuration 116 | 117 | You need a YAML config file to configure the server. 118 | A default config file is provided in config_demo.yml which looks like this: 119 | 120 | ```yaml 121 | mcp: 122 | transport: "stdio" 123 | model: 124 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 125 | key: "" 126 | url: "https://api-inference.modelscope.cn/v1/" 127 | database: 128 | host: "localhost" 129 | port: 3306 130 | user: "root" 131 | password: "" 132 | database: "" 133 | ``` 134 | 135 | ### MCP Configuration 136 | You can set the transport protocol to ``stdio`` or ``sse``. 137 | #### STDIO 138 | For stdio protocol, you can set just like this: 139 | ```yaml 140 | mcp: 141 | transport: "stdio" 142 | ``` 143 | #### SSE 144 | For sse protocol, you can set mcp config as below: 145 | ```yaml 146 | mcp: 147 | transport: "sse" 148 | port: 8000 149 | log_level: "INFO" 150 | ``` 151 | The default port is `8000`. You can change the port if needed. 152 | The default log level is `ERROR`. We recommend to set log level to `INFO` for more detailed information. 153 | 154 | Other configurations like `debug`, `host`, `sse_path`, `message_path` can be customized as well, but normally you don't need to modify them. 155 | 156 | ### LLM Configuration 157 | ``Name`` is the name of the model to use, ``key`` is the API key of the model, ``url`` is the API url of the model. We support following models. 158 | 159 | | versions | general LLMs(GPT,qwenmax) | SOTA model by Modelscope | SOTA model by Dashscope | Local LLMs | 160 | |----------|-------------------------------|--------------------------------------------|-----------------------------------------------------------|-----------------------| 161 | | description| basic, easy to use | best performance, stable, recommand | best performance, for trial | slow, high-security | 162 | | name | the official model name (e.g. gpt-3.5-turbo,qwen-max) | XGenerationLab/XiYanSQL-QwenCoder-32B-2412 | xiyansql-qwencoder-32b | xiyansql-qwencoder-3b | 163 | | key | the API key of the service provider (e.g. OpenAI, Alibaba Cloud) | the API key of modelscope | the API key via email | "" | 164 | | url | the endpoint of the service provider (e.g."https://api.openai.com/v1") | https://api-inference.modelscope.cn/v1/ | https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql | http://localhost:5090 | 165 | 166 | #### General LLMs 167 | If you want to use the general LLMs, e.g. gpt3.5, you can directly config like this: 168 | ```yaml 169 | model: 170 | name: "gpt-3.5-turbo" 171 | key: "YOUR KEY " 172 | url: "https://api.openai.com/v1" 173 | database: 174 | ``` 175 | 176 | If you want to use Qwen from Alibaba, e.g. 
Qwen-max, you can use following config: 177 | ```yaml 178 | model: 179 | name: "qwen-max" 180 | key: "YOUR KEY " 181 | url: "https://dashscope.aliyuncs.com/compatible-mode/v1" 182 | database: 183 | ``` 184 | #### Text-to-SQL SOTA model 185 | We recommend the XiYanSQL-qwencoder-32B (https://github.com/XGenerationLab/XiYanSQL-QwenCoder), which is the SOTA model in text-to-sql, see [Bird benchmark](https://bird-bench.github.io/). 186 | There are two ways to use the model. You can use either of them. 187 | (1) [Modelscope](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412), (2) Alibaba Cloud DashScope. 188 | 189 | 190 | ##### (1) Modelscope version 191 | You need to apply a ``key`` of API-inference from Modelscope, https://www.modelscope.cn/docs/model-service/API-Inference/intro 192 | Then you can use the following config: 193 | ```yaml 194 | model: 195 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 196 | key: "" 197 | url: "https://api-inference.modelscope.cn/v1/" 198 | ``` 199 | 200 | Read our [model description](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412) for more details. 201 | 202 | ##### (2) Dashscope version 203 | 204 | We deployed the model on Alibaba Cloud DashScope, so you need to set the following environment variables: 205 | Send me your email to get the ``key``. ( godot.lzl@alibaba-inc.com ) 206 | In the email, please attach the following information: 207 | ```yaml 208 | name: "YOUR NAME", 209 | email: "YOUR EMAIL", 210 | organization: "your college or Company or Organization" 211 | ``` 212 | We will send you a ``key`` according to your email. And you can fill the ``key`` in the yml file. 213 | The ``key`` will be expired by 1 month or 200 queries or other legal restrictions. 214 | 215 | 216 | ```yaml 217 | model: 218 | name: "xiyansql-qwencoder-32b" 219 | key: "KEY" 220 | url: "https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql" 221 | ``` 222 | 223 | Note: this model service is just for trial, if you need to use it in production, please contact us. 224 | 225 | ##### (3) Local version 226 | Alternatively, you can also deploy the model [XiYanSQL-qwencoder-32B](https://github.com/XGenerationLab/XiYanSQL-QwenCoder) on your own server. 227 | See [Local Model](src/xiyan_mcp_server/local_model/README.md) for more details. 228 | 229 | 230 | ### Database Configuration 231 | ``host``, ``port``, ``user``, ``password``, ``database`` are the connection information of the database. 232 | 233 | You can use local or any remote databases. Now we support MySQL and PostgreSQL(more dialects soon). 234 | 235 | #### MySQL 236 | 237 | ```yaml 238 | database: 239 | host: "localhost" 240 | port: 3306 241 | user: "root" 242 | password: "" 243 | database: "" 244 | ``` 245 | #### PostgreSQL 246 | Step 1: Install Python packages 247 | ```bash 248 | pip install psycopg2 249 | ``` 250 | Step 2: prepare the config.yml like this: 251 | ```yaml 252 | database: 253 | dialect: "postgresql" 254 | host: "localhost" 255 | port: 5432 256 | user: "" 257 | password: "" 258 | database: "" 259 | ``` 260 | 261 | Note that ``dialect`` should be ``postgresql`` for postgresql. 262 | ## Launch 263 | 264 | ### Server Launch 265 | 266 | If you want to launch server with `sse`, you have to run the following command in a terminal: 267 | ```shell 268 | YML=path/to/yml python -m xiyan_mcp_server 269 | ``` 270 | Then you should see the information on http://localhost:8000/sse in your browser. 
(Defaultly, change if your mcp server runs on other host/port) 271 | 272 | Otherwise, if you use `stdio` transport protocol, you usually declare the mcp server command in specific mcp application instead of launching it in a terminal. 273 | However, you can still debug with this command if needed. 274 | 275 | ### Client Setting 276 | 277 | #### Claude Desktop 278 | Add this in your Claude Desktop config file, ref Claude Desktop config example 279 | ```json 280 | { 281 | "mcpServers": { 282 | "xiyan-mcp-server": { 283 | "command": "/xxx/python", 284 | "args": [ 285 | "-m", 286 | "xiyan_mcp_server" 287 | ], 288 | "env": { 289 | "YML": "PATH/TO/YML" 290 | } 291 | } 292 | } 293 | } 294 | ``` 295 | **Please note that the Python command here requires the complete path to the Python executable (`/xxx/python`); otherwise, the Python interpreter cannot be found. You can determine this path by using the command `which python`. The same applies to other applications as well.** 296 | 297 | Claude Desktop currently does not support the SSE transport protocol. 298 | 299 | #### Cline 300 | Prepare the config like [Claude Desktop](#claude-desktop) 301 | 302 | #### Goose 303 | If you use `stdio`, add following command in the config, ref Goose config example 304 | ```shell 305 | env YML=path/to/yml /xxx/python -m xiyan_mcp_server 306 | ``` 307 | Otherwise, if you use `sse`, change Type to `SSE` and set the endpoint to `http://127.0.0.1:8000/sse` 308 | #### Cursor 309 | Use the similar command as follows. 310 | 311 | For `stdio`: 312 | ```json 313 | { 314 | "mcpServers": { 315 | "xiyan-mcp-server": { 316 | "command": "/xxx/python", 317 | "args": [ 318 | "-m", 319 | "xiyan_mcp_server" 320 | ], 321 | "env": { 322 | "YML": "path/to/yml" 323 | } 324 | } 325 | } 326 | } 327 | ``` 328 | For `sse`: 329 | ```json 330 | { 331 | "mcpServers": { 332 | "xiyan_mcp_server_1": { 333 | "url": "http://localhost:8000/sse" 334 | } 335 | } 336 | } 337 | ``` 338 | 339 | 340 | #### Witsy 341 | Add following in command: 342 | ```shell 343 | /xxx/python -m xiyan_mcp_server 344 | ``` 345 | Add an env: key is YML and value is the path to your yml file. 346 | Ref Witsy config example 347 | 348 | 349 | ## It Does Not Work! 350 | Contact us: 351 | Ding Group钉钉群| 352 | Follow me on Weibo 353 | 354 | 355 | ## Other Related Links 356 | 357 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/xgenerationlab-xiyan-mcp-server-badge.png)](https://mseep.ai/app/xgenerationlab-xiyan-mcp-server) 358 | 359 | 360 | 361 | 362 | ## Citation 363 | If you find our work helpful, feel free to give us a cite. 364 | ```bib 365 | @article{xiyansql, 366 | title={A Preview of XiYan-SQL: A Multi-Generator Ensemble Framework for Text-to-SQL}, 367 | author={Yingqi Gao and Yifu Liu and Xiaoxia Li and Xiaorong Shi and Yin Zhu and Yiming Wang and Shiqi Li and Wei Li and Yuntao Hong and Zhiling Luo and Jinyang Gao and Liyu Mou and Yu Li}, 368 | year={2024}, 369 | journal={arXiv preprint arXiv:2411.08599}, 370 | url={https://arxiv.org/abs/2411.08599}, 371 | primaryClass={cs.AI} 372 | } 373 | ``` 374 | -------------------------------------------------------------------------------- /README_ja.md: -------------------------------------------------------------------------------- 1 | # XiYan MCP サーバー 2 | 3 |

4 | MCP Playwright 5 |

6 |

7 | 自然言語でデータベースにクエリを実行できるモデルコンテキストプロトコル(MCP)サーバー
8 | オープンベンチマークでのテキストからSQLへのSOTAを実現するXiYan-SQLによって提供されています 9 |

10 | 11 |

12 | 💻 XiYan-mcp-server | 13 | 🌐 XiYan-SQL | 14 | 📖 Arxiv | 15 | 📄 PapersWithCode 16 | 💻 HuggingFace | 17 | 🤖 ModelScope | 18 | 🌕 析言GBI 19 |
20 | MCP Server 21 | 22 | 23 | License: Apache 2.0 24 | 25 | PyPI Downloads 26 | Smithery Installs 27 | 28 | GitHub stars 29 | 30 |
31 | 英語 | 中国語
32 | Ding Group | 33 | Weiboでフォロー 34 |

35 | 36 | ## 目次 37 | 38 | - [特徴](#特徴) 39 | - [プレビュー](#プレビュー) 40 | - [アーキテクチャ](#アーキテクチャ) 41 | - [ベストプラクティス](#ベストプラクティス) 42 | - [ツールプレビュー](#ツールプレビュー) 43 | - [インストール](#インストール) 44 | - [pipからのインストール](#pipからのインストール) 45 | - [Smithery.aiからのインストール](#smitheryaiからのインストール) 46 | - [設定](#設定) 47 | - [LLM設定](#llm設定) 48 | - [一般的なLLM](#一般的なllm) 49 | - [テキストからSQLへのSOTAモデル](#テキストからsqlへのsotaモデル) 50 | - [ローカルモデル](#ローカルモデル) 51 | - [データベース設定](#データベース設定) 52 | - [MySQL](#mysql) 53 | - [PostgreSQL](#postgresql) 54 | - [起動](#起動) 55 | - [Claude Desktop](#claude-desktop) 56 | - [Cline](#cline) 57 | - [Goose](#goose) 58 | - [Cursor](#cursor) 59 | - [動作しない場合](#動作しない場合) 60 | - [引用](#引用) 61 | 62 | ## 特徴 63 | - 🌐 [XiYanSQL](https://github.com/XGenerationLab/XiYan-SQL)を通じて自然言語でデータを取得 64 | - 🤖 一般的なLLM(GPT, qwenmax)、テキストからSQLへのSOTAモデルをサポート 65 | - 💻 純粋なローカルモードをサポート(高セキュリティ!) 66 | - 📝 MySQLとPostgreSQLをサポート 67 | - 🖱️ 利用可能なテーブルをリソースとしてリスト 68 | - 🔧 テーブル内容を読み取る 69 | 70 | ## プレビュー 71 | ### アーキテクチャ 72 | このサーバーをプロジェクトに統合する方法は2つあります。以下の図に示されています: 73 | 左側はリモートモードで、デフォルトモードです。サービスプロバイダーからxiyanSQL-qwencoder-32BモデルにアクセスするためにAPIキーが必要です([設定](#設定)を参照)。 74 | もう一つのモードはローカルモードで、より安全です。APIキーは必要ありません。 75 | 76 | ![architecture.png](imgs/architecture.png) 77 | 78 | ### ベストプラクティスとレポート 79 | 80 | ["MCP + Modelscope API-Inferenceを使用して、コードを一行も書かずにローカルデータアシスタントを構築する"](https://mp.weixin.qq.com/s/tzDelu0W4w6t9C0_yYRbHA) 81 | 82 | ["Modelscope上のXiyan MCP"](https://modelscope.cn/headlines/article/1142) 83 | 84 | ### MCPBenchでの評価 85 | 以下の図は、MCPBenchベンチマークで測定されたXiYan MCPサーバーのパフォーマンスを示しています。XiYan MCPサーバーは、MySQL MCPサーバーおよびPostgreSQL MCPサーバーと比較して優れたパフォーマンスを示し、2〜22パーセントポイントのリードを達成しています。詳細な実験結果は[MCPBench](https://github.com/modelscope/MCPBench)およびレポート["MCPサーバーの評価レポート"](https://arxiv.org/abs/2504.11094)で確認できます。 86 | 87 | ![exp_mcpbench.png](imgs/exp_mcpbench.png) 88 | 89 | ### ツールプレビュー 90 | - ツール``get_data``は、データベースからデータを取得するための自然言語インターフェースを提供します。このサーバーは、入力された自然言語をSQLに変換し、データベースを呼び出してクエリ結果を返します。 91 | 92 | - ``{dialect}://{table_name}``リソースは、特定のtable_nameを指定した場合に、データベースからモデル参照用のサンプルデータの一部を取得することを許可します。 93 | - ``{dialect}://``リソースは、現在のデータベースの名前をリストします。 94 | 95 | ## インストール 96 | ### pipからのインストール 97 | 98 | Python 3.11以上が必要です。 99 | pipを通じてサーバーをインストールできます。最新バージョンがインストールされます: 100 | 101 | ```bash 102 | pip install xiyan-mcp-server 103 | ``` 104 | 105 | その後、以下のコマンドでサーバーを直接実行できます: 106 | ```bash 107 | python -m xiyan_mcp_server 108 | ``` 109 | ただし、以下の設定を完了するまで機能は提供されません。 110 | ymlファイルを取得します。その後、以下の方法でサーバーを実行できます: 111 | ```yaml 112 | env YML=path/to/yml python -m xiyan_mcp_server 113 | ``` 114 | 115 | ### Smithery.aiからのインストール 116 | [@XGenerationLab/xiyan_mcp_server](https://smithery.ai/server/@XGenerationLab/xiyan_mcp_server)を参照 117 | 118 | 完全にはテストされていません。 119 | 120 | ## 設定 121 | 122 | サーバーを設定するには、YAML設定ファイルが必要です。 123 | デフォルトの設定ファイルconfig_demo.ymlが提供されています。内容は以下の通りです: 124 | 125 | ```yaml 126 | model: 127 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 128 | key: "" 129 | url: "https://api-inference.modelscope.cn/v1/" 130 | 131 | database: 132 | host: "localhost" 133 | port: 3306 134 | user: "root" 135 | password: "" 136 | database: "" 137 | ``` 138 | 139 | ### LLM設定 140 | ``Name``は使用するモデルの名前、``key``はモデルのAPIキー、``url``はモデルのAPIアドレスです。以下のモデルをサポートしています。 141 | 142 | | バージョン | 一般的なLLM(GPT, qwenmax) | ModelscopeによるSOTAモデル | DashscopeによるSOTAモデル | ローカルLLM | 143 | |----------|-------------------------------|-----------------------------|----------------------------|----------------| 144 | | 説明 | 基本的で使いやすい | 最高のパフォーマンス、安定、推奨 | 最高のパフォーマンス、試用用 | 遅い、高セキュリティ | 145 | | 名前 | 
公式モデル名(例:gpt-3.5-turbo, qwen-max) | XGenerationLab/XiYanSQL-QwenCoder-32B-2412 | xiyansql-qwencoder-32b | xiyansql-qwencoder-3b | 146 | | キー | サービスプロバイダーのAPIキー(例:OpenAI, Alibaba Cloud) | ModelscopeのAPIキー | メールで取得するAPIキー | "" | 147 | | URL | サービスプロバイダーのエンドポイント(例:"https://api.openai.com/v1") | https://api-inference.modelscope.cn/v1/ | https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql | http://localhost:5090 | 148 | 149 | #### 一般的なLLM 150 | 一般的なLLMを使用する場合、例:gpt3.5、以下のように設定できます: 151 | ```yaml 152 | model: 153 | name: "gpt-3.5-turbo" 154 | key: "YOUR KEY" 155 | url: "https://api.openai.com/v1" 156 | database: 157 | ``` 158 | 159 | AlibabaのQwenを使用する場合、例:Qwen-max、以下の設定を使用できます: 160 | ```yaml 161 | model: 162 | name: "qwen-max" 163 | key: "YOUR KEY" 164 | url: "https://dashscope.aliyuncs.com/compatible-mode/v1" 165 | database: 166 | ``` 167 | #### テキストからSQLへのSOTAモデル 168 | テキストからSQLへのSOTAモデルであるXiYanSQL-qwencoder-32B(https://github.com/XGenerationLab/XiYanSQL-QwenCoder)を推奨します。以下の2つの方法でモデルを使用できます。 169 | (1) [Modelscope](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412)、(2) Alibaba Cloud DashScope。 170 | 171 | ##### (1) Modelscopeバージョン 172 | ModelscopeからAPI推論の``key``を申請する必要があります。https://www.modelscope.cn/docs/model-service/API-Inference/intro 173 | その後、以下の設定を使用できます: 174 | ```yaml 175 | model: 176 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 177 | key: "" 178 | url: "https://api-inference.modelscope.cn/v1/" 179 | ``` 180 | 181 | 詳細については、[モデルの説明](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412)を参照してください。 182 | 183 | ##### (2) Dashscopeバージョン 184 | 185 | Alibaba Cloud DashScopeにモデルをデプロイしましたので、以下の環境変数を設定する必要があります: 186 | ``key``を取得するためにメールを送信してください。(godot.lzl@alibaba-inc.com) 187 | メールには以下の情報を添付してください: 188 | ```yaml 189 | name: "YOUR NAME", 190 | email: "YOUR EMAIL", 191 | organization: "your college or Company or Organization" 192 | ``` 193 | メールに基づいて``key``を送信します。ymlファイルに``key``を記入できます。 194 | ``key``は1ヶ月または200クエリまたはその他の法的制限で期限切れになります。 195 | 196 | ```yaml 197 | model: 198 | name: "xiyansql-qwencoder-32b" 199 | key: "KEY" 200 | url: "https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql" 201 | database: 202 | ``` 203 | 204 | 注意:このモデルサービスは試用のみです。生産で使用する場合は、私たちに連絡してください。 205 | 206 | または、モデル[XiYanSQL-qwencoder-32B](https://github.com/XGenerationLab/XiYanSQL-QwenCoder)を自分のサーバーにデプロイすることもできます。 207 | 208 | #### ローカルモデル 209 | 注意:ローカルモデルは遅いです(私のMacBookではクエリごとに約12秒かかります)。 210 | 安定して高速なサービスが必要な場合は、Modelscopeバージョンを使用することをお勧めします。 211 | 212 | ローカルモードでxiyan_mcp_serverを実行するには、以下が必要です: 213 | 1)少なくとも16GBのRAMを持つPC/Mac 214 | 2)6GBのディスクスペース 215 | 216 | ステップ1:追加のPythonパッケージをインストール 217 | ```bash 218 | pip install flask modelscope torch==2.2.2 accelerate>=0.26.0 numpy=2.2.3 219 | ``` 220 | 221 | ステップ2:(オプション)モデルを手動でダウンロード 222 | [xiyansql-qwencoder-3b](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-3B-2502/)をお勧めします。 223 | 以下のコマンドでモデルを手動でダウンロードできます: 224 | ```bash 225 | modelscope download --model XGenerationLab/XiYanSQL-QwenCoder-3B-2502 226 | ``` 227 | 6GBのディスクスペースが必要です。 228 | 229 | ステップ3:スクリプトをダウンロードしてサーバーを実行します。ファイルsrc/xiyan_mcp_server/local_xiyan_server.py 230 | 231 | ```bash 232 | python local_xiyan_server.py 233 | ``` 234 | サーバーはhttp://localhost:5090/で実行されます。 235 | 236 | ステップ4:設定を準備してxiyan_mcp_serverを実行 237 | config.ymlは以下のようになります: 238 | ```yaml 239 | model: 240 | name: "xiyansql-qwencoder-3b" 241 | key: "KEY" 242 | url: "http://127.0.0.1:5090" 243 | ``` 244 | 245 | これでローカルモードの準備が整いました。 246 | 247 | ### データベース設定 248 | 
``host``、``port``、``user``、``password``、``database``はデータベースの接続情報です。 249 | 250 | ローカルまたは任意のリモートデータベースを使用できます。現在、MySQLとPostgreSQLをサポートしています(他の方言も近日中にサポート予定)。 251 | 252 | #### MySQL 253 | 254 | ```yaml 255 | database: 256 | host: "localhost" 257 | port: 3306 258 | user: "root" 259 | password: "" 260 | database: "" 261 | ``` 262 | #### PostgreSQL 263 | ステップ1:Pythonパッケージをインストール 264 | ```bash 265 | pip install psycopg2 266 | ``` 267 | ステップ2:config.ymlを以下のように準備します: 268 | ```yaml 269 | database: 270 | dialect: "postgresql" 271 | host: "localhost" 272 | port: 5432 273 | user: "" 274 | password: "" 275 | database: "" 276 | ``` 277 | 278 | 注意:PostgreSQLの場合、``dialect``は``postgresql``である必要があります。 279 | 280 | ## 起動 281 | ### Claude Desktop 282 | Claude Desktopの設定ファイルに以下を追加します。Claude Desktop設定例を参照 283 | ```json 284 | { 285 | "mcpServers": { 286 | "xiyan-mcp-server": { 287 | "command": "python", 288 | "args": [ 289 | "-m", 290 | "xiyan_mcp_server" 291 | ], 292 | "env": { 293 | "YML": "PATH/TO/YML" 294 | } 295 | } 296 | } 297 | } 298 | ``` 299 | ### Cline 300 | [Claude Desktop](#claude-desktop)の設定を準備 301 | 302 | ### Goose 303 | 設定に以下のコマンドを追加します。Goose設定例を参照 304 | 305 | ```yaml 306 | env YML=path/to/yml python -m xiyan_mcp_server 307 | ``` 308 | ### Cursor 309 | [Goose](#goose)と同じコマンドを使用 310 | 311 | ### Witsy 312 | コマンドに以下を追加: 313 | ```yaml 314 | python -m xiyan_mcp_server 315 | ``` 316 | 環境変数を追加:キーはYML、値はymlファイルのパス。 317 | Witsy設定例を参照 318 | 319 | ## 動作しない場合 320 | お問い合わせ: 321 | Ding Group | 322 | Weiboでフォロー 323 | 324 | ## 引用 325 | 私たちの仕事が役立つと思われる場合は、自由に引用してください。 326 | ```bib 327 | @article{xiyansql, 328 | title={A Preview of XiYan-SQL: A Multi-Generator Ensemble Framework for Text-to-SQL}, 329 | author={Yingqi Gao and Yifu Liu and Xiaoxia Li and Xiaorong Shi and Yin Zhu and Yiming Wang and Shiqi Li and Wei Li and Yuntao Hong and Zhiling Luo and Jinyang Gao and Liyu Mou and Yu Li}, 330 | year={2024}, 331 | journal={arXiv preprint arXiv:2411.08599}, 332 | url={https://arxiv.org/abs/2411.08599}, 333 | primaryClass={cs.AI} 334 | } 335 | ``` 336 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 | # XiYan MCP 服务器 2 | 3 |

4 | MCP Playwright 5 |

6 |

7 | 一种模型上下文协议(MCP)服务器,支持通过自然语言查询数据库
8 | XiYan-SQL提供技术支持,该项目在开放基准上实现了文本到SQL的最好性能 9 |

10 | 11 |

12 | 💻 XiYan-mcp-server | 13 | 🌐 XiYan-SQL | 14 | 📖 Arxiv | 15 | 📄 PapersWithCode 16 | 💻 HuggingFace | 17 | 🤖 ModelScope | 18 | 🌕 析言GBI 19 |
20 | MCP Server 21 | 22 | 23 | License: Apache 2.0 24 | 25 | PyPI Downloads 26 | Smithery Installs 27 | 28 | GitHub stars 29 | 30 |
31 | 英文 | 中文 | 日本語
32 | 钉钉群 | 33 | 关注我 34 |

35 | 36 | ## 目录 37 | 38 | - [特性](#特性) 39 | - [预览](#预览) 40 | - [架构](#架构) 41 | - [最佳实践](#最佳实践) 42 | - [工具预览](#工具预览) 43 | - [安装](#安装) 44 | - [从 pip 安装](#从-pip-安装) 45 | - [从 Smithery.ai 安装](#从-smitheryai-安装) 46 | - [配置](#配置) 47 | - [LLM 配置](#llm-配置) 48 | - [通用 LLMs](#通用-llms) 49 | - [Text-to-SQL 最新模型](#text-to-sql-最新模型) 50 | - [本地模型](#本地模型) 51 | - [数据库配置](#数据库配置) 52 | - [MySQL](#mysql) 53 | - [PostgreSQL](#postgresql) 54 | - [启动](#启动) 55 | - [Claude Desktop](#claude-desktop) 56 | - [Cline](#cline) 57 | - [Goose](#goose) 58 | - [Cursor](#cursor) 59 | - [它不起作用](#它不起作用) 60 | - [引用](#引用) 61 | 62 | ## 特性 63 | - 🌐 通过 [XiYanSQL](https://github.com/XGenerationLab/XiYan-SQL) 使用自然语言获取数据 64 | - 🤖 支持通用 LLMs(如 GPT, qwenmax),文本到 SQL 最新模型 65 | - 💻 支持纯本地模式(高安全性!) 66 | - 📝 支持 MySQL 和 PostgreSQL。 67 | - 🖱️ 列出可用表作为资源 68 | - 🔧 读取表内容 69 | 70 | ## 预览 71 | ### 架构 72 | 有两种方式可以将该服务器集成到您的项目中,如下图所示: 73 | 左侧是远程模式,这是默认模式。它需要 API 密钥来访问服务提供商的 xiyanSQL-qwencoder-32B 模型(请参阅[配置](#配置))。 74 | 另一种模式是本地模式,更加安全,不需要 API 密钥。 75 | 76 | ![architecture.png](imgs/architecture.png) 77 | 78 | ### 最佳实践和报告 79 | 80 | ["使用 MCP + Modelscope API 推理构建本地数据助手,无需编写一行代码"](https://mp.weixin.qq.com/s/tzDelu0W4w6t9C0_yYRbHA) 81 | 82 | ["Modelscope 上的 Xiyan MCP"](https://modelscope.cn/headlines/article/1142) 83 | 84 | ### 在 MCPBench 上的评估 85 | 下图展示了 XiYan MCP 服务在 MCPBench 基准测试中的表现。XiYan MCP 服务器的性能优于 MySQL MCP 服务和 PostgreSQL MCP 服务,领先 2-22 个百分点。详细的实验结果可以在 [MCPBench](https://github.com/modelscope/MCPBench) 和报告 ["MCP 服务器评估报告"](https://arxiv.org/abs/2504.11094) 中找到。 86 | 87 | ![exp_mcpbench.png](imgs/exp_mcpbench.png) 88 | 89 | ### 工具预览 90 | - 工具 ``get_data`` 提供了一个自然语言接口,用于从数据库中检索数据。该服务器将输入的自然语言转换为 SQL,并调用数据库返回查询结果。 91 | 92 | - ``{dialect}://{table_name}`` 资源允许在指定特定的 table_name 时从数据库中获取部分样本数据以供模型参考。 93 | - ``{dialect}://`` 资源将列出当前数据库的名称。 94 | 95 | ## 安装 96 | ### 从 pip 安装 97 | 98 | 要求 Python 3.11 或更高版本。 99 | 您可以通过 pip 安装服务器,它将安装最新版本: 100 | 101 | ```bash 102 | pip install xiyan-mcp-server 103 | ``` 104 | 105 | 安装后,您可以直接通过以下命令运行服务器: 106 | ```bash 107 | python -m xiyan_mcp_server 108 | ``` 109 | 但在您完成以下配置之前,它不会提供任何功能。 110 | 您将获得一个 yml 文件。然后您可以通过以下方式运行服务器: 111 | ```yaml 112 | env YML=path/to/yml python -m xiyan_mcp_server 113 | ``` 114 | 115 | ### 从 Smithery.ai 安装 116 | 请参见 [@XGenerationLab/xiyan_mcp_server](https://smithery.ai/server/@XGenerationLab/xiyan_mcp_server) 117 | 118 | 未进行全面测试。 119 | 120 | ## 配置 121 | 122 | 您需要一个 YAML 配置文件来配置服务器。 123 | 提供了一个默认配置文件 config_demo.yml,内容如下: 124 | 125 | ```yaml 126 | model: 127 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 128 | key: "" 129 | url: "https://api-inference.modelscope.cn/v1/" 130 | 131 | database: 132 | host: "localhost" 133 | port: 3306 134 | user: "root" 135 | password: "" 136 | database: "" 137 | ``` 138 | 139 | ### LLM 配置 140 | ``Name`` 是要使用的模型名称,``key`` 是模型的 API 密钥,``url`` 是模型的 API 地址。我们支持以下模型。 141 | 142 | | 版本 | 通用 LLMs (GPT, qwenmax) | Modelscope 最新模型 | Dashscope 最新模型 | 本地 LLMs | 143 | |----------|------------------------------------|-----------------------------|----------------------------------|----------------| 144 | | 描述 | 基础,易于使用 | 性能最好,稳定,推荐 | 性能最好,供试用 | 速度慢,高安全性 | 145 | | 名称 | 官方模型名称(例如 gpt-3.5-turbo, qwen-max) | XGenerationLab/XiYanSQL-QwenCoder-32B-2412 | xiyansql-qwencoder-32b | xiyansql-qwencoder-3b | 146 | | 密钥 | 服务提供商的 API 密钥(例如 OpenAI, 阿里云) | modelscope 的 API 密钥 | 通过电子邮件获取的 API 密钥 | "" | 147 | | URL | 服务提供商的端点(例如 "https://api.openai.com/v1") | https://api-inference.modelscope.cn/v1/ | https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql | http://localhost:5090 | 148 | 149 | #### 
通用 LLMs 150 | 如果您想使用通用 LLMs,如 gpt3.5,您可以直接像这样配置: 151 | ```yaml 152 | model: 153 | name: "gpt-3.5-turbo" 154 | key: "YOUR KEY " 155 | url: "https://api.openai.com/v1" 156 | database: 157 | ``` 158 | 159 | 如果您想使用来自阿里巴巴的 Qwen,比如 Qwen-max,您可以使用以下配置: 160 | ```yaml 161 | model: 162 | name: "qwen-max" 163 | key: "YOUR KEY " 164 | url: "https://dashscope.aliyuncs.com/compatible-mode/v1" 165 | database: 166 | ``` 167 | #### Text-to-SQL 最新模型 168 | 我们推荐 XiYanSQL-qwencoder-32B(https://github.com/XGenerationLab/XiYanSQL-QwenCoder),这是文本到 SQL 的最新模型,参见 [Bird benchmark](https://bird-bench.github.io/)。 169 | 您可以有两种方式使用该模型: 170 | (1) [Modelscope](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412), (2) 阿里云 DashScope。 171 | 172 | ##### (1) Modelscope 版本 173 | 您需要从 Modelscope 申请一个 API 推理的 ``key``,网址: https://www.modelscope.cn/docs/model-service/API-Inference/intro 174 | 然后您可以使用以下配置: 175 | ```yaml 176 | model: 177 | name: "XGenerationLab/XiYanSQL-QwenCoder-32B-2412" 178 | key: "" 179 | url: "https://api-inference.modelscope.cn/v1/" 180 | ``` 181 | 182 | 请阅读我们的 [模型描述](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-32B-2412) 获取更多详细信息。 183 | 184 | ##### (2) Dashscope 版本 185 | 186 | 我们在阿里云 DashScope 上部署了模型,因此您需要设置以下环境变量: 187 | 请将您的电子邮件发送给我以获取 ``key``。 (godot.lzl@alibaba-inc.com) 188 | 在电子邮件中,请附上以下信息: 189 | ```yaml 190 | name: "YOUR NAME", 191 | email: "YOUR EMAIL", 192 | organization: "your college or Company or Organization" 193 | ``` 194 | 我们将根据您的电子邮件发送 ``key`` 给您。您可以在 yml 文件中填写该 ``key``。 195 | 该 ``key``将在 1 个月、200 次查询或其他法律限制后过期。 196 | 197 | ```yaml 198 | model: 199 | name: "xiyansql-qwencoder-32b" 200 | key: "KEY" 201 | url: "https://xiyan-stream.biz.aliyun.com/service/api/xiyan-sql" 202 | database: 203 | ``` 204 | 205 | 注意:该模型服务仅供试用,如果您需要在生产中使用,请与我们联系。 206 | 207 | 或者,您也可以在自己的服务器上自行部署模型 [XiYanSQL-qwencoder-32B](https://github.com/XGenerationLab/XiYanSQL-QwenCoder)。 208 | 209 | #### 本地模型 210 | 注意:本地模型速度较慢(在我的 MacBook 上每个查询约 12 秒)。 211 | 如果您需要稳定快速的服务,我们仍然推荐使用 Modelscope 版本。 212 | 213 | 要在本地模式下运行 xiyan_mcp_server,您需要: 214 | 1)一台至少具有 16GB 内存的 PC/Mac 215 | 2)6GB 硬盘空间 216 | 217 | 步骤 1:安装额外的 Python 包 218 | ```bash 219 | pip install flask modelscope torch==2.2.2 accelerate>=0.26.0 numpy=2.2.3 220 | ``` 221 | 222 | 步骤 2:(可选)手动下载模型 223 | 我们推荐 [xiyansql-qwencoder-3b](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-3B-2502/)。 224 | 您可以手动下载模型: 225 | ```bash 226 | modelscope download --model XGenerationLab/XiYanSQL-QwenCoder-3B-2502 227 | ``` 228 | 这将占用您 6GB 的磁盘空间。 229 | 230 | 步骤 3:下载脚本并运行服务器。文件 src/xiyan_mcp_server/local_xiyan_server.py 231 | 232 | ```bash 233 | python local_xiyan_server.py 234 | ``` 235 | 服务器将在 http://localhost:5090/ 上运行。 236 | 237 | 步骤 4:准备配置并运行 xiyan_mcp_server 238 | config.yml 应如下所示: 239 | ```yml 240 | model: 241 | name: "xiyansql-qwencoder-3b" 242 | key: "KEY" 243 | url: "http://127.0.0.1:5090" 244 | ``` 245 | 246 | 到目前为止,本地模式准备就绪。 247 | 248 | ### 数据库配置 249 | ``host``、``port``、``user``、``password``、``database`` 是数据库的连接信息。 250 | 251 | 您可以使用本地或任何远程数据库。现在我们支持 MySQL 和 PostgreSQL(很快支持更多方言)。 252 | 253 | #### MySQL 254 | 255 | ```yaml 256 | database: 257 | host: "localhost" 258 | port: 3306 259 | user: "root" 260 | password: "" 261 | database: "" 262 | ``` 263 | #### PostgreSQL 264 | 步骤 1:安装 Python 包 265 | ```bash 266 | pip install psycopg2 267 | ``` 268 | 步骤 2:准备 config.yml 如下: 269 | ```yaml 270 | database: 271 | dialect: "postgresql" 272 | host: "localhost" 273 | port: 5432 274 | user: "" 275 | password: "" 276 | database: "" 277 | ``` 278 | 279 
| 注意 ``dialect`` 应为 ``postgresql`` 以适用于 PostgreSQL。 280 | 281 | ## 启动 282 | ### Claude Desktop 283 | 在您的 Claude Desktop 配置文件中添加以下内容,参考 Claude Desktop 配置示例 284 | ```json 285 | { 286 | "mcpServers": { 287 | "xiyan-mcp-server": { 288 | "command": "/xxx/python", 289 | "args": [ 290 | "-m", 291 | "xiyan_mcp_server" 292 | ], 293 | "env": { 294 | "YML": "PATH/TO/YML" 295 | } 296 | } 297 | } 298 | } 299 | ``` 300 | **注意此处的python命令需要完整的python可执行文件路径(`/xxx/python`),否则会找不到python解释器,可以通过`which python`来确定此路径。使用其他非claude应用也是如此。** 301 | ### Cline 302 | 准备配置,参考 [Claude Desktop](#claude-desktop) 303 | 304 | ### Goose 305 | 在配置中添加以下命令,参考 Goose 配置示例 306 | 307 | ```yaml 308 | env YML=path/to/yml /xxx/python -m xiyan_mcp_server 309 | ``` 310 | ### Cursor 311 | 使用与 [Goose](#goose) 相同的命令。 312 | 313 | ### Witsy 314 | 在命令中添加以下内容: 315 | ```yaml 316 | /xxx/python -m xiyan_mcp_server 317 | ``` 318 | 添加一个环境变量:键为 YML,值为您 yml 文件的路径。 319 | 参考 Witsy 配置示例 320 | 321 | ## 它不起作用! 322 | 请联系我们: 323 | 钉钉群| 324 | 关注我 325 | 326 | ## 引用 327 | 如果您觉得我们的工作有帮助,可以随意引用。 328 | ```bib 329 | @article{xiyansql, 330 | title={A Preview of XiYan-SQL: A Multi-Generator Ensemble Framework for Text-to-SQL}, 331 | author={Yingqi Gao and Yifu Liu and Xiaoxia Li and Xiaorong Shi and Yin Zhu and Yiming Wang and Shiqi Li and Wei Li and Yuntao Hong and Zhiling Luo and Jinyang Gao and Liyu Mou and Yu Li}, 332 | year={2024}, 333 | journal={arXiv preprint arXiv:2411.08599}, 334 | url={https://arxiv.org/abs/2411.08599}, 335 | primaryClass={cs.AI} 336 | } 337 | ``` 338 | -------------------------------------------------------------------------------- /imgs/Paper-Arxiv-orange.svg: -------------------------------------------------------------------------------- 1 | Paper: ArxivPaperArxiv -------------------------------------------------------------------------------- /imgs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/architecture.png -------------------------------------------------------------------------------- /imgs/claude_desktop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/claude_desktop.jpg -------------------------------------------------------------------------------- /imgs/dinggroup_out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/dinggroup_out.png -------------------------------------------------------------------------------- /imgs/exp_mcpbench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/exp_mcpbench.png -------------------------------------------------------------------------------- /imgs/goose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/goose.jpg -------------------------------------------------------------------------------- /imgs/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/logo.png -------------------------------------------------------------------------------- /imgs/witsy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/imgs/witsy.jpg -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "xiyan_mcp_server" 3 | version = "0.1.5.dev0" 4 | description = "A Model Context Protocol (MCP) server that utilizes XiyanSQL with databases. This server enables AI assistants to list tables, read data, and execute natural language queries" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | dependencies = [ 8 | "mcp>=1.0.0", 9 | "mysql-connector-python>=9.1.0", 10 | "llama_index", 11 | "sqlalchemy", 12 | "pymysql" 13 | ] 14 | [[project.authors]] 15 | name = "Zhiling Luo" 16 | email = "godot.lzl@alibaba-inc.com" 17 | 18 | [build-system] 19 | requires = ["hatchling"] 20 | build-backend = "hatchling.build" 21 | 22 | [project.scripts] 23 | mysql_mcp_server = "xiyan_mcp_server:main" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mcp 2 | mysql-connector-python>=9.1.0 3 | sqlalchemy 4 | llama_index 5 | yaml 6 | pandas 7 | pymysql -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='xiyan_mcp_server', # 包的名字 5 | version='0.1.5.dev0', # 版本号 6 | packages=find_packages(), # 自动找到项目中的包 7 | install_requires=[ # 必要的包依赖 8 | # 'numpy', # 示例:若有依赖包,将其列在此 9 | 'mcp', 10 | 'mysql-connector-python>=9.1.0', 11 | 'sqlalchemy', 12 | 'llama_index', 13 | 'yaml', 14 | 'pandas', 15 | 'pymysql' 16 | ], 17 | author='Bruce Luo', # 作者 18 | author_email='godot.lzl@alibaba-inc.com', # 作者邮箱 19 | description='A MCP server of natural language interface to Database', # 简短描述 20 | long_description=open('README.md').read(), # 从 README 文件读取详细描述 21 | long_description_content_type='text/markdown', # 描述内容类型 22 | url='https://github.com/XGenerationLab/xiyan_mcp_server', # 项目主页 23 | classifiers=[ 24 | 'Programming Language :: Python :: 3', 25 | 'License :: OSI Approved :: MIT License', 26 | 'Operating System :: OS Independent', 27 | ], 28 | python_requires='>=3.11', # 支持的 Python 版本 29 | ) 30 | -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery.ai configuration 2 | startCommand: 3 | type: stdio 4 | configSchema: 5 | # JSON Schema defining the configuration options for the MCP. 6 | { 7 | "YML":"src/xiyan_mcp_server/config_demo.yml" 8 | } 9 | commandFunction: 10 | # A function that produces the CLI command to start the MCP on stdio. 
11 | |- 12 | (config) => ({ 13 | "command": "python", 14 | "args": [ 15 | "-m", 16 | "xiyan_mcp_server" 17 | ] 18 | }) 19 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/XGenerationLab/xiyan_mcp_server/d153a94e7fcc508202427f733d5c8589fa80072a/src/xiyan_mcp_server/__init__.py -------------------------------------------------------------------------------- /src/xiyan_mcp_server/__main__.py: -------------------------------------------------------------------------------- 1 | 2 | from .server import mcp, global_config, mcp_config 3 | 4 | def main(): 5 | mcp.run(transport=mcp_config.get('transport', 'stdio')) 6 | 7 | 8 | 9 | if __name__ == "__main__": 10 | main() -------------------------------------------------------------------------------- /src/xiyan_mcp_server/config_demo.yml: -------------------------------------------------------------------------------- 1 | model: 2 | name: "pre-xiyan_multi_dialect_v3" 3 | key: "" 4 | url: "https://poc-dashscope.aliyuncs.com/compatible-mode/v1" 5 | 6 | database: 7 | host: "localhost" 8 | port: 3306 9 | user: "root" 10 | password: "" 11 | database: "" -------------------------------------------------------------------------------- /src/xiyan_mcp_server/database_env.py: -------------------------------------------------------------------------------- 1 | from .utils.db_source import HITLSQLDatabase 2 | 3 | class DataBaseEnv: 4 | def __init__(self, database: HITLSQLDatabase): 5 | self.database = database 6 | self.dialect = database.dialect 7 | self.mschema = database.mschema 8 | self.db_name = database.db_name 9 | self.mschema_str = self.mschema.to_mschema() -------------------------------------------------------------------------------- /src/xiyan_mcp_server/local_model/README.md: -------------------------------------------------------------------------------- 1 | ## Local Model Configuration 2 | Note: the local model is slow (about 12 seconds per query on my macbook). 3 | If you need a stable and fast service, we still recommend to use the modelscope version. 4 | 5 | To run xiyan_mcp_server in local mode, you need 6 | 1) a PC/Mac/Machine with at least 16GB RAM 7 | 2) 6GB disk space 8 | 9 | The above setting is for model of size 3B. You can adjust the settings to run a 32B model on a server. 10 | 11 | ### Step 1: Install additional Python packages 12 | ```bash 13 | pip install flask modelscope torch==2.2.2 accelerate>=0.26.0 numpy=2.2.3 14 | ``` 15 | 16 | ### Step 2: (optional) manually download the model 17 | We recommend [xiyansql-qwencoder-3b](https://www.modelscope.cn/models/XGenerationLab/XiYanSQL-QwenCoder-3B-2502/). 18 | You can manually download the model by 19 | ```bash 20 | modelscope download --model XGenerationLab/XiYanSQL-QwenCoder-3B-2502 21 | ``` 22 | It will take you 6GB disk space. 23 | 24 | ### Step 3: download the script and run server. 25 | 26 | Script is located at `src/xiyan_mcp_server/local_model/local_xiyan_server.py` 27 | 28 | ```bash 29 | python local_xiyan_server.py 30 | ``` 31 | The server will be running on http://localhost:5090/ 32 | 33 | ### Step 4: prepare config and run xiyan_mcp_server 34 | the config.yml should be like: 35 | ```yml 36 | model: 37 | name: "xiyansql-qwencoder-3b" 38 | key: "KEY" 39 | url: "http://127.0.0.1:5090" 40 | ``` 41 | 42 | Till now the local model is ready. 
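Once the local server is up, you can sanity-check it before pointing xiyan_mcp_server at it. The sketch below is a minimal example: it assumes the default address `http://127.0.0.1:5090` from `local_xiyan_server.py` and uses the third-party `requests` package, which is not part of the project requirements.

```python
import requests

# Smoke test for the local model server started by local_xiyan_server.py.
# The endpoint accepts an OpenAI-style message list and returns choices[0].message.content.
payload = {
    "messages": [
        {"role": "user", "content": "Write a SQL query that selects the number 1."}
    ]
}
resp = requests.post("http://127.0.0.1:5090/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

The long timeout is deliberate: as noted above, the local 3B model can take on the order of ten seconds per query on a laptop CPU.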
-------------------------------------------------------------------------------- /src/xiyan_mcp_server/local_model/local_xiyan_server.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | from modelscope import AutoModelForCausalLM, AutoTokenizer 3 | import torch # require torch==2.2.2,accelerate>=0.26.0,numpy=2.2.3,modelscope 4 | 5 | 6 | model_name = 'XGenerationLab/XiYanSQL-QwenCoder-3B-2502' 7 | local_model = AutoModelForCausalLM.from_pretrained(model_name, device_map='cpu', 8 | torch_dtype=torch.float32) 9 | local_tokenizer = AutoTokenizer.from_pretrained(model_name) 10 | app = Flask(__name__) 11 | 12 | @app.route('/chat/completions', methods=['POST']) 13 | def chat_completions(): 14 | # 获取请求中的数据 15 | input_data = request.json 16 | 17 | # 提取提示(prompt) 18 | messages = input_data.get('messages', []) 19 | 20 | if not messages: 21 | return jsonify({'error': 'No messages provided'}) 22 | 23 | text = local_tokenizer.apply_chat_template( 24 | messages, 25 | tokenize=False, 26 | add_generation_prompt=True 27 | ) 28 | inputs = local_tokenizer([text], return_tensors="pt") 29 | 30 | # 编码输入并生成响应 31 | generated_ids = local_model.generate(inputs['input_ids'], max_new_tokens=1024, 32 | temperature=0.1, 33 | top_p=0.8, 34 | do_sample=True) 35 | 36 | generated_ids = [ 37 | output_ids[len(input_ids):] for input_ids, output_ids in zip(inputs.input_ids, generated_ids) 38 | ] 39 | generated_text = local_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] 40 | 41 | 42 | # 生成响应格式 43 | response = { 44 | 'id': 'xiyan', 45 | 'object': 'chat.completion', 46 | 'created': 1234567890, 47 | 'model': model_name, 48 | 'choices': [{ 49 | 'index': 0, 50 | 'message': { 51 | "content":generated_text 52 | }, 53 | 'finish_reason': 'length' 54 | }] 55 | } 56 | print(generated_text) 57 | return jsonify(response) 58 | 59 | 60 | if __name__ == '__main__': 61 | # this flask server runs on http://localhost:5090 62 | app.run(host='0.0.0.0', port=5090) 63 | 64 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/server.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | from typing import Literal 5 | import yaml # 添加yaml库导入 6 | 7 | from mysql.connector import connect, Error 8 | from mcp.server import FastMCP 9 | from mcp.types import TextContent 10 | 11 | from .utils.db_config import DBConfig 12 | from .database_env import DataBaseEnv 13 | from .utils.db_source import HITLSQLDatabase 14 | from .utils.db_util import init_db_conn 15 | from .utils.file_util import extract_sql_from_qwen 16 | from .utils.llm_util import call_openai_sdk 17 | 18 | 19 | 20 | 21 | # Configure logging 22 | logging.basicConfig( 23 | level=logging.INFO, 24 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 25 | ) 26 | logger = logging.getLogger("xiyan_mcp_server") 27 | 28 | 29 | def get_yml_config(): 30 | config_path = os.getenv("YML", os.path.join(os.path.dirname(__file__), "config_demo.yml")) 31 | logger.info(f"Loading configuration from {config_path}") 32 | try: 33 | with open(config_path, 'r') as file: 34 | config = yaml.safe_load(file) 35 | return config 36 | except FileNotFoundError: 37 | logger.error(f"Configuration file {config_path} not found.") 38 | raise 39 | except yaml.YAMLError as exc: 40 | logger.error(f"Error parsing configuration file {config_path}: {exc}") 41 | raise 42 | 43 | 44 | 45 | def 
get_xiyan_config(db_config): 46 | dialect = db_config.get('dialect','mysql') 47 | xiyan_db_config = DBConfig(dialect=dialect,db_name=db_config['database'], user_name=db_config['user'], db_pwd=db_config['password'], db_host=db_config['host'], port=db_config['port']) 48 | return xiyan_db_config 49 | 50 | 51 | global_config = get_yml_config() 52 | mcp_config = global_config.get('mcp', {}) 53 | model_config = global_config['model'] 54 | global_db_config = global_config.get('database') 55 | global_xiyan_db_config = get_xiyan_config(global_db_config) 56 | dialect = global_db_config.get('dialect','mysql') 57 | 58 | 59 | 60 | mcp = FastMCP("xiyan", **mcp_config) 61 | 62 | 63 | @mcp.resource(dialect+'://'+global_db_config.get('database','')) 64 | async def read_resource() -> str: 65 | 66 | db_engine = init_db_conn(global_xiyan_db_config) 67 | db_source = HITLSQLDatabase(db_engine) 68 | return db_source.mschema.to_mschema() 69 | 70 | @mcp.resource(dialect+"://{table_name}") 71 | async def read_resource(table_name) -> str: 72 | """Read table contents.""" 73 | config = global_db_config 74 | try: 75 | with connect(**config) as conn: 76 | with conn.cursor() as cursor: 77 | cursor.execute(f"SELECT * FROM {table_name} LIMIT 100") 78 | columns = [desc[0] for desc in cursor.description] 79 | rows = cursor.fetchall() 80 | result = [",".join(map(str, row)) for row in rows] 81 | return "\n".join([",".join(columns)] + result) 82 | 83 | except Error as e: 84 | raise RuntimeError(f"Database error: {str(e)}") 85 | 86 | 87 | def sql_gen_and_execute(db_env: DataBaseEnv, query: str): 88 | """ 89 | Transfers the input natural language question to sql query (known as Text-to-sql) and executes it on the database. 90 | Args: 91 | query: natural language to query the database. e.g. 
查询在2024年每个月,卡宴的各经销商销量分别是多少 92 | """ 93 | 94 | #db_env = context_variables.get('db_env', None) 95 | prompt = f"""你现在是一名{db_env.dialect}数据分析专家,你的任务是根据参考的数据库schema和用户的问题,编写正确的SQL来回答用户的问题,生成的SQL用```sql 和```包围起来。 96 | 【数据库schema】 97 | {db_env.mschema_str} 98 | 99 | 【问题】 100 | {query} 101 | """ 102 | #logger.info(f"SQL generation prompt: {prompt}") 103 | 104 | messages = [ 105 | {"role": "system", "content": prompt}, 106 | {"role": "user", "content": f"用户的问题是: {query}"} 107 | ] 108 | param = {"model": model_config['name'], "messages": messages, "key": model_config['key'], "url": model_config['url']} 109 | 110 | try: 111 | response = call_openai_sdk(**param) 112 | content = response.choices[0].message.content 113 | sql_query = extract_sql_from_qwen(content) 114 | status, res = db_env.database.fetch(sql_query) 115 | if not status: 116 | for idx in range(3):  # retry up to 3 times, asking the LLM to repair the reported SQL error 117 | sql_query = sql_fix(db_env.dialect, db_env.mschema_str, query, sql_query, res) 118 | status, res = db_env.database.fetch(sql_query) 119 | if status: 120 | break 121 | 122 | sql_res = db_env.database.fetch_truncated(sql_query, max_rows=100) 123 | markdown_res = db_env.database.trunc_result_to_markdown(sql_res) 124 | logger.info(f"SQL query: {sql_query}\nSQL result: {sql_res}") 125 | return markdown_res.strip() 126 | 127 | except Exception as e: 128 | return str(e) 129 | 130 | 131 | def sql_fix(dialect: str, mschema: str, query: str, sql_query: str, error_info: str): 132 | system_prompt = '''现在你是一个{dialect}数据分析专家,需要阅读一个客户的问题,参考的数据库schema,该问题对应的待检查SQL,以及执行该SQL时数据库返回的语法错误,请你仅针对其中的语法错误进行修复,输出修复后的SQL。 133 | 注意: 134 | 1、仅修复语法错误,不允许改变SQL的逻辑。 135 | 2、生成的SQL用```sql 和```包围起来。 136 | 137 | 【数据库schema】 138 | {schema} 139 | '''.format(dialect=dialect, schema=mschema) 140 | user_prompt = '''【问题】 141 | {question} 142 | 143 | 【待检查SQL】 144 | {sql} 145 | 146 | 【错误信息】 147 | {sql_res}'''.format(question=query, sql=sql_query, sql_res=error_info) 148 | 149 | messages = [ 150 | {"role": "system", "content": system_prompt}, 151 | {"role": "user", "content": user_prompt} 152 | ] 153 | param = {"model": model_config['name'], "messages": messages, "key": model_config['key'], "url": model_config['url']} 154 | 155 | response = call_openai_sdk(**param) 156 | content = response.choices[0].message.content 157 | sql_query = extract_sql_from_qwen(content) 158 | 159 | return sql_query 160 | 161 | def call_xiyan(query: str) -> str: 162 | """Fetch the data from database through a natural language query 163 | 164 | Args: 165 | query: The query in natural language 166 | """ 167 | 168 | logger.info(f"Calling tool with arguments: {query}") 169 | try: 170 | db_engine = init_db_conn(global_xiyan_db_config) 171 | db_source = HITLSQLDatabase(db_engine) 172 | except Exception as e: 173 | 174 | return "数据库连接失败"+str(e) 175 | logger.info(f"Calling xiyan") 176 | env = DataBaseEnv(db_source) 177 | res = sql_gen_and_execute(env, query) 178 | 179 | return str(res) 180 | @mcp.tool() 181 | def get_data(query: str) -> list[TextContent]: 182 | """Fetch the data from database through a natural language query 183 | 184 | Args: 185 | query: The query in natural language 186 | """ 187 | 188 | res = call_xiyan(query) 189 | return [TextContent(type="text", text=res)] 190 | 191 | 192 | 193 | def main(): 194 | parser = argparse.ArgumentParser(description="Run MCP server.") 195 | parser.add_argument('transport', nargs='?', default='stdio', choices=['stdio', 'sse'], 196 | help='Transport type (stdio or sse)') 197 | args = parser.parse_args() 198 | mcp.run(transport=args.transport) 199 | 200 | if __name__ == "__main__": 
201 | main() 202 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/common_util.py: -------------------------------------------------------------------------------- 1 | from _datetime import datetime 2 | 3 | 4 | def get_timestamp() -> str: 5 | timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S") 6 | return timestamp 7 | 8 | 9 | def extract_llm_messages(messages: list) -> list: 10 | messages = [message for message in messages if message['role'] in ['system', 'assistant', 'user', 'tool']] 11 | return messages -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import Optional 3 | 4 | from urllib.parse import quote_plus 5 | @dataclass 6 | class DBConfig: 7 | dialect: str = 'sqlite' 8 | db_path: Optional[str] = None # 仅用于 SQLite 9 | db_name: Optional[str] = None # MySQL/PostgreSQL 通用 10 | user_name: Optional[str] = None # MySQL/PostgreSQL 通用 11 | db_pwd: Optional[str] = None # MySQL/PostgreSQL 通用 12 | db_host: Optional[str] = None # MySQL/PostgreSQL 通用 13 | port: Optional[int] = None # MySQL/PostgreSQL 通用 14 | 15 | def __post_init__(self): 16 | if self.dialect == 'sqlite': 17 | self.db_path = self.db_path or 'book_1.sqlite' 18 | elif self.dialect in ['mysql', 'postgresql']: 19 | self.db_name = self.db_name or 'default_db' 20 | self.user_name = quote_plus(self.user_name) or 'default_user' 21 | self.db_pwd = quote_plus(self.db_pwd) or 'default_password' 22 | self.db_host = self.db_host or 'localhost' 23 | self.port = self.port or (3306 if self.dialect == 'mysql' else 5432) 24 | else: 25 | raise ValueError(f"Unsupported database dialect: {self.dialect}") -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_mschema.py: -------------------------------------------------------------------------------- 1 | import random 2 | from .file_util import read_json_file, write_json_to_file, save_raw_text 3 | from .db_util import examples_to_str 4 | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union 5 | 6 | 7 | class MSchema: 8 | def __init__(self, db_id: str = 'Anonymous', schema: Optional[str] = None): 9 | self.db_id = db_id 10 | self.schema = schema 11 | self.tables = {} 12 | self.foreign_keys = [] 13 | 14 | def add_table(self, name, fields={}, comment=None): 15 | self.tables[name] = {"fields": fields.copy(), 'examples': [], 'comment': comment} 16 | 17 | def add_field(self, table_name: str, field_name: str, field_type: str = "", 18 | primary_key: bool = False, nullable: bool = True, default: Any = None, 19 | autoincrement: bool = False, comment: str = "", examples: list = [], **kwargs): 20 | self.tables[table_name]["fields"][field_name] = { 21 | "type": field_type, 22 | "primary_key": primary_key, 23 | "nullable": nullable, 24 | "default": default if default is None else f'{default}', 25 | "autoincrement": autoincrement, 26 | "comment": comment, 27 | "examples": examples.copy(), 28 | **kwargs} 29 | 30 | def add_foreign_key(self, table_name, field_name, ref_schema, ref_table_name, ref_field_name): 31 | self.foreign_keys.append([table_name, field_name, ref_schema, ref_table_name, ref_field_name]) 32 | 33 | def get_field_type(self, field_type, simple_mode=True)->str: 34 | if not simple_mode: 35 | return field_type 36 | else: 37 | return field_type.split("(")[0] 38 
| 39 | def has_table(self, table_name: str) -> bool: 40 | if table_name in self.tables.keys(): 41 | return True 42 | else: 43 | return False 44 | 45 | def has_column(self, table_name: str, field_name: str) -> bool: 46 | if self.has_table(table_name): 47 | if field_name in self.tables[table_name]["fields"].keys(): 48 | return True 49 | else: 50 | return False 51 | else: 52 | return False 53 | 54 | def get_field_info(self, table_name: str, field_name: str) -> Dict: 55 | try: 56 | return self.tables[table_name]['fields'][field_name] 57 | except: 58 | return {} 59 | 60 | def single_table_mschema(self, table_name: str, selected_columns: List = None, 61 | example_num=3, show_type_detail=False, shuffle=True) -> str: 62 | table_info = self.tables.get(table_name, {}) 63 | output = [] 64 | table_comment = table_info.get('comment', '') 65 | if table_comment is not None and table_comment != 'None' and len(table_comment) > 0: 66 | if self.schema is not None and len(self.schema) > 0: 67 | output.append(f"# Table: {self.schema}.{table_name}, {table_comment}") 68 | else: 69 | output.append(f"# Table: {table_name}, {table_comment}") 70 | else: 71 | if self.schema is not None and len(self.schema) > 0: 72 | output.append(f"# Table: {self.schema}.{table_name}") 73 | else: 74 | output.append(f"# Table: {table_name}") 75 | 76 | field_lines = [] 77 | # 处理表中的每一个字段 78 | for field_name, field_info in table_info['fields'].items(): 79 | if selected_columns is not None and field_name.lower() not in selected_columns: 80 | continue 81 | 82 | raw_type = self.get_field_type(field_info['type'], not show_type_detail) 83 | field_line = f"({field_name}:{raw_type.upper()}" 84 | if field_info['comment'] != '': 85 | field_line += f", {field_info['comment'].strip()}" 86 | else: 87 | pass 88 | 89 | ## 打上主键标识 90 | is_primary_key = field_info.get('primary_key', False) 91 | if is_primary_key: 92 | field_line += f", Primary Key" 93 | 94 | # 如果有示例,添加上 95 | if len(field_info.get('examples', [])) > 0 and example_num > 0: 96 | examples = field_info['examples'] 97 | examples = [s for s in examples if s is not None] 98 | examples = examples_to_str(examples) 99 | if len(examples) > example_num: 100 | examples = examples[:example_num] 101 | 102 | if raw_type in ['DATE', 'TIME', 'DATETIME', 'TIMESTAMP']: 103 | examples = [examples[0]] 104 | elif len(examples) > 0 and max([len(s) for s in examples]) > 20: 105 | if max([len(s) for s in examples]) > 50: 106 | examples = [] 107 | else: 108 | examples = [examples[0]] 109 | else: 110 | pass 111 | if len(examples) > 0: 112 | example_str = ', '.join([str(example) for example in examples]) 113 | field_line += f", Examples: [{example_str}]" 114 | else: 115 | pass 116 | else: 117 | field_line += "" 118 | field_line += ")" 119 | 120 | field_lines.append(field_line) 121 | 122 | if shuffle: 123 | random.shuffle(field_lines) 124 | 125 | output.append('[') 126 | output.append(',\n'.join(field_lines)) 127 | output.append(']') 128 | 129 | return '\n'.join(output) 130 | 131 | def to_mschema(self, selected_tables: List = None, selected_columns: List = None, 132 | example_num=3, show_type_detail=False, shuffle=True) -> str: 133 | """ 134 | convert to a MSchema string. 
135 | selected_tables: 默认为None,表示选择所有的表 136 | selected_columns: 默认为None,表示所有列全选,格式['table_name.column_name'] 137 | """ 138 | output = [] 139 | 140 | if selected_tables is not None: 141 | selected_tables = [s.lower() for s in selected_tables] 142 | if selected_columns is not None: 143 | selected_columns = [s.lower() for s in selected_columns] 144 | selected_tables = [s.split('.')[0].lower() for s in selected_columns] 145 | 146 | # 依次处理每一个表 147 | for table_name, table_info in self.tables.items(): 148 | if selected_tables is None or table_name.lower() in selected_tables: 149 | cur_table_type = table_info.get('type', 'table') 150 | column_names = list(table_info['fields'].keys()) 151 | if selected_columns is not None: 152 | cur_selected_columns = [c for c in column_names if f"{table_name}.{c}".lower() in selected_columns] 153 | else: 154 | cur_selected_columns = selected_columns 155 | output.append(self.single_table_mschema(table_name, cur_selected_columns, example_num, show_type_detail, shuffle)) 156 | 157 | if shuffle: 158 | random.shuffle(output) 159 | 160 | output.insert(0, f"【DB_ID】 {self.db_id}") 161 | output.insert(1, f"【Schema】") 162 | 163 | # 添加外键信息,选择table_type为view时不展示外键 164 | if self.foreign_keys: 165 | output.append("【Foreign keys】") 166 | for fk in self.foreign_keys: 167 | ref_schema = fk[2] 168 | table1, column1, _, table2, column2 = fk 169 | if selected_tables is None or \ 170 | (table1.lower() in selected_tables and table2.lower() in selected_tables): 171 | if ref_schema == self.schema: 172 | output.append(f"{fk[0]}.{fk[1]}={fk[3]}.{fk[4]}") 173 | 174 | return '\n'.join(output) 175 | 176 | def dump(self): 177 | schema_dict = { 178 | "db_id": self.db_id, 179 | "schema": self.schema, 180 | "tables": self.tables, 181 | "foreign_keys": self.foreign_keys 182 | } 183 | return schema_dict 184 | 185 | def save(self, file_path: str): 186 | schema_dict = self.dump() 187 | write_json_to_file(file_path, schema_dict, is_json_line=False) 188 | 189 | def load(self, file_path: str): 190 | data = read_json_file(file_path) 191 | self.db_id = data.get("db_id", "Anonymous") 192 | self.schema = data.get("schema", None) 193 | self.tables = data.get("tables", {}) 194 | self.foreign_keys = data.get("foreign_keys", []) 195 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_source.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Tuple 2 | 3 | from llama_index.core import SQLDatabase 4 | from sqlalchemy import MetaData, Table, select, text 5 | from sqlalchemy.engine import Engine 6 | 7 | from .db_mschema import MSchema 8 | from .db_util import examples_to_str, preprocess_sql_query 9 | 10 | 11 | class HITLSQLDatabase(SQLDatabase): 12 | def __init__(self, engine: Engine, schema: Optional[str] = None, metadata: Optional[MetaData] = None, 13 | ignore_tables: Optional[List[str]] = None, include_tables: Optional[List[str]] = None, 14 | sample_rows_in_table_info: int = 3, indexes_in_table_info: bool = False, 15 | custom_table_info: Optional[dict] = None, view_support: bool = False, max_string_length: int = 300, 16 | mschema: Optional[MSchema] = None, db_name: Optional[str] = ''): 17 | super().__init__(engine, schema, metadata, ignore_tables, include_tables, sample_rows_in_table_info, 18 | indexes_in_table_info, custom_table_info, view_support, max_string_length) 19 | 20 | self._db_name = db_name 21 | self._usable_tables = [table_name for table_name in 
self._usable_tables if self._inspector.has_table(table_name, schema)] 22 | self._dialect = engine.dialect.name 23 | if mschema is not None: 24 | self._mschema = mschema 25 | else: 26 | self._mschema = MSchema(db_id=db_name, schema=schema) 27 | self.init_mschema() 28 | 29 | @property 30 | def mschema(self) -> MSchema: 31 | """Return M-Schema""" 32 | return self._mschema 33 | 34 | @property 35 | def db_name(self) -> str: 36 | """Return db_name""" 37 | return self._db_name 38 | 39 | def get_pk_constraint(self, table_name: str) -> Dict: 40 | return self._inspector.get_pk_constraint(table_name, self._schema)['constrained_columns'] 41 | 42 | def get_table_comment(self, table_name: str): 43 | try: 44 | return self._inspector.get_table_comment(table_name, self._schema)['text'] 45 | except: # sqlite不支持添加注释 46 | return '' 47 | 48 | def default_schema_name(self) -> Optional[str]: 49 | return self._inspector.default_schema_name 50 | 51 | def get_schema_names(self) -> List[str]: 52 | return self._inspector.get_schema_names() 53 | 54 | def get_foreign_keys(self, table_name: str): 55 | return self._inspector.get_foreign_keys(table_name, self._schema) 56 | 57 | def get_unique_constraints(self, table_name: str): 58 | return self._inspector.get_unique_constraints(table_name, self._schema) 59 | 60 | def fectch_distinct_values(self, table_name: str, column_name: str, max_num: int = 5): 61 | table = Table(table_name, self.metadata_obj, autoload_with=self._engine) 62 | # 构建 SELECT DISTINCT 查询 63 | query = select(table.c[column_name]).distinct().limit(max_num) 64 | values = [] 65 | with self._engine.connect() as connection: 66 | result = connection.execute(query) 67 | distinct_values = result.fetchall() 68 | for value in distinct_values: 69 | if value[0] is not None and value[0] != '': 70 | values.append(value[0]) 71 | return values 72 | 73 | def fetch(self, sql_query: str): 74 | sql_query = preprocess_sql_query(sql_query) 75 | 76 | with self._engine.begin() as connection: 77 | try: 78 | cursor = connection.execute(text(sql_query)) 79 | records = cursor.fetchall() 80 | records = [tuple(row) for row in records] 81 | return True, records 82 | except Exception as e: 83 | # print("An exception occurred during SQL execution.\n", e) 84 | records = str(e) 85 | return False, records 86 | 87 | def fetch_with_column_name(self, sql_query: str): 88 | sql_query = preprocess_sql_query(sql_query) 89 | 90 | with self._engine.begin() as connection: 91 | try: 92 | cursor = connection.execute(text(sql_query)) 93 | columns = cursor.keys() 94 | records = cursor.fetchall() 95 | except Exception as e: 96 | # print("An exception occurred during SQL execution.\n", e) 97 | records = None 98 | columns = [] 99 | return records, columns 100 | 101 | def fetch_with_error_info(self, sql_query: str) -> Tuple[List, str]: 102 | info = '' 103 | sql_query = preprocess_sql_query(sql_query) 104 | with self._engine.begin() as connection: 105 | try: 106 | cursor = connection.execute(text(sql_query)) 107 | records = cursor.fetchall() 108 | except Exception as e: 109 | info = str(e) 110 | records = None 111 | return records, info 112 | 113 | def fetch_truncated(self, sql_query: str, max_rows: Optional[int] = None, max_str_len: int = 30) -> Dict: 114 | sql_query = preprocess_sql_query(sql_query) 115 | with self._engine.begin() as connection: 116 | try: 117 | cursor = connection.execute(text(sql_query)) 118 | result = cursor.fetchall() 119 | truncated_results = [] 120 | if max_rows: 121 | result = result[:max_rows] 122 | for row in result: 123 | 
truncated_row = tuple( 124 | self.truncate_word(column, length=max_str_len) 125 | for column in row 126 | ) 127 | truncated_results.append(truncated_row) 128 | return {"truncated_results": truncated_results, "fields": list(cursor.keys())} 129 | except Exception as e: 130 | # print("An exception occurred during SQL execution.\n", e) 131 | # records = None 132 | records = str(e) 133 | return {"truncated_results": records, "fields": []} 134 | 135 | def trunc_result_to_markdown(self, sql_res: Dict) -> str: 136 | """ 137 | 数据库查询结果转换成markdown格式 138 | """ 139 | truncated_results = sql_res.get("truncated_results", []) 140 | fields = sql_res.get("fields", []) 141 | 142 | if not isinstance(truncated_results, list): 143 | return str(truncated_results) 144 | 145 | header = "| " + " | ".join(fields) + " |" 146 | separator = "| " + " | ".join(["---"] * len(fields)) + " |" 147 | rows = [] 148 | for row in truncated_results: 149 | rows.append("| " + " | ".join(str(value) for value in row) + " |") 150 | markdown_table = "\n".join([header, separator] + rows) 151 | return markdown_table 152 | 153 | 154 | def execute(self, sql_query: str, timeout=5) -> Any: 155 | # import concurrent.futures 156 | sql_query = preprocess_sql_query(sql_query) 157 | 158 | with self._engine.begin() as connection: 159 | try: 160 | cursor = connection.execute(text(sql_query)) 161 | return True 162 | except Exception as e: 163 | info = str(e) 164 | print("SQL执行异常:", info) 165 | return None 166 | 167 | def init_mschema(self): 168 | for table_name in self._usable_tables: 169 | table_comment = self.get_table_comment(table_name) 170 | table_comment = '' if table_comment is None else table_comment.strip() 171 | self._mschema.add_table(table_name, fields={}, comment=table_comment) 172 | pks = self.get_pk_constraint(table_name) 173 | 174 | fks = self.get_foreign_keys(table_name) 175 | for fk in fks: 176 | referred_schema = fk['referred_schema'] 177 | for c, r in zip(fk['constrained_columns'], fk['referred_columns']): 178 | self._mschema.add_foreign_key(table_name, c, referred_schema, fk['referred_table'], r) 179 | 180 | fields = self._inspector.get_columns(table_name, schema=self._schema) 181 | for field in fields: 182 | field_type = f"{field['type']!s}" 183 | field_name = field['name'] 184 | if field_name in pks: 185 | primary_key = True 186 | else: 187 | primary_key = False 188 | 189 | field_comment = field.get("comment", None) 190 | field_comment = "" if field_comment is None else field_comment.strip() 191 | autoincrement = field.get('autoincrement', False) 192 | default = field.get('default', None) 193 | if default is not None: 194 | default = f'{default}' 195 | 196 | try: 197 | examples = self.fectch_distinct_values(table_name, field_name, 5) 198 | except: 199 | examples = [] 200 | examples = examples_to_str(examples) 201 | 202 | self._mschema.add_field(table_name, field_name, field_type=field_type, primary_key=primary_key, 203 | nullable=field['nullable'], default=default, autoincrement=autoincrement, 204 | comment=field_comment, examples=examples) 205 | 206 | def sync_to_local(self, local_engine: Engine): 207 | """同步数据到本地数据库""" 208 | from sqlalchemy.orm import sessionmaker 209 | 210 | local_metadata = MetaData() 211 | 212 | # # 连接到远程数据库 213 | remote_metadata = MetaData() 214 | remote_metadata.reflect(bind=self._engine) 215 | 216 | remote_metadata.create_all(bind=self._engine) 217 | 218 | print(remote_metadata.tables.keys()) 219 | # 同步表结构和数据 220 | for table_name in remote_metadata.tables: 221 | remote_table = Table(table_name, 
remote_metadata, autoload_with=self._engine) 222 | print(f"Syncing table {table_name}...") 223 | 224 | # 创建本地表 225 | remote_table.metadata = local_metadata 226 | local_metadata.drop_all(local_engine) 227 | local_metadata.create_all(local_engine, tables=[remote_table]) 228 | 229 | # 将数据同步到本地 230 | Session = sessionmaker(bind=self._engine) 231 | session = Session() 232 | with local_engine.begin() as local_connection: 233 | data = session.query(remote_table).all() 234 | columns = remote_table.columns.keys() 235 | insert_data = [dict(zip(columns, d)) for d in data] 236 | local_connection.execute(remote_table.insert(), insert_data) 237 | 238 | print("Sync complete.") 239 | 240 | 241 | -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/db_util.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import datetime, decimal 4 | from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, select, text 5 | from sqlalchemy.engine import Engine 6 | from .db_config import DBConfig 7 | 8 | 9 | def init_db_conn(db_config: DBConfig) -> Engine: 10 | if db_config.dialect.lower() == 'sqlite': 11 | return connect_to_sqlite(db_config.db_path) 12 | elif db_config.dialect.lower() == 'mysql': 13 | return connect_to_mysql(db_config.db_name, db_config.user_name, db_config.db_pwd, db_config.db_host, db_config.port) 14 | elif db_config.dialect.lower() == 'postgresql': 15 | return connect_to_pg(db_config.db_name, db_config.user_name, db_config.db_pwd, db_config.db_host, db_config.port) 16 | else: 17 | raise NotImplementedError 18 | 19 | 20 | def connect_to_sqlite(db_path: str) -> Engine: 21 | assert os.path.exists(db_path) 22 | db_engine = create_engine(f'sqlite:///{os.path.abspath(db_path)}') 23 | return db_engine 24 | 25 | 26 | def connect_to_mysql(db_name, user_name, db_pwd, db_host, port) -> Engine: 27 | db_engine = create_engine(f"mysql+pymysql://{user_name}:{db_pwd}@{db_host}:{port}/{db_name}") 28 | return db_engine 29 | 30 | 31 | def connect_to_pg(db_name, user_name, db_pwd, db_host, port) -> Engine: 32 | db_engine = create_engine(f"postgresql+psycopg2://{user_name}:{db_pwd}@{db_host}:{port}/{db_name}") 33 | return db_engine 34 | 35 | 36 | def remove_sql_comments(sql_query: str) -> str: 37 | # 正则表达式用于匹配 SQL 注释 38 | single_line_comment_pattern = r'--[^\n]*' 39 | multi_line_comment_pattern = r'/\*.*?\*/' 40 | 41 | # 删除单行注释 42 | sql_without_single_comments = re.sub(single_line_comment_pattern, '', sql_query) 43 | 44 | # 删除多行注释 45 | sql_without_comments = re.sub(multi_line_comment_pattern, '', sql_without_single_comments, flags=re.DOTALL) 46 | 47 | return sql_without_comments.strip() 48 | 49 | 50 | def preprocess_sql_query(sql_query: str) -> str: 51 | # 删除注释,加上分号 52 | sql_query = remove_sql_comments(sql_query) 53 | if not sql_query.strip().endswith(';'): 54 | sql_query += ';' 55 | return sql_query 56 | 57 | 58 | def is_email(string): 59 | pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$' 60 | match = re.match(pattern, string) 61 | if match: 62 | return True 63 | else: 64 | return False 65 | 66 | 67 | def examples_to_str(examples: list) -> list[str]: 68 | """ 69 | from examples to a list of str 70 | """ 71 | values = examples 72 | for i in range(len(values)): 73 | if isinstance(values[i], datetime.date): 74 | values = [values[i]] 75 | break 76 | elif isinstance(values[i], datetime.datetime): 77 | values = [values[i]] 78 | break 79 | elif isinstance(values[i], decimal.Decimal): 80 | values[i] 
= str(float(values[i])) 81 | elif is_email(str(values[i])): 82 | values = [] 83 | break 84 | elif 'http://' in str(values[i]) or 'https://' in str(values[i]): 85 | values = [] 86 | break 87 | elif values[i] is not None and not isinstance(values[i], str): 88 | pass 89 | elif values[i] is not None and '.com' in values[i]: 90 | pass 91 | 92 | return [str(v) for v in values if v is not None and len(str(v)) > 0] 93 | 94 | 95 | def sql_fetcher(db_engine: Engine, sql_query: str): 96 | sql_query = preprocess_sql_query(sql_query) 97 | with db_engine.begin() as connection: 98 | try: 99 | cursor = connection.execute(text(sql_query)) 100 | records = cursor.fetchall() 101 | except Exception as e: 102 | print("An exception occurred during SQL execution.\n", e) 103 | records = None 104 | return records -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/file_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import json 3 | import os 4 | import pandas as pd 5 | import re 6 | 7 | def extract_sql_from_qwen(qwen_result) -> str: 8 | sql = qwen_result 9 | pattern = r"```sql(.*?)```" 10 | 11 | # 使用re.DOTALL标志来使得点号(.)可以匹配包括换行符在内的任意字符 12 | sql_code_snippets = re.findall(pattern, qwen_result, re.DOTALL) 13 | 14 | if len(sql_code_snippets) > 0: 15 | sql = sql_code_snippets[-1].strip() 16 | 17 | return sql 18 | 19 | def read_text(filename)->list: 20 | data = [] 21 | with open(filename, 'r', encoding='utf-8') as file: 22 | for line in file.readlines(): 23 | line = line.strip() 24 | data.append(line) 25 | return data 26 | 27 | 28 | def save_raw_text(filename, content): 29 | with open(filename, 'w', encoding='utf-8') as file: 30 | file.write(content) 31 | 32 | 33 | def read_json_file(path, filter_func=None): 34 | if os.path.exists(path): 35 | with open(path, 'r', encoding='utf-8') as f: 36 | try: 37 | json_data = json.load(f) 38 | if filter_func is not None: 39 | json_data = list(filter(filter_func, json_data)) 40 | return json_data 41 | except Exception as e: 42 | f.seek(0) 43 | lines = f.readlines() 44 | json_list = [json.loads(line.strip( 45 | )) for line in lines if filter_func is None or filter_func(json.loads(line.strip()))] 46 | return json_list 47 | else: 48 | return None 49 | 50 | 51 | def write_json_to_file(path: str, data: list, is_json_line: bool = False) -> None: 52 | valid_path(path) 53 | with open(path, 'w', encoding='utf-8') as f: 54 | if is_json_line: 55 | for line in data: 56 | f.write(json.dumps(line, ensure_ascii=False) + '\n') 57 | else: 58 | f.write(json.dumps(data, ensure_ascii=False, indent=4)) 59 | 60 | 61 | def save_as_csv(path: str, data: list): 62 | valid_path(path) 63 | df = pd.DataFrame(data) 64 | df.to_csv(path, index=False, encoding='utf-8') 65 | 66 | 67 | def valid_path(path): 68 | dir = os.path.dirname(path) 69 | if not os.path.exists(dir): 70 | os.makedirs(dir) 71 | 72 | 73 | def find_lasest_timastamp_file(root_path): 74 | pass -------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/llm_util.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | 3 | 4 | def call_openai_sdk(**args): 5 | key = args['key'] 6 | base_url = args['url'] 7 | client = OpenAI( 8 | api_key=key, 9 | base_url=base_url, 10 | ) 11 | del args['key'] 12 | del args['url'] 13 | completion = client.chat.completions.create( 14 | **args 15 | ) 16 | return completion 17 | 18 | 
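For reference, here is a minimal sketch of how `call_openai_sdk` is driven by `server.py` (the model name and URL are the defaults from `config_demo.yml`; the key is a placeholder):

```python
# Usage sketch for call_openai_sdk (illustration only, not part of the module).
# "key" and "url" are popped from the kwargs to build the OpenAI client; the
# remaining kwargs are forwarded to client.chat.completions.create().
from xiyan_mcp_server.utils.llm_util import call_openai_sdk

param = {
    "model": "pre-xiyan_multi_dialect_v3",  # default from config_demo.yml
    "key": "YOUR_API_KEY",  # placeholder
    "url": "https://poc-dashscope.aliyuncs.com/compatible-mode/v1",
    "messages": [
        {"role": "system", "content": "You are a MySQL data analyst."},
        {"role": "user", "content": "How many orders were placed per month?"},
    ],
}
completion = call_openai_sdk(**param)
print(completion.choices[0].message.content)
```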
-------------------------------------------------------------------------------- /src/xiyan_mcp_server/utils/logger_util.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | 3 | # Configure logger settings: a single file sink with a custom format at INFO level. 4 | # Note: logger.configure() replaces any previously registered handlers, so the 5 | # file sink is set up exactly once, by the call below. 6 | logger_format = "{time} - {level} - {message}" 7 | 8 | logger.configure(handlers=[{"sink": "xiyan_mcp_server.log", "format": logger_format, "level": "INFO"}]) 9 | 10 | # Other modules import this pre-configured instance: `from .logger_util import logger` 11 | --------------------------------------------------------------------------------
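A short sketch of how the shared logger is meant to be consumed by other modules (hypothetical call sites, shown only for illustration):

```python
# Hypothetical call sites: reuse the pre-configured loguru logger.
from xiyan_mcp_server.utils.logger_util import logger

logger.info("xiyan_mcp_server started")       # written to xiyan_mcp_server.log
logger.error("database connection failed")    # same sink, ERROR level
```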