├── DB-reader.yml ├── README.md ├── agent.py ├── app.py ├── requirements.txt ├── result.png ├── student_data.sql └── 表名和表结构.txt /DB-reader.yml: -------------------------------------------------------------------------------- 1 | app: 2 | description: '' 3 | icon: 🤖 4 | icon_background: '#FFEAD5' 5 | mode: workflow 6 | name: DB-reader 7 | use_icon_as_answer_icon: false 8 | kind: app 9 | version: 0.1.3 10 | workflow: 11 | conversation_variables: [] 12 | environment_variables: [] 13 | features: 14 | file_upload: 15 | allowed_file_extensions: 16 | - .JPG 17 | - .JPEG 18 | - .PNG 19 | - .GIF 20 | - .WEBP 21 | - .SVG 22 | allowed_file_types: 23 | - image 24 | allowed_file_upload_methods: 25 | - local_file 26 | - remote_url 27 | enabled: false 28 | fileUploadConfig: 29 | audio_file_size_limit: 50 30 | batch_count_limit: 5 31 | file_size_limit: 15 32 | image_file_size_limit: 10 33 | video_file_size_limit: 100 34 | workflow_file_upload_limit: 10 35 | image: 36 | enabled: false 37 | number_limits: 3 38 | transfer_methods: 39 | - local_file 40 | - remote_url 41 | number_limits: 3 42 | opening_statement: '' 43 | retriever_resource: 44 | enabled: true 45 | sensitive_word_avoidance: 46 | enabled: false 47 | speech_to_text: 48 | enabled: false 49 | suggested_questions: [] 50 | suggested_questions_after_answer: 51 | enabled: false 52 | text_to_speech: 53 | enabled: false 54 | language: '' 55 | voice: '' 56 | graph: 57 | edges: 58 | - data: 59 | isInIteration: false 60 | sourceType: llm 61 | targetType: code 62 | id: 1732980872194-source-1732980912700-target 63 | source: '1732980872194' 64 | sourceHandle: source 65 | target: '1732980912700' 66 | targetHandle: target 67 | type: custom 68 | zIndex: 0 69 | - data: 70 | isInIteration: false 71 | sourceType: code 72 | targetType: llm 73 | id: 1732980912700-source-1732980920612-target 74 | source: '1732980912700' 75 | sourceHandle: source 76 | target: '1732980920612' 77 | targetHandle: target 78 | type: custom 79 | zIndex: 0 80 | - data: 81 | isInIteration: false 82 | sourceType: llm 83 | targetType: end 84 | id: 1732980920612-source-1732980940056-target 85 | source: '1732980920612' 86 | sourceHandle: source 87 | target: '1732980940056' 88 | targetHandle: target 89 | type: custom 90 | zIndex: 0 91 | - data: 92 | isInIteration: false 93 | sourceType: start 94 | targetType: knowledge-retrieval 95 | id: 1732970305758-source-1733132854491-target 96 | source: '1732970305758' 97 | sourceHandle: source 98 | target: '1733132854491' 99 | targetHandle: target 100 | type: custom 101 | zIndex: 0 102 | - data: 103 | isInIteration: false 104 | sourceType: knowledge-retrieval 105 | targetType: llm 106 | id: 1733132854491-source-1732980872194-target 107 | source: '1733132854491' 108 | sourceHandle: source 109 | target: '1732980872194' 110 | targetHandle: target 111 | type: custom 112 | zIndex: 0 113 | nodes: 114 | - data: 115 | desc: '' 116 | selected: false 117 | title: 开始 118 | type: start 119 | variables: 120 | - label: 请输入您的查询需求 121 | max_length: 200 122 | options: [] 123 | required: true 124 | type: text-input 125 | variable: question 126 | height: 90 127 | id: '1732970305758' 128 | position: 129 | x: -200.45867167179856 130 | y: 203.94580912716032 131 | positionAbsolute: 132 | x: -200.45867167179856 133 | y: 203.94580912716032 134 | selected: false 135 | sourcePosition: right 136 | targetPosition: left 137 | type: custom 138 | width: 244 139 | - data: 140 | context: 141 | enabled: true 142 | variable_selector: 143 | - '1733132854491' 144 | - result 145 | desc: '' 146 | model: 147 | completion_params: 148 | temperature: 0.7 149 | mode: chat 150 | name: Qwen2:7b 151 | provider: ollama 152 | prompt_template: 153 | - id: ab2999ee-62f7-4b4d-a230-b6dd043e7aad 154 | role: system 155 | text: '请作为一个数据库管理员,理解用户的具体需求,并生成对应的SQL。其中,数据库表名和字段名信息通过查阅{{#context#}}知识库获得。生成的结果只包含可以运行的SQL,不需要解释。 ' 156 | - id: 832946d2-761c-49dd-9136-9f6d3ec1a946 157 | role: user 158 | text: '问题: {{#1732970305758.question#}} 159 | 160 | 161 | ' 162 | - id: 64acd278-efbf-46bd-a7b7-2a4d5cfaa78b 163 | role: assistant 164 | text: '' 165 | selected: false 166 | title: LLM自动生成SQL 167 | type: llm 168 | variables: [] 169 | vision: 170 | enabled: false 171 | height: 98 172 | id: '1732980872194' 173 | position: 174 | x: 29.290583767340934 175 | y: 499.3999411044708 176 | positionAbsolute: 177 | x: 29.290583767340934 178 | y: 499.3999411044708 179 | selected: false 180 | sourcePosition: right 181 | targetPosition: left 182 | type: custom 183 | width: 244 184 | - data: 185 | code: "import urllib.request\nimport json\n\ndef main(sql_query: str) -> dict:\n\ 186 | \ # Flask 服务端点\n api_url = \"http://192.168.10.219:5003/execute_query\"\ 187 | \n\n # 请求体\n payload = {\n \"sql_query\": sql_query\n }\n\ 188 | \n # 将查询参数进行 URL 编码\n encoded_params = urllib.parse.urlencode(payload)\n\ 189 | \n # 创造请求对象, 拼接完整的 URL\n req = f\"{api_url}?{encoded_params}\"\n\n\ 190 | \ try:\n # 发送请求并获取响应\n with urllib.request.urlopen(req)\ 191 | \ as response:\n # 读取并解码响应\n result_data = json.loads(response.read().decode('utf-8'))\n\ 192 | \ # 将结果转换为字符串格式\n result_str = json.dumps(result_data,\ 193 | \ ensure_ascii=False) # 确保中文字符正常显示\n print(result_str)\n \ 194 | \ return {\n 'result': result_str # 返回字符串类型结果\n \ 195 | \ }\n except Exception as e:\n raise Exception(f\"Error:{str(e)}\"\ 196 | )\n" 197 | code_language: python3 198 | desc: '' 199 | outputs: 200 | result: 201 | children: null 202 | type: string 203 | selected: false 204 | title: 执行SQL 205 | type: code 206 | variables: 207 | - value_selector: 208 | - '1732980872194' 209 | - text 210 | variable: sql_query 211 | height: 54 212 | id: '1732980912700' 213 | position: 214 | x: 149.1166163724659 215 | y: 652.6699088065056 216 | positionAbsolute: 217 | x: 149.1166163724659 218 | y: 652.6699088065056 219 | selected: true 220 | sourcePosition: right 221 | targetPosition: left 222 | type: custom 223 | width: 244 224 | - data: 225 | context: 226 | enabled: true 227 | variable_selector: 228 | - '1733132854491' 229 | - result 230 | desc: '' 231 | model: 232 | completion_params: 233 | temperature: 0.7 234 | mode: chat 235 | name: Qwen2:7b 236 | provider: ollama 237 | prompt_template: 238 | - id: bf2cad29-b645-4416-9799-06dd56c5ce86 239 | role: system 240 | text: '你是数据分析师,请分析JSON格式的SQL查询放回结果,回答用户的问题 241 | 242 | ' 243 | - id: bef9db8e-1608-4e8f-abdd-eed980fc1b6b 244 | role: user 245 | text: '问题是:{{#1732970305758.question#}} 246 | 247 | SQL是: {{#1732980872194.text#}} 248 | 249 | 数据是:{{#1732980912700.result#}} 250 | 251 | 252 | 请在结果中按照模板中的三个步骤回答问题:(1)具体执行的SQL是{{#1732980872194.text#}},(2):SQL返回的结果是{{#1732980912700.result#}},(3)然后回复问题的答案,尽可能简短。' 253 | selected: false 254 | title: LLM结果分析 255 | type: llm 256 | variables: [] 257 | vision: 258 | enabled: false 259 | height: 98 260 | id: '1732980920612' 261 | position: 262 | x: 293.86542871638255 263 | y: 744.8910559846614 264 | positionAbsolute: 265 | x: 293.86542871638255 266 | y: 744.8910559846614 267 | selected: false 268 | sourcePosition: right 269 | targetPosition: left 270 | type: custom 271 | width: 244 272 | - data: 273 | desc: '' 274 | outputs: 275 | - value_selector: 276 | - '1732980920612' 277 | - text 278 | variable: text 279 | selected: false 280 | title: 结束 281 | type: end 282 | height: 90 283 | id: '1732980940056' 284 | position: 285 | x: 437.35951722392167 286 | y: 873.9731111688593 287 | positionAbsolute: 288 | x: 437.35951722392167 289 | y: 873.9731111688593 290 | selected: false 291 | sourcePosition: right 292 | targetPosition: left 293 | type: custom 294 | width: 244 295 | - data: 296 | dataset_ids: 297 | - 3ad8b24f-fd2b-4d3b-bd38-ff60039097ed 298 | desc: '' 299 | multiple_retrieval_config: 300 | reranking_enable: false 301 | reranking_mode: reranking_model 302 | top_k: 4 303 | query_variable_selector: 304 | - '1732970305758' 305 | - question 306 | retrieval_mode: multiple 307 | selected: false 308 | title: 知识检索 309 | type: knowledge-retrieval 310 | height: 92 311 | id: '1733132854491' 312 | position: 313 | x: -88.20604288351262 314 | y: 352.9945268220258 315 | positionAbsolute: 316 | x: -88.20604288351262 317 | y: 352.9945268220258 318 | selected: false 319 | sourcePosition: right 320 | targetPosition: left 321 | type: custom 322 | width: 244 323 | viewport: 324 | x: 398.636722585323 325 | y: 114.5359283321757 326 | zoom: 0.39931970501408437 327 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 简介 2 | 该项目依赖qwen大模型,使用dify组建workflow,实现智能生成SQL并返回结果。 3 | 我是在普通的笔记本(Intel Core i7-13700H + 16G Mem)上运行的,没有用到GPU。 4 | 5 | - 使用技术:ollama + dify + docker + postgres + python flask 6 | - 模型: qwen2-7b,只占用4-5G内存 7 | - 输入: 自然语言一句话描述需求 8 | - 输出: 你想要的答案 9 | - 过程: 大模型生成SQL后,转发到数据库服务,再返回到大模型汇总结果 10 | - 坑点: dify不能像db-gpt一样直连数据库,需要把数据库打包成http链接对外提供服务,我用的Flask框架 11 | 12 | # 文件内容说明 13 | - app.py 打包数据库接口,向外提供http服务 14 | - DB-reader.yml 用于dify workflow的配置文件 15 | - agent.py 用于dify workflow里的执行SQL模块需要的python代码 16 | - 表名和表结构.txt PG数据库表结构,用于在dify workflow里组建知识库 17 | - student_data.sql 用于准备PG数据库数据 18 | 19 | # 步骤 20 | - ollama run qwen2:7b 使能qwen大模型 21 | - python app.py 提供数据库服务 22 | - 导入dify工作流的yml,部署运行 23 | - 填写运行一次 24 | 25 | # 结果 26 | ![查询测试](result.png) 27 | 参考视频:https://youtu.be/DYEzumbXeBw 28 | 29 | # 致谢 30 | - yihong0618 https://github.com/yihong0618 31 | - 智奕AI https://www.bilibili.com/video/BV1Pmy6YuEow -------------------------------------------------------------------------------- /agent.py: -------------------------------------------------------------------------------- 1 | import urllib.request 2 | import json 3 | 4 | def main(sql_query: str) -> dict: 5 | # Flask 服务端点 6 | api_url = "http://192.168.10.219:5003/execute_query" 7 | 8 | # 请求体 9 | payload = { 10 | "sql_query": sql_query 11 | } 12 | 13 | # 将查询参数进行 URL 编码 14 | encoded_params = urllib.parse.urlencode(payload) 15 | 16 | # 创造请求对象, 拼接完整的 URL 17 | req = f"{api_url}?{encoded_params}" 18 | 19 | try: 20 | # 发送请求并获取响应 21 | with urllib.request.urlopen(req) as response: 22 | # 读取并解码响应 23 | result_data = json.loads(response.read().decode('utf-8')) 24 | # 将结果转换为字符串格式 25 | result_str = json.dumps(result_data, ensure_ascii=False) # 确保中文字符正常显示 26 | print(result_str) 27 | return { 28 | 'result': result_str # 返回字符串类型结果 29 | } 30 | except Exception as e: 31 | raise Exception(f"Error:{str(e)}") 32 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | import psycopg2 3 | from psycopg2 import sql 4 | 5 | # 初始化 Flask 应用 6 | app = Flask(__name__) 7 | 8 | # PostgreSQL 数据库连接设置 9 | DB_HOST = 'localhost' 10 | DB_PORT = '5432' # 默认端口,按需修改 11 | DB_NAME = 'postgres' 12 | DB_USER = 'postgres' 13 | DB_PASSWORD = '123456' 14 | 15 | # 连接到 PostgreSQL 数据库的函数 16 | def get_db_connection(): 17 | conn = psycopg2.connect( 18 | host=DB_HOST, 19 | port=DB_PORT, 20 | dbname=DB_NAME, 21 | user=DB_USER, 22 | password=DB_PASSWORD 23 | ) 24 | return conn 25 | 26 | # 处理 /execute_query 路由 27 | @app.route('/execute_query', methods=['GET']) 28 | def execute_query(): 29 | # 获取用户传入的 SQL 查询 30 | query = request.args.get('sql_query') 31 | 32 | if not query: 33 | return jsonify({'error': 'No query provided'}), 400 34 | 35 | # 连接到数据库并执行查询 36 | try: 37 | conn = get_db_connection() 38 | cur = conn.cursor() 39 | cur.execute(query) 40 | result = cur.fetchall() # 获取查询结果 41 | cur.close() 42 | conn.close() 43 | 44 | # 返回查询结果 45 | return jsonify({'result': result}) 46 | 47 | except Exception as e: 48 | return jsonify({'error': str(e)}), 500 49 | 50 | # 启动 Flask 服务 51 | if __name__ == '__main__': 52 | app.run(host='0.0.0.0', port=5003) 53 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | psycopg2==2.9.10 2 | Flask==3.1.0 3 | -------------------------------------------------------------------------------- /result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyongtao-db/chatDB-dify/e8d69f4ca779e405acc629032a1726f307e4f134/result.png -------------------------------------------------------------------------------- /student_data.sql: -------------------------------------------------------------------------------- 1 | -- refer to https://www.sjkjc.com/postgresql/join/#google_vignette 2 | -- create 2 tables 3 | CREATE TABLE student ( 4 | student_id INTEGER NOT NULL, 5 | name varchar(45) NOT NULL, 6 | PRIMARY KEY (student_id) 7 | ); 8 | 9 | CREATE TABLE student_score ( 10 | student_id INTEGER NOT NULL, 11 | subject varchar(45) NOT NULL, 12 | score INTEGER NOT NULL 13 | ); 14 | 15 | -- insert some data 16 | INSERT INTO 17 | student (student_id, name) 18 | VALUES 19 | (1,'Tim'),(2,'Jim'),(3,'Lucy'); 20 | 21 | INSERT INTO 22 | student_score (student_id, subject, score) 23 | VALUES 24 | (1,'English',90), 25 | (1,'Math',80), 26 | (2,'English',85), 27 | (5,'English',92); 28 | -------------------------------------------------------------------------------- /表名和表结构.txt: -------------------------------------------------------------------------------- 1 | ## 学生表 2 | 表名:student;字段名:student_id, name;字段信息:学生编号, 学生姓名 3 | 4 | ## 学生分数表 5 | 表名:student_score;字段名:student_id, subject, score;字段信息:学生编号, 科目,分数 --------------------------------------------------------------------------------