├── DB-reader.yml
├── README.md
├── agent.py
├── app.py
├── requirements.txt
├── result.png
├── student_data.sql
└── 表名和表结构.txt


/DB-reader.yml:
--------------------------------------------------------------------------------
  1 | app:
  2 |   description: ''
  3 |   icon: 🤖
  4 |   icon_background: '#FFEAD5'
  5 |   mode: workflow
  6 |   name: DB-reader
  7 |   use_icon_as_answer_icon: false
  8 | kind: app
  9 | version: 0.1.3
 10 | workflow:
 11 |   conversation_variables: []
 12 |   environment_variables: []
 13 |   features:
 14 |     file_upload:
 15 |       allowed_file_extensions:
 16 |       - .JPG
 17 |       - .JPEG
 18 |       - .PNG
 19 |       - .GIF
 20 |       - .WEBP
 21 |       - .SVG
 22 |       allowed_file_types:
 23 |       - image
 24 |       allowed_file_upload_methods:
 25 |       - local_file
 26 |       - remote_url
 27 |       enabled: false
 28 |       fileUploadConfig:
 29 |         audio_file_size_limit: 50
 30 |         batch_count_limit: 5
 31 |         file_size_limit: 15
 32 |         image_file_size_limit: 10
 33 |         video_file_size_limit: 100
 34 |         workflow_file_upload_limit: 10
 35 |       image:
 36 |         enabled: false
 37 |         number_limits: 3
 38 |         transfer_methods:
 39 |         - local_file
 40 |         - remote_url
 41 |       number_limits: 3
 42 |     opening_statement: ''
 43 |     retriever_resource:
 44 |       enabled: true
 45 |     sensitive_word_avoidance:
 46 |       enabled: false
 47 |     speech_to_text:
 48 |       enabled: false
 49 |     suggested_questions: []
 50 |     suggested_questions_after_answer:
 51 |       enabled: false
 52 |     text_to_speech:
 53 |       enabled: false
 54 |       language: ''
 55 |       voice: ''
 56 |   graph:
 57 |     edges:
 58 |     - data:
 59 |         isInIteration: false
 60 |         sourceType: llm
 61 |         targetType: code
 62 |       id: 1732980872194-source-1732980912700-target
 63 |       source: '1732980872194'
 64 |       sourceHandle: source
 65 |       target: '1732980912700'
 66 |       targetHandle: target
 67 |       type: custom
 68 |       zIndex: 0
 69 |     - data:
 70 |         isInIteration: false
 71 |         sourceType: code
 72 |         targetType: llm
 73 |       id: 1732980912700-source-1732980920612-target
 74 |       source: '1732980912700'
 75 |       sourceHandle: source
 76 |       target: '1732980920612'
 77 |       targetHandle: target
 78 |       type: custom
 79 |       zIndex: 0
 80 |     - data:
 81 |         isInIteration: false
 82 |         sourceType: llm
 83 |         targetType: end
 84 |       id: 1732980920612-source-1732980940056-target
 85 |       source: '1732980920612'
 86 |       sourceHandle: source
 87 |       target: '1732980940056'
 88 |       targetHandle: target
 89 |       type: custom
 90 |       zIndex: 0
 91 |     - data:
 92 |         isInIteration: false
 93 |         sourceType: start
 94 |         targetType: knowledge-retrieval
 95 |       id: 1732970305758-source-1733132854491-target
 96 |       source: '1732970305758'
 97 |       sourceHandle: source
 98 |       target: '1733132854491'
 99 |       targetHandle: target
100 |       type: custom
101 |       zIndex: 0
102 |     - data:
103 |         isInIteration: false
104 |         sourceType: knowledge-retrieval
105 |         targetType: llm
106 |       id: 1733132854491-source-1732980872194-target
107 |       source: '1733132854491'
108 |       sourceHandle: source
109 |       target: '1732980872194'
110 |       targetHandle: target
111 |       type: custom
112 |       zIndex: 0
113 |     nodes:
114 |     - data:
115 |         desc: ''
116 |         selected: false
117 |         title: 开始
118 |         type: start
119 |         variables:
120 |         - label: 请输入您的查询需求
121 |           max_length: 200
122 |           options: []
123 |           required: true
124 |           type: text-input
125 |           variable: question
126 |       height: 90
127 |       id: '1732970305758'
128 |       position:
129 |         x: -200.45867167179856
130 |         y: 203.94580912716032
131 |       positionAbsolute:
132 |         x: -200.45867167179856
133 |         y: 203.94580912716032
134 |       selected: false
135 |       sourcePosition: right
136 |       targetPosition: left
137 |       type: custom
138 |       width: 244
139 |     - data:
140 |         context:
141 |           enabled: true
142 |           variable_selector:
143 |           - '1733132854491'
144 |           - result
145 |         desc: ''
146 |         model:
147 |           completion_params:
148 |             temperature: 0.7
149 |           mode: chat
150 |           name: Qwen2:7b
151 |           provider: ollama
152 |         prompt_template:
153 |         - id: ab2999ee-62f7-4b4d-a230-b6dd043e7aad
154 |           role: system
155 |           text: '请作为一个数据库管理员，理解用户的具体需求，并生成对应的SQL。其中，数据库表名和字段名信息通过查阅{{#context#}}知识库获得。生成的结果只包含可以运行的SQL，不需要解释。 '
156 |         - id: 832946d2-761c-49dd-9136-9f6d3ec1a946
157 |           role: user
158 |           text: '问题: {{#1732970305758.question#}}
159 | 
160 | 
161 |             '
162 |         - id: 64acd278-efbf-46bd-a7b7-2a4d5cfaa78b
163 |           role: assistant
164 |           text: ''
165 |         selected: false
166 |         title: LLM自动生成SQL
167 |         type: llm
168 |         variables: []
169 |         vision:
170 |           enabled: false
171 |       height: 98
172 |       id: '1732980872194'
173 |       position:
174 |         x: 29.290583767340934
175 |         y: 499.3999411044708
176 |       positionAbsolute:
177 |         x: 29.290583767340934
178 |         y: 499.3999411044708
179 |       selected: false
180 |       sourcePosition: right
181 |       targetPosition: left
182 |       type: custom
183 |       width: 244
184 |     - data:
185 |         code: "import urllib.request\nimport json\n\ndef main(sql_query: str) -> dict:\n\
186 |           \    # Flask 服务端点\n    api_url = \"http://192.168.10.219:5003/execute_query\"\
187 |           \n\n    # 请求体\n    payload = {\n        \"sql_query\": sql_query\n    }\n\
188 |           \n    # 将查询参数进行 URL 编码\n    encoded_params = urllib.parse.urlencode(payload)\n\
189 |           \n    # 创造请求对象， 拼接完整的 URL\n    req = f\"{api_url}?{encoded_params}\"\n\n\
190 |           \    try:\n        # 发送请求并获取响应\n        with urllib.request.urlopen(req)\
191 |           \ as response:\n            # 读取并解码响应\n            result_data = json.loads(response.read().decode('utf-8'))\n\
192 |           \            # 将结果转换为字符串格式\n            result_str = json.dumps(result_data,\
193 |           \ ensure_ascii=False) # 确保中文字符正常显示\n            print(result_str)\n    \
194 |           \        return {\n                'result': result_str # 返回字符串类型结果\n  \
195 |           \          }\n    except Exception as e:\n        raise Exception(f\"Error:{str(e)}\"\
196 |           )\n"
197 |         code_language: python3
198 |         desc: ''
199 |         outputs:
200 |           result:
201 |             children: null
202 |             type: string
203 |         selected: false
204 |         title: 执行SQL
205 |         type: code
206 |         variables:
207 |         - value_selector:
208 |           - '1732980872194'
209 |           - text
210 |           variable: sql_query
211 |       height: 54
212 |       id: '1732980912700'
213 |       position:
214 |         x: 149.1166163724659
215 |         y: 652.6699088065056
216 |       positionAbsolute:
217 |         x: 149.1166163724659
218 |         y: 652.6699088065056
219 |       selected: true
220 |       sourcePosition: right
221 |       targetPosition: left
222 |       type: custom
223 |       width: 244
224 |     - data:
225 |         context:
226 |           enabled: true
227 |           variable_selector:
228 |           - '1733132854491'
229 |           - result
230 |         desc: ''
231 |         model:
232 |           completion_params:
233 |             temperature: 0.7
234 |           mode: chat
235 |           name: Qwen2:7b
236 |           provider: ollama
237 |         prompt_template:
238 |         - id: bf2cad29-b645-4416-9799-06dd56c5ce86
239 |           role: system
240 |           text: '你是数据分析师，请分析JSON格式的SQL查询放回结果，回答用户的问题
241 | 
242 |             '
243 |         - id: bef9db8e-1608-4e8f-abdd-eed980fc1b6b
244 |           role: user
245 |           text: '问题是：{{#1732970305758.question#}}
246 | 
247 |             SQL是: {{#1732980872194.text#}}
248 | 
249 |             数据是：{{#1732980912700.result#}}
250 | 
251 | 
252 |             请在结果中按照模板中的三个步骤回答问题：（1）具体执行的SQL是{{#1732980872194.text#}}，（2）：SQL返回的结果是{{#1732980912700.result#}}，（3）然后回复问题的答案，尽可能简短。'
253 |         selected: false
254 |         title: LLM结果分析
255 |         type: llm
256 |         variables: []
257 |         vision:
258 |           enabled: false
259 |       height: 98
260 |       id: '1732980920612'
261 |       position:
262 |         x: 293.86542871638255
263 |         y: 744.8910559846614
264 |       positionAbsolute:
265 |         x: 293.86542871638255
266 |         y: 744.8910559846614
267 |       selected: false
268 |       sourcePosition: right
269 |       targetPosition: left
270 |       type: custom
271 |       width: 244
272 |     - data:
273 |         desc: ''
274 |         outputs:
275 |         - value_selector:
276 |           - '1732980920612'
277 |           - text
278 |           variable: text
279 |         selected: false
280 |         title: 结束
281 |         type: end
282 |       height: 90
283 |       id: '1732980940056'
284 |       position:
285 |         x: 437.35951722392167
286 |         y: 873.9731111688593
287 |       positionAbsolute:
288 |         x: 437.35951722392167
289 |         y: 873.9731111688593
290 |       selected: false
291 |       sourcePosition: right
292 |       targetPosition: left
293 |       type: custom
294 |       width: 244
295 |     - data:
296 |         dataset_ids:
297 |         - 3ad8b24f-fd2b-4d3b-bd38-ff60039097ed
298 |         desc: ''
299 |         multiple_retrieval_config:
300 |           reranking_enable: false
301 |           reranking_mode: reranking_model
302 |           top_k: 4
303 |         query_variable_selector:
304 |         - '1732970305758'
305 |         - question
306 |         retrieval_mode: multiple
307 |         selected: false
308 |         title: 知识检索
309 |         type: knowledge-retrieval
310 |       height: 92
311 |       id: '1733132854491'
312 |       position:
313 |         x: -88.20604288351262
314 |         y: 352.9945268220258
315 |       positionAbsolute:
316 |         x: -88.20604288351262
317 |         y: 352.9945268220258
318 |       selected: false
319 |       sourcePosition: right
320 |       targetPosition: left
321 |       type: custom
322 |       width: 244
323 |     viewport:
324 |       x: 398.636722585323
325 |       y: 114.5359283321757
326 |       zoom: 0.39931970501408437
327 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 简介 
 2 | 该项目依赖qwen大模型，使用dify组建workflow，实现智能生成SQL并返回结果。
 3 | 我是在普通的笔记本(Intel Core i7-13700H + 16G Mem)上运行的，没有用到GPU。
 4 | 
 5 | - 使用技术：ollama + dify + docker + postgres + python flask
 6 | - 模型: qwen2:7b（通过 ollama 运行），只占用4-5G内存
 7 | - 输入: 自然语言一句话描述需求
 8 | - 输出: 你想要的答案
 9 | - 过程: 大模型生成SQL后，转发到数据库服务，再返回到大模型汇总结果
10 | - 坑点: dify不能像db-gpt一样直连数据库，需要把数据库打包成http链接对外提供服务，我用的Flask框架
11 | 
12 | # 文件内容说明
13 | - app.py 打包数据库接口，向外提供http服务
14 | - DB-reader.yml 用于dify workflow的配置文件
15 | - agent.py dify workflow 中“执行SQL”代码节点所使用的 python 代码
16 | - 表名和表结构.txt PG数据库表结构，用于在dify workflow里组建知识库
17 | - student_data.sql 用于准备PG数据库数据
18 | 
19 | # 步骤
20 | - ollama run qwen2:7b 启动qwen大模型
21 | - python app.py 提供数据库服务
22 | - 导入dify工作流的yml，部署运行
23 | - 在 dify 中填写查询需求，运行一次工作流
24 | 
25 | # 结果
26 | ![查询测试](result.png)
27 | 参考视频：https://youtu.be/DYEzumbXeBw
28 | 
29 | # 致谢
30 | - yihong0618 https://github.com/yihong0618
31 | - 智奕AI https://www.bilibili.com/video/BV1Pmy6YuEow


--------------------------------------------------------------------------------
/agent.py:
--------------------------------------------------------------------------------
 1 | import urllib.request
 2 | import json
 3 | 
 4 | def main(sql_query: str) -> dict:
 5 |     # Flask 服务端点
 6 |     api_url = "http://192.168.10.219:5003/execute_query"
 7 | 
 8 |     # 请求体
 9 |     payload = {
10 |         "sql_query": sql_query
11 |     }
12 | 
13 |     # 将查询参数进行 URL 编码
14 |     encoded_params = urllib.parse.urlencode(payload)
15 | 
16 |     # 创造请求对象， 拼接完整的 URL
17 |     req = f"{api_url}?{encoded_params}"
18 | 
19 |     try:
20 |         # 发送请求并获取响应
21 |         with urllib.request.urlopen(req) as response:
22 |             # 读取并解码响应
23 |             result_data = json.loads(response.read().decode('utf-8'))
24 |             # 将结果转换为字符串格式
25 |             result_str = json.dumps(result_data, ensure_ascii=False) # 确保中文字符正常显示
26 |             print(result_str)
27 |             return {
28 |                 'result': result_str # 返回字符串类型结果
29 |             }
30 |     except Exception as e:
31 |         raise Exception(f"Error:{str(e)}")
32 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os

import psycopg2
from flask import Flask, request, jsonify
from psycopg2 import sql
 4 | 
 5 | # 初始化 Flask 应用
 6 | app = Flask(__name__)
 7 | 
 8 | # PostgreSQL 数据库连接设置
 9 | DB_HOST = 'localhost'
10 | DB_PORT = '5432'  # 默认端口，按需修改
11 | DB_NAME = 'postgres'
12 | DB_USER = 'postgres'
13 | DB_PASSWORD = '123456'
14 | 
def get_db_connection():
    """Open and return a new psycopg2 connection built from the DB_* settings."""
    connection_settings = {
        'host': DB_HOST,
        'port': DB_PORT,
        'dbname': DB_NAME,
        'user': DB_USER,
        'password': DB_PASSWORD,
    }
    return psycopg2.connect(**connection_settings)
25 | 
# Handler for the /execute_query route
@app.route('/execute_query', methods=['GET'])
def execute_query():
    """Run the SQL given in the ``sql_query`` query parameter and return its rows.

    Returns:
        * 200 with ``{'result': rows}`` when the statement runs and rows are fetched;
        * 400 with ``{'error': 'No query provided'}`` when ``sql_query`` is
          missing or empty;
        * 500 with ``{'error': <message>}`` when connecting, executing or
          fetching raises.
    """
    query = request.args.get('sql_query')

    if not query:
        return jsonify({'error': 'No query provided'}), 400

    conn = None
    try:
        conn = get_db_connection()
        # SECURITY NOTE(review): the SQL text comes straight from the request
        # and is executed as-is. Running it in a read-only transaction blocks
        # ordinary writes, but it is only a mitigation (a statement may end the
        # transaction itself). Connect with a least-privilege database role
        # before exposing this service.
        conn.set_session(readonly=True)
        # The cursor context manager closes the cursor even if execute/fetch raises.
        with conn.cursor() as cur:
            cur.execute(query)
            result = cur.fetchall()

        return jsonify({'result': result})

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        # Previously the connection was left open whenever the query failed.
        if conn is not None:
            conn.close()
49 | 
# Start the Flask server when this file is run as a script.
if __name__ == '__main__':
    # 0.0.0.0 listens on every network interface; 5003 is the port the
    # workflow's code node calls.
    app.run(port=5003, host='0.0.0.0')
53 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | psycopg2==2.9.10
2 | Flask==3.1.0
3 | 


--------------------------------------------------------------------------------
/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyongtao-db/chatDB-dify/e8d69f4ca779e405acc629032a1726f307e4f134/result.png


--------------------------------------------------------------------------------
/student_data.sql:
--------------------------------------------------------------------------------
 1 | -- refer to https://www.sjkjc.com/postgresql/join/#google_vignette
 2 | -- create 2 tables
 3 | CREATE TABLE student (
 4 |   student_id INTEGER NOT NULL,
 5 |   name varchar(45) NOT NULL,
 6 |   PRIMARY KEY (student_id)
 7 | );
 8 | 
 9 | CREATE TABLE student_score (
10 |   student_id INTEGER NOT NULL,
11 |   subject varchar(45) NOT NULL,
12 |   score INTEGER NOT NULL
13 | );
14 | 
15 | -- insert some data
16 | INSERT INTO
17 |   student (student_id, name)
18 | VALUES
19 |   (1,'Tim'),(2,'Jim'),(3,'Lucy');
20 | 
21 | INSERT INTO
22 |   student_score (student_id, subject, score)
23 | VALUES
24 |   (1,'English',90),
25 |   (1,'Math',80),
26 |   (2,'English',85),
27 |   (5,'English',92);
28 | 


--------------------------------------------------------------------------------
/表名和表结构.txt:
--------------------------------------------------------------------------------
1 | ## 学生表
2 | 表名：student；字段名：student_id, name；字段信息：学生编号, 学生姓名
3 | 
4 | ## 学生分数表 
5 | 表名：student_score；字段名：student_id, subject, score；字段信息：学生编号, 科目, 分数


--------------------------------------------------------------------------------