├── Flask_src
    ├── app.py
    ├── requirements.txt
    ├── sql_alias.py
    ├── sql_count_value.py
    ├── sql_extra.py
    ├── sql_format_class.py
    ├── sql_index.py
    ├── sqlai.py
    ├── templates
    │   └── index.html
    └── where_clause.py
├── README.md
├── deepseek_flash_src
    ├── app.py
    ├── requirements.txt
    ├── sql_alias.py
    ├── sql_count_value.py
    ├── sql_deepseek.py
    ├── sql_extra.py
    ├── sql_format_class.py
    ├── sql_index.py
    ├── templates
    │   └── index.html
    └── where_clause.py
├── src
    ├── sql_alias.py
    ├── sql_count_value.py
    ├── sql_extra.py
    ├── sql_format_class.py
    ├── sql_index.py
    ├── sqlai.py
    ├── sqlai_helper.py
    └── where_clause.py
├── test.yaml
└── web
    └── sql_helper
        ├── README.md
        ├── conn.php
        ├── css
            └── bootstrap.min.css
        ├── schema
            └── sql_helper_schema.sql
        ├── sql_helper.php
        └── sql_helper_result.php


/Flask_src/app.py:
--------------------------------------------------------------------------------
  1 | import sys, re
  2 | import textwrap
  3 | from tabulate import tabulate
  4 | import pymysql
  5 | from sql_metadata import Parser
  6 | from sql_format_class import SQLFormatter
  7 | from sql_alias import has_table_alias
  8 | from sql_count_value import count_column_value, count_column_clause_value
  9 | from sql_index import execute_index_query, check_index_exist, check_index_exist_multi
 10 | from where_clause import *
 11 | from sql_extra import *
 12 | import yaml
 13 | import argparse
 14 | from sqlai import *
 15 | from flask import Flask, request, render_template
 16 | 
 17 | app = Flask(__name__)
 18 | app.config['JSON_AS_ASCII'] = False
 19 | app.config['JSON_SORT_KEYS'] = False
 20 | app.config['TEMPLATES_AUTO_RELOAD'] = True
 21 | app.config['DEFAULT_CHARSET'] = 'utf-8'
 22 | 
 23 | 
 24 | def analyze_sql(sql_query, db_config, sample_size=100000):
 25 |     mysql_settings = {
 26 |         "host": db_config["host"],
 27 |         "port": db_config["port"],
 28 |         "user": db_config["user"],
 29 |         "passwd": db_config["passwd"],
 30 |         "database": db_config["database"],
 31 |         "cursorclass": pymysql.cursors.DictCursor,
 32 |         "charset": 'utf8mb4'
 33 |     }
 34 | 
 35 |     try:
 36 |         formatted_sql = SQLFormatter().format_sql(sql_query)
 37 |     except UnicodeDecodeError as e:
 38 |         print(f"格式化 SQL 时出错: {e}, sql_query: {sql_query}")
 39 |         return {"error": f"格式化 SQL 时出错: {e}"}
 40 | 
 41 |     try:
 42 |         parser = Parser(sql_query)
 43 |         table_names = parser.tables
 44 |         table_aliases = parser.tables_aliases
 45 |         data = parser.columns_dict
 46 |         select_fields = data.get('select', [])
 47 |         join_fields = data.get('join', [])
 48 |         where_fields = data.get('where', [])
 49 |         order_by_fields = data.get('order_by', [])
 50 |         group_by_fields = data.get('group_by', [])
 51 |         if 'SELECT' not in sql_query.upper():
 52 |             return {"error": "sql_helper工具仅支持select语句"}
 53 |     except Exception as e:
 54 |         return {"error": f"解析 SQL 出现语法错误：{str(e)}"}
 55 | 
 56 |     conn = None
 57 |     try:
 58 |         conn = pymysql.connect(**mysql_settings)
 59 |         cur = conn.cursor()
 60 | 
 61 |         sql = f"EXPLAIN {sql_query}"
 62 |         try:
 63 |             cur.execute(sql)
 64 |         except pymysql.err.ProgrammingError as e:
 65 |             return {"error": f"MySQL 内部错误：{e}"}
 66 |         except Exception as e:
 67 |             return {"error": f"MySQL 内部错误：{e}"}
 68 |         explain_result = cur.fetchall()
 69 | 
 70 |         e_column_names = list(explain_result[0].keys())
 71 |         e_result_values = []
 72 |         for row in explain_result:
 73 |             values = list(row.values())
 74 |             wrapped_values = [textwrap.fill(str(value), width=20) for value in values]
 75 |             e_result_values.append(wrapped_values)
 76 |         e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left")
 77 |     except Exception as e:
 78 |         return {"error": f"执行 EXPLAIN 或处理结果时出错: {e}"}
 79 |     finally:
 80 |         if conn:
 81 |             conn.close()
 82 | 
 83 |     index_suggestions = []
 84 |     contains_dot = False
 85 |     if len(where_fields) == 0:
 86 |         index_suggestions.append(f"你的SQL没有where条件.")
 87 |     else:
 88 |         contains_dot = any('.' in field for field in where_fields)
 89 | 
 90 |     if len(join_fields) != 0:
 91 |         table_field_dict = {}
 92 |         for field in join_fields:
 93 |             table_field = field.split('.')
 94 |             if len(table_field) == 2:
 95 |                 table_name = table_field[0]
 96 |                 field_name = table_field[1]
 97 |                 if table_name not in table_field_dict:
 98 |                     table_field_dict[table_name] = []
 99 |                 table_field_dict[table_name].append(field_name)
100 | 
101 |         for table_name, on_columns in table_field_dict.items():
102 |             for on_column in on_columns:
103 |                 try:
104 |                     show_index_sql = f"show index from {table_name} where Column_name = '{on_column}'"
105 |                     conn = pymysql.connect(**mysql_settings)
106 |                     cur = conn.cursor()
107 |                     cur.execute(show_index_sql)
108 |                     index_result = cur.fetchall()
109 |                     if not index_result:
110 |                         suggestion_text = "join联表查询，on关联字段必须增加索引！\n"
111 |                         suggestion_text += f"\033[91m需要添加索引：ALTER TABLE {table_name} ADD INDEX idx_{on_column}({on_column});\033[0m\n"
112 |                         suggestion_text += f"【{table_name}】表 【{on_column}】字段，索引分析：\n"
113 |                         index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
114 |                                                                                     table_name=table_name, index_columns=on_column)
115 |                         suggestion_text += index_static + "\n"
116 |                         index_suggestions.append(suggestion_text)
117 |                 except Exception as e:
118 |                     print(f"join查询分析出错: {e}")
119 |                     index_suggestions.append(f"join查询分析出错: {e}")
120 |                 finally:
121 |                     if conn:
122 |                         conn.close()
123 | 
124 | 
125 |     for row in explain_result:
126 |         table_name = row['table']
127 |         if table_name.lower().startswith('<derived'):
128 |             table_name = ""
129 |         if table_name == "":
130 |             continue
131 | 
132 |         add_index_fields = []
133 |         if (len(join_fields) != 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1)) or (len(join_fields) == 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1000)):
134 |             if has_table_alias(table_aliases) is False and contains_dot is False:
135 |                 if len(where_fields) != 0:
136 |                     for where_field in where_fields:
137 |                         where_clause_value = parse_where_condition(formatted_sql, where_field)
138 |                         if where_clause_value is not None:
139 |                             where_clause_value = where_clause_value.replace('\n', '').replace('\r', '')
140 |                             where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
141 |                             where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
142 |                             Cardinality = count_column_clause_value(table_name, where_field, where_clause_value, mysql_settings, sample_size)
143 |                         else:
144 |                             Cardinality = count_column_value(table_name, where_field, mysql_settings, sample_size)
145 |                         if Cardinality:
146 |                             count_value = Cardinality[0]['count']
147 |                             if where_clause_value is not None:
148 |                                 suggestion_text = f"取出表 【{table_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
149 |                                 index_suggestions.append(suggestion_text)
150 |                             else:
151 |                                 suggestion_text = f"取出表 【{table_name}】 where条件字段 【{where_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
152 |                                 index_suggestions.append(suggestion_text)
153 |                         else:
154 |                             add_index_fields.append(where_field)
155 | 
156 |                 if group_by_fields is not None and len(group_by_fields) != 0:
157 |                     for group_field in group_by_fields:
158 |                         Cardinality = count_column_value(table_name, group_field, mysql_settings, sample_size)
159 |                         if Cardinality:
160 |                             count_value = Cardinality[0]['count']
161 |                             suggestion_text = f"取出表 【{table_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
162 |                             index_suggestions.append(suggestion_text)
163 |                         else:
164 |                             add_index_fields.append(group_field)
165 | 
166 |                 if len(order_by_fields) != 0:
167 |                     for order_field in order_by_fields:
168 |                         Cardinality = count_column_value(table_name, order_field, mysql_settings, sample_size)
169 |                         if Cardinality:
170 |                             count_value = Cardinality[0]['count']
171 |                             suggestion_text = f"取出表 【{table_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
172 |                             index_suggestions.append(suggestion_text)
173 |                         else:
174 |                             add_index_fields.append(order_field)
175 | 
176 |                 add_index_fields = list(dict.fromkeys(add_index_fields).keys())
177 | 
178 |                 if len(add_index_fields) == 0:
179 |                     if not index_suggestions:
180 |                         index_suggestions.append(f"\n\u2192 \033[1;92m【{table_name}】 表，无需添加任何索引。\033[0m\n")
181 |                     else:
182 |                         index_suggestions.append(f"\n\u2192 \033[1;92m【{table_name}】 表，无需添加任何索引。\033[0m\n")
183 |                 elif len(add_index_fields) == 1:
184 |                     index_name = add_index_fields[0]
185 |                     index_columns = add_index_fields[0]
186 |                     try:
187 |                         conn = pymysql.connect(**mysql_settings)
188 |                         cur = conn.cursor()
189 |                         index_result = check_index_exist(mysql_settings, table_name=table_name, index_column=index_columns)
190 |                         if not index_result:
191 |                             suggestion_text = ""
192 |                             if row['key'] is None:
193 |                                 suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m\n"
194 |                             elif row['key'] is not None and row['rows'] >= 1:
195 |                                 suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m\n"
196 |                             suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段，索引分析：\n"
197 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
198 |                                                                                         table_name=table_name, index_columns=index_columns)
199 |                             suggestion_text += index_static + "\n"
200 |                             index_suggestions.append(suggestion_text)
201 |                         else:
202 |                             suggestion_text = f"\n\u2192 \033[1;92m【{table_name}】表 【{index_columns}】字段，索引已经存在，无需添加任何索引。\033[0m\n"
203 |                             suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段，索引分析：\n"
204 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
205 |                                                                                         table_name=table_name, index_columns=index_columns)
206 |                             suggestion_text += index_static + "\n"
207 |                             index_suggestions.append(suggestion_text)
208 |                     except Exception as e:
209 |                         print(f"单个索引分析出错: {e}")
210 |                         index_suggestions.append(f"单个索引分析出错: {e}")
211 |                     finally:
212 |                         if conn:
213 |                             conn.close()
214 | 
215 |                 else:
216 |                     merged_name = '_'.join(add_index_fields)
217 |                     merged_columns = ','.join(add_index_fields)
218 |                     try:
219 |                         conn = pymysql.connect(**mysql_settings)
220 |                         cur = conn.cursor()
221 |                         index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"],
222 |                                                                                                 table_name=table_name, index_columns=merged_columns,
223 |                                                                                                 index_number=len(add_index_fields))
224 |                         if index_result_list is None:
225 |                             suggestion_text = ""
226 |                             if row['key'] is None:
227 |                                 suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n"
228 |                             elif row['key'] is not None and row['rows'] >= 1:
229 |                                 suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n"
230 |                             suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段，索引分析：\n"
231 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
232 |                                                                                         table_name=table_name, index_columns=merged_columns)
233 |                             suggestion_text += index_static + "\n"
234 |                             index_suggestions.append(suggestion_text)
235 |                         else:
236 |                             suggestion_text = f"\n\u2192 \033[1;92m【{table_name}】表 【{merged_columns}】字段，联合索引已经存在，无需添加任何索引。\033[0m\n"
237 |                             suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段，索引分析：\n"
238 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
239 |                                                                                         table_name=table_name, index_columns=merged_columns)
240 |                             suggestion_text += index_static + "\n"
241 |                             index_suggestions.append(suggestion_text)
242 |                     except Exception as e:
243 |                         print(f"联合索引分析出错: {e}")
244 |                         index_suggestions.append(f"联合索引分析出错: {e}")
245 |                     finally:
246 |                         if conn:
247 |                             conn.close()
248 | 
249 | 
250 |             if has_table_alias(table_aliases) is True or contains_dot is True:
251 |                 if has_table_alias(table_aliases) is True:
252 |                     try:
253 |                         table_real_name = table_aliases[table_name]
254 |                     except KeyError:
255 |                         if table_name.startswith('<union'):
256 |                             table_real_name = ""
257 |                 else:
258 |                     table_real_name = table_name
259 | 
260 |                 if table_real_name != "":
261 |                     if len(where_fields) != 0:
262 |                         where_matching_fields = []
263 |                         for field in where_fields:
264 |                             if field.startswith(table_real_name + '.'):
265 |                                 where_matching_fields.append(field.split('.')[1])
266 | 
267 |                         for where_field in where_matching_fields:
268 |                             talia_clause = table_name + '.' + where_field
269 |                             where_clause_value = parse_where_condition(formatted_sql, talia_clause)
270 |                             if where_clause_value is not None:
271 |                                 where_clause_value = where_clause_value.replace('\n', '').replace('\r', '')
272 |                                 where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
273 |                                 prefix = where_clause_value.split('.')[0]
274 |                                 where_clause_value = where_clause_value.replace(prefix + '.', '')
275 |                                 if "." in where_clause_value:
276 |                                     Cardinality = count_column_value(table_real_name, where_field, mysql_settings, sample_size)
277 |                                 else:
278 |                                     Cardinality = count_column_clause_value(table_real_name, where_field, where_clause_value, mysql_settings, sample_size)
279 |                             else:
280 |                                 Cardinality = count_column_value(table_real_name, where_field, mysql_settings, sample_size)
281 |                             if Cardinality:
282 |                                 count_value = Cardinality[0]['count']
283 |                                 if where_clause_value is not None:
284 |                                     suggestion_text = f"取出表 【{table_real_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
285 |                                     index_suggestions.append(suggestion_text)
286 |                                 else:
287 |                                     suggestion_text = f"取出表 【{table_real_name}】 where条件字段 【{where_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
288 |                                     index_suggestions.append(suggestion_text)
289 |                             else:
290 |                                 add_index_fields.append(where_field)
291 | 
292 |                     if group_by_fields is not None and len(group_by_fields) != 0:
293 |                         group_matching_fields = []
294 |                         for field in group_by_fields:
295 |                             if field.startswith(table_real_name + '.'):
296 |                                 group_matching_fields.append(field.split('.')[1])
297 |                         for group_field in group_matching_fields:
298 |                             Cardinality = count_column_value(table_real_name, group_field, mysql_settings, sample_size)
299 |                             if Cardinality:
300 |                                 count_value = Cardinality[0]['count']
301 |                                 suggestion_text = f"取出表 【{table_real_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
302 |                                 index_suggestions.append(suggestion_text)
303 |                             else:
304 |                                 add_index_fields.append(group_field)
305 | 
306 |                     if len(order_by_fields) != 0:
307 |                         order_matching_fields = []
308 |                         for field in order_by_fields:
309 |                             if field.startswith(table_real_name + '.'):
310 |                                 order_matching_fields.append(field.split('.')[1])
311 |                         for order_field in order_matching_fields:
312 |                             Cardinality = count_column_value(table_real_name, order_field, mysql_settings, sample_size)
313 |                             if Cardinality:
314 |                                 count_value = Cardinality[0]['count']
315 |                                 suggestion_text = f"取出表 【{table_real_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
316 |                                 index_suggestions.append(suggestion_text)
317 |                             else:
318 |                                 add_index_fields.append(order_field)
319 | 
320 |                     add_index_fields = list(dict.fromkeys(add_index_fields).keys())
321 | 
322 |                     if len(add_index_fields) == 0:
323 |                         index_suggestions.append(f"\n\u2192 \033[1;92m【{table_real_name}】 表，无需添加任何索引。\033[0m\n")
324 |                     elif len(add_index_fields) == 1:
325 |                         index_name = add_index_fields[0]
326 |                         index_columns = add_index_fields[0]
327 |                         try:
328 |                             conn = pymysql.connect(**mysql_settings)
329 |                             cur = conn.cursor()
330 |                             index_result = check_index_exist(mysql_settings, table_name=table_real_name, index_column=index_columns)
331 |                             if not index_result:
332 |                                 suggestion_text = ""
333 |                                 if row['key'] is None:
334 |                                     suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\033[0m\n"
335 |                                 elif row['key'] is not None and row['rows'] >= 1:
336 |                                     suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\033[0m\n"
337 |                                 suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段，索引分析：\n"
338 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
339 |                                                                                             table_name=table_real_name, index_columns=index_columns)
340 |                                 suggestion_text += index_static + "\n"
341 |                                 index_suggestions.append(suggestion_text)
342 |                             else:
343 |                                 suggestion_text = f"\n\u2192 \033[1;92m【{table_real_name}】表 【{index_columns}】字段，索引已经存在，无需添加任何索引。\033[0m\n"
344 |                                 suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段，索引分析：\n"
345 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
346 |                                                                                             table_name=table_real_name, index_columns=index_columns)
347 |                                 suggestion_text += index_static + "\n"
348 |                                 index_suggestions.append(suggestion_text)
349 |                         except Exception as e:
350 |                             print(f"单个别名表索引分析出错: {e}")
351 |                             index_suggestions.append(f"单个别名表索引分析出错: {e}")
352 |                         finally:
353 |                             if conn:
354 |                                 conn.close()
355 |                     else:
356 |                         merged_name = '_'.join(add_index_fields)
357 |                         merged_columns = ','.join(add_index_fields)
358 |                         try:
359 |                             conn = pymysql.connect(**mysql_settings)
360 |                             cur = conn.cursor()
361 |                             index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"],
362 |                                                                                                     table_name=table_real_name, index_columns=merged_columns,
363 |                                                                                                     index_number=len(add_index_fields))
364 |                             if index_result_list is None:
365 |                                 suggestion_text = ""
366 |                                 if row['key'] is None:
367 |                                     suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n"
368 |                                 elif row['key'] is not None and row['rows'] >= 1:
369 |                                     suggestion_text += f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n"
370 |                                 suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段，索引分析：\n"
371 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
372 |                                                                                             table_name=table_real_name, index_columns=merged_columns)
373 |                                 suggestion_text += index_static + "\n"
374 |                                 index_suggestions.append(suggestion_text)
375 |                             else:
376 |                                 suggestion_text = f"\n\u2192 \033[1;92m【{table_real_name}】表 【{merged_columns}】字段，联合索引已经存在，无需添加任何索引。\033[0m\n"
377 |                                 suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段，索引分析：\n"
378 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
379 |                                                                                             table_name=table_real_name, index_columns=merged_columns)
380 |                                 suggestion_text += index_static + "\n"
381 |                                 index_suggestions.append(suggestion_text)
382 |                         except Exception as e:
383 |                             print(f"联合别名表索引分析出错: {e}")
384 |                             index_suggestions.append(f"联合别名表索引分析出错: {e}")
385 |                         finally:
386 |                             if conn:
387 |                                 conn.close()
388 | 
389 |     extra_suggestions = []
390 |     where_clause = parse_where_condition_full(formatted_sql)
391 |     if where_clause:
392 |         like_r, like_expression = check_percent_position(where_clause)
393 |         if like_r is True:
394 |             extra_suggestions.append(f"like模糊匹配，百分号在首位，【{like_expression}】是不能用到索引的，例如like '%张三%'，可以考虑改成like '张三%'，这样是可以用到索引的，如果业务上不能改，可以考虑用全文索引。\n")
395 | 
396 |         function_r = extract_function_index(where_clause)
397 |         if function_r is not False:
398 |             extra_suggestions.append(f"索引列使用了函数作计算：【{function_r}】，会导致索引失效。"
399 |                                      f"如果你是MySQL 8.0可以考虑创建函数索引；如果你是MySQL 5.7，你要更改你的SQL逻辑了。\n")
400 | 
401 |     try:
402 |         ai_suggestions = optimize_sql(formatted_sql)
403 |         if not isinstance(ai_suggestions, str):
404 |             ai_suggestions = str(ai_suggestions)
405 |         #ai_suggestions = ai_suggestions.replace("If you want to run the SQL query, connect to a database first. See here: https://vanna.ai/docs/databases.html", "").strip()
406 |         #ai_suggestions = ai_suggestions.replace("-------------------------------------------------------", "").strip()
407 |     except Exception as e:
408 |         ai_suggestions = f"调用 AI 优化出错: {e}"
409 |         print(f"调用 AI 优化出错: {e}")
410 | 
411 |     return {
412 |         "formatted_sql": formatted_sql,
413 |         "explain_table": e_table,
414 |         "index_suggestions": "".join(index_suggestions),
415 |         "extra_suggestions": "".join(extra_suggestions),
416 |         "ai_suggestions": ai_suggestions,
417 |         "no_suggestions": not index_suggestions and not extra_suggestions and not ai_suggestions
418 |     }
419 | 
420 | 
@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the analysis form and, on POST, render the analysis of the SQL.

    A GET request renders the empty form.  A POST request reads the SQL text
    and the MySQL connection parameters from the form, runs ``analyze_sql``,
    and renders either its error or its results, echoing the submitted values
    (except the password) back to the template.
    """
    if request.method != "POST":
        return render_template("index.html")

    form = request.form
    sql_query = form.get("sql_query")
    host = form.get("host")
    port = form.get("port", type=int, default=3306)
    user = form.get("user")
    password = form.get("password")
    database = form.get("database")

    # Values echoed back into the form on every POST response.
    echoed = {
        "sql_query": sql_query,
        "host": host,
        "port": port,
        "user": user,
        "database": database,
    }

    if not (host and user and password and database):
        return render_template(
            "index.html",
            error="请提供完整的 MySQL 数据库连接信息 (请手动填写所有数据库参数).",
            **echoed,
        )

    config_source = "params"
    analysis_result = analyze_sql(sql_query, {
        "host": host,
        "port": port,
        "user": user,
        "passwd": password,
        "database": database,
    })

    if "error" in analysis_result:
        return render_template(
            "index.html",
            error=analysis_result["error"],
            config_source=config_source,
            **echoed,
        )

    result_keys = (
        "formatted_sql",
        "explain_table",
        "index_suggestions",
        "extra_suggestions",
        "ai_suggestions",
        "no_suggestions",
    )
    results = {key: analysis_result[key] for key in result_keys}
    return render_template("index.html", config_source=config_source, **results, **echoed)
468 | 
if __name__ == "__main__":
    import os

    # The Werkzeug interactive debugger allows arbitrary code execution from
    # the browser; combined with host='0.0.0.0' it would be reachable from the
    # network.  Debug mode is therefore opt-in: set FLASK_DEBUG=1 to enable it.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(host='0.0.0.0', debug=debug_enabled)
471 | 


--------------------------------------------------------------------------------
/Flask_src/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask
2 | PyMySQL
3 | sql-metadata
4 | tabulate
5 | rich
6 | PyYAML
7 | vanna==0.0.36
8 | sqlparse
9 | 


--------------------------------------------------------------------------------
/Flask_src/sql_alias.py:
--------------------------------------------------------------------------------
 1 | def has_table_alias(table_alias):
 2 |     if isinstance(table_alias, dict):
 3 |         table_alias = {k.lower(): v.lower() for k, v in table_alias.items()}
 4 |         if 'join' in table_alias or 'on' in table_alias or 'where' in table_alias or 'group by' in table_alias or 'order by' in table_alias or 'limit' in table_alias or not table_alias:
 5 |            return False # 没有别名
 6 |         else:
 7 |            return True #有别名
 8 |     elif isinstance(table_alias, list):
 9 |         return False # 没有别名
10 |     else:
11 |         pass
12 | 


--------------------------------------------------------------------------------
/Flask_src/sql_count_value.py:
--------------------------------------------------------------------------------
  1 | import pymysql
  2 | from rich.progress import Progress, TimeElapsedColumn, TextColumn, BarColumn
  3 | 
  4 | def count_column_value(table_name, field_name, mysql_settings, sample_size):
  5 |     with pymysql.connect(**mysql_settings) as conn:
  6 |         with conn.cursor() as cursor:
  7 |             """
  8 |             在这个查询中，使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000，则使用
  9 |             (SELECT COUNT(*) FROM {table_name}) / 2
 10 |             作为阈值，即表的实际大小除以2；否则使用 {sample_size} / 2 作为阈值。
 11 |             """
 12 | 
 13 |             # 如果你的数据库是MySQL 8.0，那么推荐用 CTE（公共表达式）的形式
 14 |             '''
 15 |             sql = f"""
 16 |             WITH subquery AS (
 17 |                 SELECT {field_name}
 18 |                 FROM {table_name}
 19 |                 LIMIT {sample_size}
 20 |             )
 21 |             SELECT COUNT(*) as count
 22 |             FROM subquery
 23 |             GROUP BY {field_name}
 24 |             HAVING COUNT(*) >= 
 25 |                 CASE 
 26 |                     WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 
 27 |                     ELSE {sample_size} / 2 
 28 |                 END;
 29 |             """
 30 |             '''
 31 | 
 32 |             # 默认采用子查询兼容MySQL 5.7版本
 33 |             sql = f"""
 34 |             SELECT COUNT(*) as count
 35 |             FROM (
 36 |                 SELECT {field_name}
 37 |                 FROM {table_name}
 38 |                 LIMIT {sample_size}
 39 |             ) AS subquery
 40 |             GROUP BY {field_name}
 41 |             HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 
 42 |             THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END;
 43 |             """
 44 | 
 45 |             # print(sql)
 46 |             with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress:
 47 |                 task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{field_name}[/bold magenta]...", total=1)
 48 |                 cursor.execute(sql)
 49 |                 progress.update(task, completed=1)
 50 | 
 51 |             results = cursor.fetchall()
 52 | 
 53 |     if results:
 54 |         # 如果有超过半数的重复数据
 55 |         return results
 56 |     else:
 57 |         return False
 58 | 
 59 | 
 60 | def count_column_clause_value(table_name, field_name, where_clause_value, mysql_settings, sample_size):
 61 |     with pymysql.connect(**mysql_settings) as conn:
 62 |         with conn.cursor() as cursor:
 63 |             
 64 |             """
 65 |             在这个查询中，使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000，则使用
 66 |             (SELECT COUNT(*) FROM {table_name}) / 2
 67 |             作为阈值，即表的实际大小除以2；否则使用 {sample_size} / 2 作为阈值。
 68 |             """
 69 | 
 70 |             # 如果你的数据库是MySQL 8.0，那么推荐用 CTE（公共表达式）的形式
 71 |             '''
 72 |             sql = f"""
 73 |             WITH subquery AS (
 74 |                 SELECT {field_name}
 75 |                 FROM {table_name}
 76 |                 LIMIT {sample_size}
 77 |             )
 78 |             SELECT COUNT(*) as count
 79 |             FROM subquery
 80 |             GROUP BY {field_name}
 81 |             HAVING COUNT(*) >= 
 82 |                 CASE 
 83 |                     WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 
 84 |                     ELSE {sample_size} / 2 
 85 |                 END;
 86 |             """
 87 |             '''
 88 | 
 89 |             # 默认采用子查询兼容MySQL 5.7版本
 90 |             sql = f"""
 91 |             SELECT COUNT(*) as count
 92 |             FROM (
 93 |                 SELECT {field_name}
 94 |                 FROM {table_name}
 95 |                 WHERE {where_clause_value}
 96 |                 LIMIT {sample_size}
 97 |             ) AS subquery
 98 |             GROUP BY {field_name}
 99 |             HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 
100 |             THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END;
101 |             """
102 | 
103 |             #print(sql)
104 |             with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress:
105 |                 task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{where_clause_value}[/bold magenta]...", total=1)
106 |                 cursor.execute(sql)
107 |                 progress.update(task, completed=1)
108 | 
109 |             results = cursor.fetchall()
110 | 
111 |     if results:
112 |         # 如果有超过半数的重复数据
113 |         return results
114 |     else:
115 |         return False
116 | 


--------------------------------------------------------------------------------
/Flask_src/sql_extra.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | def check_percent_position(string):
 4 |     string = string.lower()
 5 |     pattern = r".*like\s+'%|.*like\s+concat\(+'%|.*regexp\s+"
 6 |     matches = re.findall(pattern, string)
 7 |     if matches:
 8 |         like_pattern = r"like\s+(?:concat\(.*?\)|'%%'|\'.*?%(?:.*?)?\')"
 9 |         like_match = re.search(like_pattern, string)
10 |         if like_match:
11 |             return True, like_match.group()
12 |         #return True
13 |     return False, None
14 | 
15 | 
def extract_function_index(string):
    """Extract ``name(...)`` calls that are followed by a comparison operator.

    Each match runs from the function name up to and including the last
    ``>``, ``=``, ``<`` or ``!`` character found after the closing
    parenthesis on the same line.

    Args:
        string: SQL text to inspect.

    Returns:
        The matched fragments joined with ``', '``, or ``False`` when nothing
        matches.
    """
    call_pattern = re.compile(r'\b(\w+(\(.*\).*[>=<!=<>]))')
    fragments = [hit.group(1) for hit in call_pattern.finditer(string)]
    return ', '.join(fragments) if fragments else False
27 | 


--------------------------------------------------------------------------------
/Flask_src/sql_format_class.py:
--------------------------------------------------------------------------------
 1 | import sqlparse
 2 | 
 3 | class SQLFormatter:
 4 |     def format_sql(self, sql_query):
 5 |         """
 6 |         格式化 SQL 查询语句
 7 |         """
 8 |         formatted_sql = sqlparse.format(sql_query, reindent=True, keyword_case='upper')
 9 | 
10 |         return formatted_sql
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/Flask_src/sql_index.py:
--------------------------------------------------------------------------------
  1 | import textwrap
  2 | from tabulate import tabulate
  3 | import pymysql
  4 | 
  5 | def execute_index_query(mysql_settings, database, table_name, index_columns):
  6 |     index_columns = index_columns
  7 |     index_columns = index_columns.split(',')
  8 |     updated_columns = [f"'{column.strip()}'" for column in index_columns]
  9 |     final_columns = ', '.join(updated_columns)
 10 |     sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns})"
 11 |     #print(sql)
 12 |     try:
 13 |         conn = pymysql.connect(**mysql_settings)
 14 |         cur = conn.cursor()
 15 |         cur.execute(sql)
 16 |         index_result = cur.fetchall()
 17 | 
 18 |         if not index_result:
 19 |             print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。")
 20 | 
 21 |         # 提取列名
 22 |         e_column_names = [desc[0] for desc in cur.description]
 23 | 
 24 |         # 提取结果值并进行自动换行处理
 25 |         e_result_values = []
 26 |         for row in index_result:
 27 |             values = list(row.values())
 28 |             wrapped_values = [textwrap.fill(str(value), width=30) for value in values]
 29 |             e_result_values.append(wrapped_values)
 30 | 
 31 |         # 将结果格式化为表格（包含竖线）
 32 |         e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left")
 33 | 
 34 |         return e_table
 35 | 
 36 |     except pymysql.err.ProgrammingError as e:
 37 |         print("MySQL 内部错误：",e)
 38 |         return None
 39 |     except Exception as e:
 40 |         print("MySQL 内部错误：",e)
 41 |         return None
 42 |     finally:
 43 |         if cur:
 44 |             cur.close()
 45 |         if conn:
 46 |             conn.close()
 47 | 
 48 | #########################################################
 49 | 
 50 | def check_index_exist(mysql_settings, table_name, index_column):
 51 |     show_index_sql = f"show index from {table_name} where Column_name = '{index_column}'"
 52 |     try:
 53 |         conn = pymysql.connect(**mysql_settings)
 54 |         cur = conn.cursor()
 55 |         cur.execute(show_index_sql)
 56 |         index_result = cur.fetchall()
 57 | 
 58 |         #if not index_result:
 59 |             #print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。")
 60 | 
 61 |         return index_result
 62 | 
 63 |     except pymysql.err.ProgrammingError as e:
 64 |         print("MySQL 内部错误：",e)
 65 |         return None
 66 |     except Exception as e:
 67 |         print("MySQL 内部错误：",e)
 68 |         return None
 69 |     finally:
 70 |         if cur:
 71 |             cur.close()
 72 |         if conn:
 73 |             conn.close()
 74 | 
 75 | #########################################################
 76 | 
 77 | def check_index_exist_multi(mysql_settings, database, table_name, index_columns, index_number):
 78 |     index_columns = index_columns
 79 |     index_columns = index_columns.split(',')
 80 |     updated_columns = [f"'{column.strip()}'" for column in index_columns]
 81 |     final_columns = ', '.join(updated_columns)
 82 |     sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns}) GROUP BY INDEX_NAME HAVING COUNT(INDEX_NAME) = {index_number}"
 83 |     #print(sql)
 84 |     try:
 85 |         conn = pymysql.connect(**mysql_settings)
 86 |         cur = conn.cursor()
 87 |         cur.execute(sql)
 88 |         index_result = cur.fetchall()
 89 | 
 90 |         if not index_result:
 91 |             return None
 92 | 
 93 |         return index_result
 94 | 
 95 |     except pymysql.err.ProgrammingError as e:
 96 |         print("MySQL 内部错误：",e)
 97 |         return None
 98 |     except Exception as e:
 99 |         print("MySQL 内部错误：",e)
100 |         return None
101 |     finally:
102 |         if cur:
103 |             cur.close()
104 |         if conn:
105 |             conn.close()
106 | 
107 | 


--------------------------------------------------------------------------------
/Flask_src/sqlai.py:
--------------------------------------------------------------------------------
 1 | from vanna.remote import VannaDefault
 2 | 
 3 | def optimize_sql(original_sql):
 4 |     """
 5 |     调用 vanna.ai LLM 接口优化 SQL 查询语句，并返回优化后的 SQL 字符串。
 6 | 
 7 |     Args:
 8 |         original_sql (str): 原始的 SQL 查询语句。
 9 | 
10 |     Returns:
11 |         str: 优化后的 SQL 查询语句。 如果调用过程中发生错误，则返回包含错误信息的字符串。
12 |     """
13 |     try:
14 |         # 创建 VannaDefault 实例
15 |         vn = VannaDefault(model='sql_helper', api_key='xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
16 | 
17 |         # 输出调用信息 (可选，仅用于调试)
18 |         print('\033[94m以下是调用的vanna.ai LLM接口.\033[0m')
19 |         print('优化前的SQL是：')
20 |         print(original_sql)
21 |         print('-' * 55)
22 | 
23 |         # 获取优化后的 SQL
24 |         #vn.ask('How to optimize this SQL : {}'.format(original_sql))
25 |         optimized_sql = vn.generate_sql('How to optimize this SQL : {}'.format(original_sql))
26 |  
27 |         # 输出优化后的 SQL (可选，仅用于调试)
28 |         print('\033[92m优化后的SQL是：\033[0m')
29 | 
30 |         return optimized_sql
31 | 
32 |     except Exception as e:
33 |         # 捕获异常，返回包含错误信息的字符串
34 |         error_message = f"调用 vanna.ai 优化出错: {e}"
35 |         print(error_message)  # 打印错误信息到控制台 (可选)
36 |         return error_message
37 | 


--------------------------------------------------------------------------------
/Flask_src/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="zh">
  3 | <head>
  4 |     <meta charset="UTF-8">
  5 |     <title>SQLAI Helper 工具</title>
  6 |     <link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css" rel="stylesheet" />
  7 |     <style>
  8 |         body {
  9 |             font-family: 'Arial', sans-serif;
 10 |             background: #f1f1f1;
 11 |             margin: 0;
 12 |             padding: 0;
 13 |             display: flex;
 14 |             justify-content: center;
 15 |             align-items: flex-start;
 16 |             height: 100vh;
 17 |             color: #333;
 18 |             overflow-y: auto;
 19 |         }
 20 |         .container {
 21 |             background: #fff;
 22 |             border-radius: 8px;
 23 |             box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
 24 |             width: 80%;
 25 |             max-width: 900px;
 26 |             padding: 30px;
 27 |             margin-top: -40px;
 28 |         }
 29 |         h1 {
 30 |             text-align: center;
 31 |             color: #007bff;
 32 |             margin-bottom: 20px;
 33 |         }
 34 |         .config-box {
 35 |             background: #fafafa;
 36 |             padding: 20px;
 37 |             border-radius: 8px;
 38 |             margin-bottom: 30px;
 39 |             box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 40 |         }
 41 |         .config-box label {
 42 |             font-weight: bold;
 43 |             margin-top: 5px;
 44 |             display: block;
 45 |         }
 46 |         .inline-inputs {
 47 |             display: flex;
 48 |             flex-wrap: wrap;
 49 |             gap: 10px;
 50 |         }
 51 |         .inline-inputs input {
 52 |             flex: 1;
 53 |             min-width: 200px;
 54 |         }
 55 |         .config-box textarea {
 56 |             width: 100%;
 57 |             height: 150px;
 58 |             padding: 12px;
 59 |             margin: 8px 0;
 60 |             border-radius: 5px;
 61 |             border: 1px solid #ddd;
 62 |             font-size: 16px;
 63 |             resize: vertical;
 64 |         }
 65 |         .button-container {
 66 |             text-align: center;
 67 |             margin-top: 20px;
 68 |         }
 69 |         button {
 70 |             padding: 12px 24px;
 71 |             background-color: #007bff;
 72 |             border: none;
 73 |             color: white;
 74 |             font-size: 16px;
 75 |             border-radius: 5px;
 76 |             cursor: pointer;
 77 |             transition: background-color 0.3s;
 78 |         }
 79 |         button:hover {
 80 |             background-color: #0056b3;
 81 |         }
 82 |         .output-box {
 83 |             background: #f9f9f9;
 84 |             border-radius: 8px;
 85 |             padding: 15px;
 86 |             margin-top: 10px;
 87 |             box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 88 |             overflow-x: auto;
 89 |         }
 90 |         .section-title {
 91 |             font-weight: bold;
 92 |             font-size: 18px;
 93 |             color: #333;
 94 |             margin-bottom: 10px;
 95 |         }
 96 |         .suggestion, .ai-suggestion, .error {
 97 |             white-space: pre-wrap;
 98 |             word-wrap: break-word;
 99 |         }
100 |         .error {
101 |             color: red;
102 |         }
103 |         .ai-suggestion {
104 |             color: #007bff;
105 |         }
106 |         .success-suggestion {
107 |             background: #d4edda;
108 |             color: #155724;
109 |             padding: 10px;
110 |             border-radius: 5px;
111 |         }
112 |         pre.language-sql {
113 |             background: #f9f9f9;
114 |             padding: 15px;
115 |             border-radius: 8px;
116 |             overflow-x: auto;
117 |         }
118 |         pre.language-sql code {
119 |             display: block;
120 |             white-space: pre-wrap;
121 |             word-wrap: break-word;
122 |             line-height: 1.2;
123 |         }
124 |         table {
125 |             width: 100%;
126 |             border-collapse: collapse;
127 |             margin: 10px 0;
128 |         }
129 |         th, td {
130 |             border: 1px solid #ddd;
131 |             padding: 8px;
132 |             text-align: left;
133 |         }
134 |         th {
135 |             background-color: #f2f2f2;
136 |             font-weight: bold;
137 |         }
138 |         .ai-sql-table td {
139 |             white-space: pre-wrap;
140 |             word-wrap: break-word;
141 |             font-family: monospace;
142 |             color: #0056b3;
143 |         }
144 |     </style>
145 | </head>
146 | <body>
147 |     <div class="container">
148 |         <h1>SQLAI Helper 工具</h1>
149 | 
150 |         <div class="config-box">
151 |             <form method="post">
152 |                 <label for="sql_query">输入 SQL 自动给出索引优化建议 + SQL重写建议:</label>
153 |                 <textarea id="sql_query" name="sql_query" placeholder="请输入 SQL 查询语句" required>{{ sql_query or '' }}</textarea>
154 | 
155 |                 <p><strong>数据库连接配置</strong> (手动填写数据库参数)</p>
156 | 
157 |                 <div class="inline-inputs">
158 |                     <div>
159 |                         <label for="host">MySQL 主机名:</label>
160 |                         <input type="text" id="host" name="host" placeholder="MySQL 主机名" value="{{ host or '' }}" required>
161 |                     </div>
162 |                     <div>
163 |                         <label for="port">MySQL 端口:</label>
164 |                         <input type="number" id="port" name="port" placeholder="MySQL 端口 (默认: 3306)" value="{{ port or 3306 }}" required>
165 |                     </div>
166 |                     <div>
167 |                         <label for="user">MySQL 用户名:</label>
168 |                         <input type="text" id="user" name="user" placeholder="MySQL 用户名" value="{{ user or '' }}" required>
169 |                     </div>
170 |                     <div>
171 |                         <label for="password">MySQL 密码:</label>
172 |                         <input type="password" id="password" name="password" placeholder="MySQL 密码" value="{{ password or '' }}" required>
173 |                     </div>
174 |                     <div>
175 |                         <label for="database">数据库名:</label>
176 |                         <input type="text" id="database" name="database" placeholder="数据库名" value="{{ database or '' }}" required>
177 |                     </div>
178 |                 </div>
179 | 
180 |                 <div class="button-container">
181 |                     <br>
182 |                     <button type="submit" id="submitButton" onclick="showLoading()">分析 SQL</button>
183 |                 </div>
184 |             </form>
185 |         </div>
186 | 
187 |         <!-- 加载动画 -->
188 |         <div id="loading" style="display: none; text-align: center; margin-top: 20px;">
189 |             <img src="https://cdnjs.cloudflare.com/ajax/libs/loading.io/1.0.10/assets/img/loading.svg" alt="加载中..." width="50" height="50">
190 |             <p>正在分析，请稍候...</p>
191 |         </div>
192 | 
193 |         {% if error %}
194 |         <div class="output-box error" id="output-start">
195 |             <p class="section-title">错误信息:</p>
196 |             <pre class="language-sql"><code>{{ error|default('<span class="red-text">未检测到错误</span>')|safe }}</code></pre>
197 |         </div>
198 |         {% endif %}
199 | 
200 |         {% if formatted_sql %}
201 |         <div class="output-box" id="output-start">
202 |             <p class="section-title">1) 你刚才输入的 SQL 语句是:</p>
203 |             <pre class="language-sql"><code>{{ formatted_sql|default('<span class="red-text">无输入 SQL</span>')|safe }}</code></pre>
204 |         </div>
205 |         {% endif %}
206 | 
207 |         {% if explain_table %}
208 |         <div class="output-box">
209 |             <p class="section-title">2) EXPLAIN 执行计划:</p>
210 |             {{ explain_table|safe }}
211 |         </div>
212 |         {% endif %}
213 | 
214 |         {% if index_suggestions %}
215 |         <div class="output-box">
216 |             <p class="section-title">3) 索引优化建议:</p>
217 |             <pre class="language-sql"><code>{{ index_suggestions|default('<span class="red-text">无索引优化建议</span>')|safe }}</code></pre>
218 |         </div>
219 |         {% endif %}
220 | 
221 |         {% if extra_suggestions %}
222 |         <div class="output-box">
223 |             <p class="section-title">4) 额外的建议:</p>
224 |             <pre class="language-sql"><code>{{ extra_suggestions|default('<span class="red-text">无额外建议</span>')|safe }}</code></pre>
225 |         </div>
226 |         {% endif %}
227 | 
228 |         {% if ai_suggestions %}
229 |         <div class="output-box">
230 |             <p class="section-title">5) DeepSeek 的建议:</p>
231 |             <table class="ai-sql-table">
232 |                 <thead>
233 |                     <tr>
234 |                         <th>优化后的 SQL 语句</th>
235 |                     </tr>
236 |                 </thead>
237 |                 <tbody>
238 |                     <tr>
239 |                         <td>{{ ai_suggestions|default('暂无 AI 建议')|safe }}</td>
240 |                     </tr>
241 |                 </tbody>
242 |             </table>
243 |         </div>
244 |         {% endif %}
245 | 
246 |         {% if no_suggestions and formatted_sql and explain_table %}
247 |         <div class="output-box success-suggestion">
248 |             <p class="section-title">分析结果:</p>
249 |             <pre>SQL 语句分析完成，当前 SQL 语句执行计划良好，没有额外的优化建议！</pre>
250 |         </div>
251 |         {% endif %}
252 |     </div>
253 | 
254 |     <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/prism.min.js"></script>
255 |     <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/components/prism-sql.min.js"></script>
256 |     <script>
257 |         // 显示加载动画并禁用按钮
258 |         function showLoading() {
259 |             document.getElementById('loading').style.display = 'block';
260 |             document.getElementById('submitButton').disabled = true;
261 |         }
262 | 
263 |         // 页面加载完成后初始化状态
264 |         window.onload = function() {
265 |             Prism.highlightAll();
266 |             document.getElementById('loading').style.display = 'none';
267 |             document.getElementById('submitButton').disabled = false;
268 | 
269 |             // 如果有输出结果，滚动到 #output-start
270 |             var outputStart = document.getElementById('output-start');
271 |             if (outputStart) {
272 |                 outputStart.scrollIntoView({ behavior: 'smooth' });
273 |             }
274 |         };
275 | 
276 |         // 根据后端传递的 is_processing 控制加载动画
277 |         {% if is_processing %}
278 |             document.getElementById('loading').style.display = 'block';
279 |             document.getElementById('submitButton').disabled = true;
280 |         {% else %}
281 |             document.getElementById('loading').style.display = 'none';
282 |             document.getElementById('submitButton').disabled = false;
283 | 
284 |             // 处理完成后滚动到输出结果
285 |             var outputStart = document.getElementById('output-start');
286 |             if (outputStart) {
287 |                 outputStart.scrollIntoView({ behavior: 'smooth' });
288 |             }
289 |         {% endif %}
290 |     </script>
291 | </body>
292 | </html>
293 | 


--------------------------------------------------------------------------------
/Flask_src/where_clause.py:
--------------------------------------------------------------------------------
 1 | import sqlparse
 2 | 
 3 | def parse_where_condition(sql, column):
 4 |     parsed = sqlparse.parse(sql)
 5 |     stmt = parsed[0]
 6 | 
 7 |     where_column = ""
 8 |     where_expression = ""
 9 |     where_value = ""
10 |     where_clause = ""
11 |     found = False
12 |     result = ""
13 | 
14 |     for token in stmt.tokens:
15 |         if isinstance(token, sqlparse.sql.Where):
16 |             where_clause = token.value
17 |             conditions = []
18 |             for cond_tok in token.tokens:
19 |                 if isinstance(cond_tok, sqlparse.sql.Comparison):
20 |                     left_token = cond_tok.left.value.strip()
21 |                     if column == left_token:
22 |                         #return f"比较运算符: {cond_tok.value.strip()}"
23 |                         return cond_tok.value.strip()
24 | 
25 |                 if isinstance(cond_tok, sqlparse.sql.Identifier) and cond_tok.value == column:
26 |                     found = True
27 | 
28 |                 if found:
29 |                     if isinstance(cond_tok, sqlparse.sql.Token) and cond_tok.value.upper() in ["OR","AND"]:
30 |                         break
31 |                     else:
32 |                         result += cond_tok.value
33 | 
34 |                 if isinstance(cond_tok, sqlparse.sql.Parenthesis) and found:
35 |                     break
36 | 
37 |     if len(result) != 0:
38 |         #return f"逻辑运算符: {result.strip()}"
39 |         return result.strip()
40 |     else:
41 |         #return "没有找到该字段的条件表达式"
42 |         return None
43 | 
44 | 
def parse_where_condition_full(sql):
    """Return the text of the first statement from its WHERE clause onward.

    Args:
        sql: SQL text to parse.

    Returns:
        The WHERE token's text followed by the text of every later top-level
        token, stripped; ``None`` when the statement has no WHERE token.
    """
    statement = sqlparse.parse(sql)[0]

    pieces = []
    where_seen = False

    for tok in statement.tokens:
        if where_seen:
            pieces.append(tok.value)
        if isinstance(tok, sqlparse.sql.Where):
            where_seen = True
            pieces.append(tok.value)

    tail = "".join(pieces)
    return tail.strip() if tail else None
60 | 
61 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # sql_helper - 输入SQL自动给出索引优化建议+SQL重写建议
  2 | 
  3 | #### 2025年2月26日更新：sqlai_helper 接入DeepSeek
  4 | ```
  5 | 注：openai库需要调用ssl，由于python3.10之后版本不再支持libressl使用ssl，需要用openssl1.1.1版本或者更高版本
  6 | 
  7 | 参见：python3.10编译安装报SSL失败解决方法
  8 | https://blog.csdn.net/mdh17322249/article/details/123966953
  9 | ```
 10 | ```
 11 | 第一步、先去 https://platform.deepseek.com/api_keys 申请密钥并充值1元，替换sql_deepseek.py文件里的密钥。
 12 | 第二步，运行：
 13 | shell> cd sql_helper_1.1/deepseek_flash_src/
 14 | shell> pip3 install -r requirements.txt -i "http://mirrors.aliyun.com/pypi/simple" --trusted-host "mirrors.aliyun.com"
 15 | shell> python3 app.py
 16 |  * Running on http://192.168.137.131:5000
 17 | ```
 18 | 
 19 | #### 2025年2月20日更新：sqlai_helper 增加 Flask，无需部署PHP，可直接访问。
 20 | ```
 21 | shell> pip3 install -r requirements.txt -i "http://mirrors.aliyun.com/pypi/simple" --trusted-host "mirrors.aliyun.com"
 22 | shell> python3 app.py
 23 |  * Running on http://192.168.137.131:5000
 24 | ```
 25 | 
 26 | ![image](https://github.com/user-attachments/assets/f66d977b-8948-4990-be58-d9647a0f6906)
 27 | 
 28 | #### sqlai_helper工具版本号: 2.1.3，更新日期：2024-10-10 <-> 支持SQL改写，合并LLM模型接口
 29 | 链接： https://github.com/hcymysql/sql_helper/releases/tag/sqlai_helper_v2.1.3
 30 | ```
 31 | #### ★贡献：ThinkSQL 类似 ThinkPHP 的数据库引擎，集成sql_helper
 32 | #### ※ThinkSQL地址： https://pypi.org/project/think-sql/
 33 | 
 34 | #### 2023-09-05日更新：1.1.1版本-新增where条件表达式值判断，索引推荐更加精准。
 35 | #### 2023-09-06日更新：1.1.2版本-新增额外的建议：like模糊匹配检查、索引列使用了函数作计算
 36 | ```
 37 | 
 38 | 索引在数据库中非常重要，它可以加快查询速度并提高数据库性能。对于经常被用作查询条件的字段，添加索引可以显著改善查询效率。然而，索引的创建和维护需要考虑多个因素，包括数据量、查询频率、更新频率等。
 39 | 
 40 | sql_helper 工具是一个开源项目，其主要功能是自动判断条件字段是否需要增加索引，适用于MySQL5.7/8.0和MariaDB数据库，并且旨在帮助开发人员优化数据库查询性能。通过分析SQL语句，该工具可以检测出哪些条件字段可以考虑添加索引来提高查询效率。
 41 | 
 42 | ### 工作流程
 43 | 
 44 | 第一步、通过SQL语法解析器，提炼出表名，别名，关联字段名，条件字段名，排序字段名，分组字段名。
 45 | 
 46 | 第二步、检查是否有where条件，如没有则给出提示。
 47 | 
 48 | 第三步、检测到a join b on a.id = b.id（关联查询时），通过查询表结构，检查关联字段是否有索引，如没有给出创建索引提示。
 49 | 
 50 | 第四步、通过调用Explain执行计划，如果type值是ALL，或者rows大于1000，检查该表（如有别名，找到其对应的原始表名）和where条件字段的数据分布，工具默认会采样10万条数据作为样本，检查Cardinality基数，例如sex性别字段，有男女两个值，如果占比超过半数（50%），则不建议对该字段创建索引。
 51 | 
 52 | 第五步、检查group by和order by字段（同样的算法），之后与where条件字段合并，组合成联合索引。
 53 | 
 54 | 第六步、检查这些字段之前是否创建过索引，如果没有给予提示创建，如果之前就有索引，不提示。
 55 | 
 56 |     需要注意的是：sql_helper工具假定您的sql语句条件表达式都为and的前提下，提示创建联合索引。
 57 |     
 58 |     如果是or，sql解析器解析起来会有些困难(sql灵活多变，且不固定，无法用通用的算法组合字段)。
 59 |   
 60 |     例如where c1 = 1 or c2 = 2
 61 |   
 62 |     工具会提示(c1,c2)创建一个联合索引，但实际上应该单独对c1和c2创建一个独立索引。
 63 |   
 64 |     即select ... from t where c1 = 1
 65 |     union all
 66 |     select ... from t where c2 = 2
 67 | 
 68 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/ca3d23a7-f2d3-4a14-80af-3688e2bb061e)
 69 | 
 70 | 演示：https://www.douyin.com/video/7277857326072122676
 71 | 
 72 | ### 命令行方式使用 | [web端接口使用](https://github.com/hcymysql/sql_helper/blob/main/web/sql_helper/README.md)
 73 | ```
 74 | shell> chmod 755 sql_helper
 75 | shell> ./sql_helper -f test.yaml -q "select * from sbtest1 limit 1;"
 76 | 或者
 77 | shell> sql_helper -f test.yaml -q "select（SQL太长可以直接回车分割）
 78 | >  * from sbtest1 limit 10"
 79 | ```
 80 | 
 81 | 注：test.yaml为MySQL配置文件，如果SQL里包含反引号，请直接去掉反引号。
 82 | 
 83 | --sample参数：默认采样10万条数据（你可以在从库上获取样本数据），根据你的实际情况，适当增加采样数据，比如100-1000万行，这样工具会更精准的判断是否添加索引。
 84 | 
 85 | 仅支持SELECT查询（主要针对慢日志里的SQL）
 86 | 
 87 | ### Docker方式使用
 88 | ```
 89 | shell> vim Dockerfile
 90 | FROM centos:7
 91 | 
 92 | COPY sqlai_helper /root/
 93 | RUN chmod 755 /root/sqlai_helper
 94 | 
 95 | shell> docker build -t sqlai_helper .
 96 | shell> docker run -itd --name sqlai_helper sqlai_helper /bin/bash
 97 | shell> docker exec -it sqlai_helper /root/sqlai_helper -f /root/test.yaml -q "select * from t1 where cid=11"
 98 | ```
 99 | 
100 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/a603a7fd-7163-4c05-a5fd-4e605f02acc5)
101 | 
102 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/39da7b69-aebb-4c27-ac18-f0abc497064d)
103 | 
104 | 请注意，自动判断是否增加索引只是一个辅助功能，最终的决策还应该根据具体的业务需求和数据库性能优化的考虑来进行。此外，索引的创建和维护需要谨慎操作，需要考虑数据量、查询频率、更新频率等因素，以避免对数据库性能产生负面影响。
105 | 
106 | 工具适用于Centos7 系统
107 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/app.py:
--------------------------------------------------------------------------------
  1 | import sys, re
  2 | import textwrap
  3 | from tabulate import tabulate
  4 | import pymysql
  5 | from sql_metadata import Parser
  6 | from sql_format_class import SQLFormatter
  7 | from sql_alias import has_table_alias
  8 | from sql_count_value import count_column_value, count_column_clause_value
  9 | from sql_index import execute_index_query, check_index_exist, check_index_exist_multi
 10 | from where_clause import *
 11 | from sql_extra import *
 12 | import yaml
 13 | import argparse
 14 | from sql_deepseek import *
 15 | from flask import Flask, request, render_template
 16 | 
 17 | app = Flask(__name__)
 18 | app.config['JSON_AS_ASCII'] = False
 19 | app.config['JSON_SORT_KEYS'] = False
 20 | app.config['TEMPLATES_AUTO_RELOAD'] = True
 21 | app.config['DEFAULT_CHARSET'] = 'utf-8'
 22 | 
 23 | def analyze_sql(sql_query, db_config, sample_size=100000):
 24 |     mysql_settings = {
 25 |         "host": db_config["host"],
 26 |         "port": db_config["port"],
 27 |         "user": db_config["user"],
 28 |         "passwd": db_config["passwd"],
 29 |         "database": db_config["database"],
 30 |         "cursorclass": pymysql.cursors.DictCursor,
 31 |         "charset": 'utf8mb4'
 32 |     }
 33 | 
 34 |     try:
 35 |         formatted_sql = SQLFormatter().format_sql(sql_query)
 36 |     except UnicodeDecodeError as e:
 37 |         print(f"格式化 SQL 时出错: {e}, sql_query: {sql_query}")
 38 |         return {"error": f"格式化 SQL 时出错: {e}", "is_processing": False}
 39 | 
 40 |     try:
 41 |         parser = Parser(sql_query)
 42 |         table_names = parser.tables
 43 |         table_aliases = parser.tables_aliases
 44 |         data = parser.columns_dict
 45 |         select_fields = data.get('select', [])
 46 |         join_fields = data.get('join', [])
 47 |         where_fields = data.get('where', [])
 48 |         order_by_fields = data.get('order_by', [])
 49 |         group_by_fields = data.get('group_by', [])
 50 |         if 'SELECT' not in sql_query.upper():
 51 |             return {"error": "sql_helper工具仅支持select语句", "is_processing": False}
 52 |     except Exception as e:
 53 |         return {"error": f"解析 SQL 出现语法错误：{str(e)}", "is_processing": False}
 54 | 
 55 |     conn = None
 56 |     try:
 57 |         conn = pymysql.connect(**mysql_settings)
 58 |         cur = conn.cursor()
 59 | 
 60 |         sql = f"EXPLAIN {sql_query}"
 61 |         try:
 62 |             cur.execute(sql)
 63 |         except pymysql.err.ProgrammingError as e:
 64 |             return {"error": f"MySQL 内部错误：{e}", "is_processing": False}
 65 |         except Exception as e:
 66 |             return {"error": f"MySQL 内部错误：{e}", "is_processing": False}
 67 |         explain_result = cur.fetchall()
 68 | 
 69 |         e_column_names = list(explain_result[0].keys())
 70 |         e_result_values = []
 71 |         for row in explain_result:
 72 |             values = [str(value) for value in row.values()]  # 直接转为字符串
 73 |             e_result_values.append(values)
 74 |         e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="html", numalign="left")
 75 |     except Exception as e:
 76 |         return {"error": f"执行 EXPLAIN 或处理结果时出错: {e}", "is_processing": False}
 77 |     finally:
 78 |         if conn:
 79 |             conn.close()
 80 | 
 81 |     index_suggestions = []
 82 |     contains_dot = False
 83 |     if len(where_fields) == 0:
 84 |         index_suggestions.append(f"你的SQL没有where条件.")
 85 |     else:
 86 |         contains_dot = any('.' in field for field in where_fields)
 87 | 
 88 |     if len(join_fields) != 0:
 89 |         table_field_dict = {}
 90 |         for field in join_fields:
 91 |             table_field = field.split('.')
 92 |             if len(table_field) == 2:
 93 |                 table_name = table_field[0]
 94 |                 field_name = table_field[1]
 95 |                 if table_name not in table_field_dict:
 96 |                     table_field_dict[table_name] = []
 97 |                 table_field_dict[table_name].append(field_name)
 98 | 
 99 |         for table_name, on_columns in table_field_dict.items():
100 |             for on_column in on_columns:
101 |                 try:
102 |                     show_index_sql = f"show index from {table_name} where Column_name = '{on_column}'"
103 |                     conn = pymysql.connect(**mysql_settings)
104 |                     cur = conn.cursor()
105 |                     cur.execute(show_index_sql)
106 |                     index_result = cur.fetchall()
107 |                     if not index_result:
108 |                         suggestion_text = "join联表查询，on关联字段必须增加索引！\n"
109 |                         suggestion_text += f"<span style=\"color: red;\">\n需要添加索引：ALTER TABLE {table_name} ADD INDEX idx_{on_column}({on_column});</span>\n"
110 |                         suggestion_text += f"【{table_name}】表 【{on_column}】字段，索引分析：\n"
111 |                         index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
112 |                                                            table_name=table_name, index_columns=on_column)
113 |                         suggestion_text += index_static + "\n"
114 |                         index_suggestions.append(suggestion_text)
115 |                 except Exception as e:
116 |                     print(f"join查询分析出错: {e}")
117 |                     index_suggestions.append(f"join查询分析出错: {e}")
118 |                 finally:
119 |                     if conn:
120 |                         conn.close()
121 | 
122 |     for row in explain_result:
123 |         # 修改处：防止 table_name 为 None
124 |         #table_name = row.get('table', '')  # 使用 get() 设置默认值为空字符串
125 |         table_name = row.get('table') or ''
126 |         if table_name.lower().startswith('<derived'):
127 |             table_name = ""
128 |         if table_name == "":
129 |             continue
130 | 
131 |         add_index_fields = []
132 |         if (len(join_fields) != 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1)) or \
133 |            (len(join_fields) == 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1000)):
134 |             if has_table_alias(table_aliases) is False and contains_dot is False:
135 |                 if len(where_fields) != 0:
136 |                     for where_field in where_fields:
137 |                         where_clause_value = parse_where_condition(formatted_sql, where_field)
138 |                         if where_clause_value is not None:
139 |                             where_clause_value = where_clause_value.replace('\n', '').replace('\r', '')
140 |                             where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
141 |                             where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
142 |                             Cardinality = count_column_clause_value(table_name, where_field, where_clause_value, mysql_settings, sample_size)
143 |                         else:
144 |                             Cardinality = count_column_value(table_name, where_field, mysql_settings, sample_size)
145 |                         if Cardinality:
146 |                             count_value = Cardinality[0]['count']
147 |                             if where_clause_value is not None:
148 |                                 suggestion_text = f"\n取出表 【{table_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
149 |                                 index_suggestions.append(suggestion_text)
150 |                             else:
151 |                                 suggestion_text = f"\n取出表 【{table_name}】 where条件字段 【{where_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
152 |                                 index_suggestions.append(suggestion_text)
153 |                         else:
154 |                             add_index_fields.append(where_field)
155 | 
156 |                 if group_by_fields is not None and len(group_by_fields) != 0:
157 |                     for group_field in group_by_fields:
158 |                         Cardinality = count_column_value(table_name, group_field, mysql_settings, sample_size)
159 |                         if Cardinality:
160 |                             count_value = Cardinality[0]['count']
161 |                             suggestion_text = f"\n取出表 【{table_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
162 |                             index_suggestions.append(suggestion_text)
163 |                         else:
164 |                             add_index_fields.append(group_field)
165 | 
166 |                 if len(order_by_fields) != 0:
167 |                     for order_field in order_by_fields:
168 |                         Cardinality = count_column_value(table_name, order_field, mysql_settings, sample_size)
169 |                         if Cardinality:
170 |                             count_value = Cardinality[0]['count']
171 |                             suggestion_text = f"\n取出表 【{table_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
172 |                             index_suggestions.append(suggestion_text)
173 |                         else:
174 |                             add_index_fields.append(order_field)
175 | 
176 |                 add_index_fields = list(dict.fromkeys(add_index_fields).keys())
177 | 
178 |                 if len(add_index_fields) == 0:
179 |                     if not index_suggestions:
180 |                         index_suggestions.append(f"\n<span style=\"color: green; font-weight: bold;\">→ 【{table_name}】 表，无需添加任何索引。</span>\n")
181 |                     else:
182 |                         index_suggestions.append(f"\n<span style=\"color: green; font-weight: bold;\">→ 【{table_name}】 表，无需添加任何索引。</span>\n")
183 |                 elif len(add_index_fields) == 1:
184 |                     index_name = add_index_fields[0]
185 |                     index_columns = add_index_fields[0]
186 |                     try:
187 |                         conn = pymysql.connect(**mysql_settings)
188 |                         cur = conn.cursor()
189 |                         index_result = check_index_exist(mysql_settings, table_name=table_name, index_column=index_columns)
190 |                         if not index_result:
191 |                             suggestion_text = ""
192 |                             if row['key'] is None:
193 |                                 suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});</span>\n"
194 |                             elif row['key'] is not None and row['rows'] >= 1:
195 |                                 suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});</span>\n"
196 |                             suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段，索引分析：\n"
197 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
198 |                                                                table_name=table_name, index_columns=index_columns)
199 |                             suggestion_text += index_static + "\n"
200 |                             index_suggestions.append(suggestion_text)
201 |                         else:
202 |                             suggestion_text = f"\n<span style=\"color: green; font-weight: bold;\">→ 【{table_name}】表 【{index_columns}】字段，索引已经存在，无需添加任何索引。</span>\n"
203 |                             suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段，索引分析：\n"
204 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
205 |                                                                table_name=table_name, index_columns=index_columns)
206 |                             suggestion_text += index_static + "\n"
207 |                             index_suggestions.append(suggestion_text)
208 |                     except Exception as e:
209 |                         print(f"单个索引分析出错: {e}")
210 |                         index_suggestions.append(f"单个索引分析出错: {e}")
211 |                     finally:
212 |                         if conn:
213 |                             conn.close()
214 | 
215 |                 else:
216 |                     merged_name = '_'.join(add_index_fields)
217 |                     merged_columns = ','.join(add_index_fields)
218 |                     try:
219 |                         conn = pymysql.connect(**mysql_settings)
220 |                         cur = conn.cursor()
221 |                         index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"],
222 |                                                                     table_name=table_name, index_columns=merged_columns,
223 |                                                                     index_number=len(add_index_fields))
224 |                         if index_result_list is None:
225 |                             suggestion_text = ""
226 |                             if row['key'] is None:
227 |                                 suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});</span>\n"
228 |                             elif row['key'] is not None and row['rows'] >= 1:
229 |                                 suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});</span>\n"
230 |                             suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段，索引分析：\n"
231 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
232 |                                                                table_name=table_name, index_columns=merged_columns)
233 |                             suggestion_text += index_static + "\n"
234 |                             index_suggestions.append(suggestion_text)
235 |                         else:
236 |                             suggestion_text = f"\n<span style=\"color: green; font-weight: bold;\">→ 【{table_name}】表 【{merged_columns}】字段，联合索引已经存在，无需添加任何索引。</span>\n"
237 |                             suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段，索引分析：\n"
238 |                             index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
239 |                                                                table_name=table_name, index_columns=merged_columns)
240 |                             suggestion_text += index_static + "\n"
241 |                             index_suggestions.append(suggestion_text)
242 |                     except Exception as e:
243 |                         print(f"联合索引分析出错: {e}")
244 |                         index_suggestions.append(f"联合索引分析出错: {e}")
245 |                     finally:
246 |                         if conn:
247 |                             conn.close()
248 | 
249 |             if has_table_alias(table_aliases) is True or contains_dot is True:
250 |                 if has_table_alias(table_aliases) is True:
251 |                     try:
252 |                         table_real_name = table_aliases[table_name]
253 |                     except KeyError:
254 |                         if table_name.startswith('<union'):
255 |                             table_real_name = ""
256 |                 else:
257 |                     table_real_name = table_name
258 | 
259 |                 if table_real_name != "":
260 |                     if len(where_fields) != 0:
261 |                         where_matching_fields = []
262 |                         for field in where_fields:
263 |                             if field.startswith(table_real_name + '.'):
264 |                                 where_matching_fields.append(field.split('.')[1])
265 | 
266 |                         for where_field in where_matching_fields:
267 |                             talia_clause = table_name + '.' + where_field
268 |                             where_clause_value = parse_where_condition(formatted_sql, talia_clause)
269 |                             if where_clause_value is not None:
270 |                                 where_clause_value = where_clause_value.replace('\n', '').replace('\r', '')
271 |                                 where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
272 |                                 prefix = where_clause_value.split('.')[0]
273 |                                 where_clause_value = where_clause_value.replace(prefix + '.', '')
274 |                                 if "." in where_clause_value:
275 |                                     Cardinality = count_column_value(table_real_name, where_field, mysql_settings, sample_size)
276 |                                 else:
277 |                                     Cardinality = count_column_clause_value(table_real_name, where_field, where_clause_value, mysql_settings, sample_size)
278 |                             else:
279 |                                 Cardinality = count_column_value(table_real_name, where_field, mysql_settings, sample_size)
280 |                             if Cardinality:
281 |                                 count_value = Cardinality[0]['count']
282 |                                 if where_clause_value is not None:
283 |                                     suggestion_text = f"\n取出表 【{table_real_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
284 |                                     index_suggestions.append(suggestion_text)
285 |                                 else:
286 |                                     suggestion_text = f"\n取出表 【{table_real_name}】 where条件字段 【{where_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
287 |                                     index_suggestions.append(suggestion_text)
288 |                             else:
289 |                                 add_index_fields.append(where_field)
290 | 
291 |                     if group_by_fields is not None and len(group_by_fields) != 0:
292 |                         group_matching_fields = []
293 |                         for field in group_by_fields:
294 |                             if field.startswith(table_real_name + '.'):
295 |                                 group_matching_fields.append(field.split('.')[1])
296 |                         for group_field in group_matching_fields:
297 |                             Cardinality = count_column_value(table_real_name, group_field, mysql_settings, sample_size)
298 |                             if Cardinality:
299 |                                 count_value = Cardinality[0]['count']
300 |                                 suggestion_text = f"\n取出表 【{table_real_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
301 |                                 index_suggestions.append(suggestion_text)
302 |                             else:
303 |                                 add_index_fields.append(group_field)
304 | 
305 |                     if len(order_by_fields) != 0:
306 |                         order_matching_fields = []
307 |                         for field in order_by_fields:
308 |                             if field.startswith(table_real_name + '.'):
309 |                                 order_matching_fields.append(field.split('.')[1])
310 |                         for order_field in order_matching_fields:
311 |                             Cardinality = count_column_value(table_real_name, order_field, mysql_settings, sample_size)
312 |                             if Cardinality:
313 |                                 count_value = Cardinality[0]['count']
314 |                                 suggestion_text = f"\n取出表 【{table_real_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。\n"
315 |                                 index_suggestions.append(suggestion_text)
316 |                             else:
317 |                                 add_index_fields.append(order_field)
318 | 
319 |                     add_index_fields = list(dict.fromkeys(add_index_fields).keys())
320 | 
321 |                     if len(add_index_fields) == 0:
322 |                         index_suggestions.append(f"\n<span style=\"color: green; font-weight: bold;\">→ 【{table_real_name}】 表，无需添加任何索引。</span>\n")
323 |                     elif len(add_index_fields) == 1:
324 |                         index_name = add_index_fields[0]
325 |                         index_columns = add_index_fields[0]
326 |                         try:
327 |                             conn = pymysql.connect(**mysql_settings)
328 |                             cur = conn.cursor()
329 |                             index_result = check_index_exist(mysql_settings, table_name=table_real_name, index_column=index_columns)
330 |                             if not index_result:
331 |                                 suggestion_text = ""
332 |                                 if row['key'] is None:
333 |                                     suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});</span>\n"
334 |                                 elif row['key'] is not None and row['rows'] >= 1:
335 |                                     suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});</span>\n"
336 |                                 suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段，索引分析：\n"
337 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
338 |                                                                    table_name=table_real_name, index_columns=index_columns)
339 |                                 suggestion_text += index_static + "\n"
340 |                                 index_suggestions.append(suggestion_text)
341 |                             else:
342 |                                 suggestion_text = f"\n<span style=\"color: green; font-weight: bold;\">→ 【{table_real_name}】表 【{index_columns}】字段，索引已经存在，无需添加任何索引。</span>\n"
343 |                                 suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段，索引分析：\n"
344 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
345 |                                                                    table_name=table_real_name, index_columns=index_columns)
346 |                                 suggestion_text += index_static + "\n"
347 |                                 index_suggestions.append(suggestion_text)
348 |                         except Exception as e:
349 |                             print(f"单个别名表索引分析出错: {e}")
350 |                             index_suggestions.append(f"单个别名表索引分析出错: {e}")
351 |                         finally:
352 |                             if conn:
353 |                                 conn.close()
354 |                     else:
355 |                         merged_name = '_'.join(add_index_fields)
356 |                         merged_columns = ','.join(add_index_fields)
357 |                         try:
358 |                             conn = pymysql.connect(**mysql_settings)
359 |                             cur = conn.cursor()
360 |                             index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"],
361 |                                                                         table_name=table_real_name, index_columns=merged_columns,
362 |                                                                         index_number=len(add_index_fields))
363 |                             if index_result_list is None:
364 |                                 suggestion_text = ""
365 |                                 if row['key'] is None:
366 |                                     suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});</span>\n"
367 |                                 elif row['key'] is not None and row['rows'] >= 1:
368 |                                     suggestion_text += f"<span style=\"color: #FFA500;\">\n建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});</span>\n"
369 |                                 suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段，索引分析：\n"
370 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
371 |                                                                    table_name=table_real_name, index_columns=merged_columns)
372 |                                 suggestion_text += index_static + "\n"
373 |                                 index_suggestions.append(suggestion_text)
374 |                             else:
375 |                                 suggestion_text = f"\n<span style=\"color: green; font-weight: bold;\">→ 【{table_real_name}】表 【{merged_columns}】字段，联合索引已经存在，无需添加任何索引。</span>\n"
376 |                                 suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段，索引分析：\n"
377 |                                 index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
378 |                                                                    table_name=table_real_name, index_columns=merged_columns)
379 |                                 suggestion_text += index_static + "\n"
380 |                                 index_suggestions.append(suggestion_text)
381 |                         except Exception as e:
382 |                             print(f"联合别名表索引分析出错: {e}")
383 |                             index_suggestions.append(f"联合别名表索引分析出错: {e}")
384 |                         finally:
385 |                             if conn:
386 |                                 conn.close()
387 | 
388 |     extra_suggestions = []
389 |     where_clause = parse_where_condition_full(formatted_sql)
390 |     if where_clause:
391 |         like_r, like_expression = check_percent_position(where_clause)
392 |         if like_r is True:
393 |             extra_suggestions.append(f"like模糊匹配，百分号在首位，【{like_expression}】是不能用到索引的，例如like '%张三%'，可以考虑改成like '张三%'，这样是可以用到索引的，如果业务上不能改，可以考虑用全文索引。\n")
394 | 
395 |         function_r = extract_function_index(where_clause)
396 |         if function_r is not False:
397 |             extra_suggestions.append(f"索引列使用了函数作计算：【{function_r}】，会导致索引失效。"
398 |                                      f"如果你是MySQL 8.0可以考虑创建函数索引；如果你是MySQL 5.7，你要更改你的SQL逻辑了。\n")
399 | 
400 |     try:
401 |         # 开始处理 AI 优化，设置 is_processing=True
402 |         ai_suggestions = optimize_sql(formatted_sql)
403 |         if not isinstance(ai_suggestions, str):
404 |             ai_suggestions = str(ai_suggestions)
405 |         ai_suggestions = re.sub(r'```html\s*|\s*```', '', ai_suggestions, flags=re.MULTILINE).strip()
406 |     except Exception as e:
407 |         ai_suggestions = f"调用 AI 优化出错: {e}"
408 |         print(f"调用 AI 优化出错: {e}")
409 | 
410 |     return {
411 |         "formatted_sql": formatted_sql,
412 |         "explain_table": e_table,
413 |         "index_suggestions": "".join(index_suggestions),
414 |         "extra_suggestions": "".join(extra_suggestions),
415 |         "ai_suggestions": ai_suggestions,
416 |         "no_suggestions": not index_suggestions and not extra_suggestions and not ai_suggestions,
417 |         "is_processing": False  # 处理完成
418 |     }
419 | 
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the analysis form; on POST, run the analysis and render its outcome.

    GET (or any non-POST) renders the empty form. A POST with incomplete
    connection details re-renders the form with an error; otherwise the query
    is analysed and either the error or the full result is rendered.
    """
    if request.method != "POST":
        return render_template("index.html", is_processing=False)

    form = request.form
    sql_query = form.get("sql_query")
    host = form.get("host")
    port = form.get("port", type=int, default=3306)
    user = form.get("user")
    password = form.get("password")
    database = form.get("database")

    # Values passed back to the template on every POST response
    # (the password is not among them).
    echoed = {
        "sql_query": sql_query,
        "host": host,
        "port": port,
        "user": user,
        "database": database,
        "is_processing": False,
    }

    if not (host and user and password and database):
        return render_template(
            "index.html",
            error="请提供完整的 MySQL 数据库连接信息 (请手动填写所有数据库参数).",
            **echoed,
        )

    config_source = "params"
    connection_settings = {
        "host": host,
        "port": port,
        "user": user,
        "passwd": password,
        "database": database,
    }

    analysis_result = analyze_sql(sql_query, connection_settings)

    if "error" in analysis_result:
        return render_template(
            "index.html",
            error=analysis_result["error"],
            config_source=config_source,
            **echoed,
        )

    result_keys = (
        "formatted_sql",
        "explain_table",
        "index_suggestions",
        "extra_suggestions",
        "ai_suggestions",
        "no_suggestions",
    )
    rendered_fields = {key: analysis_result[key] for key in result_keys}
    return render_template(
        "index.html",
        config_source=config_source,
        **rendered_fields,
        **echoed,
    )
471 | 
if __name__ == "__main__":
    import os

    # The server listens on every interface, and the interactive debugger lets
    # whoever reaches it execute code, so debug mode is opt-in: set
    # FLASK_DEBUG=1 (or true/yes/on) for local development only.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(host='0.0.0.0', debug=debug_enabled)
474 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/requirements.txt:
--------------------------------------------------------------------------------
1 | Flask
2 | PyMySQL
3 | sql-metadata
4 | tabulate
5 | rich
6 | PyYAML
7 | openai
8 | sqlparse
9 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/sql_alias.py:
--------------------------------------------------------------------------------
 1 | def has_table_alias(table_alias):
 2 |     if isinstance(table_alias, dict):
 3 |         table_alias = {k.lower(): v.lower() for k, v in table_alias.items()}
 4 |         if 'join' in table_alias or 'on' in table_alias or 'where' in table_alias or 'group by' in table_alias or 'order by' in table_alias or 'limit' in table_alias or not table_alias:
 5 |            return False # 没有别名
 6 |         else:
 7 |            return True #有别名
 8 |     elif isinstance(table_alias, list):
 9 |         return False # 没有别名
10 |     else:
11 |         pass
12 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/sql_count_value.py:
--------------------------------------------------------------------------------
  1 | import pymysql
  2 | from rich.progress import Progress, TimeElapsedColumn, TextColumn, BarColumn
  3 | 
  4 | def count_column_value(table_name, field_name, mysql_settings, sample_size):
  5 |     with pymysql.connect(**mysql_settings) as conn:
  6 |         with conn.cursor() as cursor:
  7 |             """
  8 |             在这个查询中，使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000，则使用
  9 |             (SELECT COUNT(*) FROM {table_name}) / 2
 10 |             作为阈值，即表的实际大小除以2；否则使用 {sample_size} / 2 作为阈值。
 11 |             """
 12 | 
 13 |             # 如果你的数据库是MySQL 8.0，那么推荐用 CTE（公共表达式）的形式
 14 |             '''
 15 |             sql = f"""
 16 |             WITH subquery AS (
 17 |                 SELECT {field_name}
 18 |                 FROM {table_name}
 19 |                 LIMIT {sample_size}
 20 |             )
 21 |             SELECT COUNT(*) as count
 22 |             FROM subquery
 23 |             GROUP BY {field_name}
 24 |             HAVING COUNT(*) >= 
 25 |                 CASE 
 26 |                     WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 
 27 |                     ELSE {sample_size} / 2 
 28 |                 END;
 29 |             """
 30 |             '''
 31 | 
 32 |             # 默认采用子查询兼容MySQL 5.7版本
 33 |             sql = f"""
 34 |             SELECT COUNT(*) as count
 35 |             FROM (
 36 |                 SELECT {field_name}
 37 |                 FROM {table_name}
 38 |                 LIMIT {sample_size}
 39 |             ) AS subquery
 40 |             GROUP BY {field_name}
 41 |             HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 
 42 |             THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END;
 43 |             """
 44 | 
 45 |             # print(sql)
 46 |             with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress:
 47 |                 task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{field_name}[/bold magenta]...", total=1)
 48 |                 cursor.execute(sql)
 49 |                 progress.update(task, completed=1)
 50 | 
 51 |             results = cursor.fetchall()
 52 | 
 53 |     if results:
 54 |         # 如果有超过半数的重复数据
 55 |         return results
 56 |     else:
 57 |         return False
 58 | 
 59 | 
 60 | def count_column_clause_value(table_name, field_name, where_clause_value, mysql_settings, sample_size):
 61 |     with pymysql.connect(**mysql_settings) as conn:
 62 |         with conn.cursor() as cursor:
 63 |             
 64 |             """
 65 |             在这个查询中，使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000，则使用
 66 |             (SELECT COUNT(*) FROM {table_name}) / 2
 67 |             作为阈值，即表的实际大小除以2；否则使用 {sample_size} / 2 作为阈值。
 68 |             """
 69 | 
 70 |             # 如果你的数据库是MySQL 8.0，那么推荐用 CTE（公共表达式）的形式
 71 |             '''
 72 |             sql = f"""
 73 |             WITH subquery AS (
 74 |                 SELECT {field_name}
 75 |                 FROM {table_name}
 76 |                 LIMIT {sample_size}
 77 |             )
 78 |             SELECT COUNT(*) as count
 79 |             FROM subquery
 80 |             GROUP BY {field_name}
 81 |             HAVING COUNT(*) >= 
 82 |                 CASE 
 83 |                     WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 
 84 |                     ELSE {sample_size} / 2 
 85 |                 END;
 86 |             """
 87 |             '''
 88 | 
 89 |             # 默认采用子查询兼容MySQL 5.7版本
 90 |             sql = f"""
 91 |             SELECT COUNT(*) as count
 92 |             FROM (
 93 |                 SELECT {field_name}
 94 |                 FROM {table_name}
 95 |                 WHERE {where_clause_value}
 96 |                 LIMIT {sample_size}
 97 |             ) AS subquery
 98 |             GROUP BY {field_name}
 99 |             HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 
100 |             THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END;
101 |             """
102 | 
103 |             #print(sql)
104 |             with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress:
105 |                 task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{where_clause_value}[/bold magenta]...", total=1)
106 |                 cursor.execute(sql)
107 |                 progress.update(task, completed=1)
108 | 
109 |             results = cursor.fetchall()
110 | 
111 |     if results:
112 |         # 如果有超过半数的重复数据
113 |         return results
114 |     else:
115 |         return False
116 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/sql_deepseek.py:
--------------------------------------------------------------------------------
 1 | # https://platform.deepseek.com/api_keys 申请密钥并充值1元
 2 | 
 3 | from openai import OpenAI
 4 | import re
 5 | 
 6 | def optimize_sql(original_sql):
 7 |     """
 8 |     调用 deepseek V3 接口优化 SQL 查询语句，并返回优化后的纯 SQL 字符串。
 9 | 
10 |     Args:
11 |         original_sql (str): 原始的 SQL 查询语句。
12 | 
13 |     Returns:
14 |         str: 优化后的纯 SQL 查询语句。如果调用过程中发生错误，则返回包含错误信息的字符串。
15 |     """
16 |     try:
17 |         client = OpenAI(api_key="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", base_url="https://api.deepseek.com")
18 | 
19 |         # 输出调用信息 (可选，仅用于调试)
20 |         print('\033[94m以下是调用的deepseek V3接口.\033[0m')
21 |         print('优化前的SQL是：')
22 |         print(original_sql)
23 |         print('-' * 55)
24 | 
25 |         # 获取优化后的 SQL，不要求返回 HTML 格式
26 |         response = client.chat.completions.create(
27 |             model="deepseek-chat",
28 |             messages=[
29 |                 {"role": "system", "content": "你是MySQL专家！你精通SQL优化！也是高级程序员！"},
30 |                 {"role": "user", "content": f"帮我优化这个SQL，返回纯 SQL 语句，不要添加任何标记或格式：{original_sql}"},
31 |             ],
32 |             stream=False
33 |         )
34 |         
35 |         # 获取优化后的 SQL 内容
36 |         optimized_sql = response.choices[0].message.content
37 |         
38 |         # 清理可能的标记（例如 ```sql 或 ```），确保返回纯 SQL
39 |         optimized_sql = re.sub(r'```sql|```|<\w+>.*?</\w+>', '', optimized_sql, flags=re.MULTILINE).strip()
40 | 
41 |         # 输出优化后的 SQL (可选，仅用于调试)
42 |         print('\033[92m优化后的SQL是：\033[0m')
43 |         print(optimized_sql)
44 | 
45 |         return optimized_sql
46 | 
47 |     except Exception as e:
48 |         # 捕获异常，返回纯文本错误信息
49 |         error_message = f"调用 deepseek API 接口出错: {str(e)}"
50 |         print(error_message)  # 打印错误信息到控制台 (可选)
51 |         return error_message
52 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/sql_extra.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | def check_percent_position(string):
 4 |     string = string.lower()
 5 |     pattern = r".*like\s+'%|.*like\s+concat\(+'%|.*regexp\s+"
 6 |     matches = re.findall(pattern, string)
 7 |     if matches:
 8 |         like_pattern = r"like\s+(?:concat\(.*?\)|'%%'|\'.*?%(?:.*?)?\')"
 9 |         like_match = re.search(like_pattern, string)
10 |         if like_match:
11 |             return True, like_match.group()
12 |         #return True
13 |     return False, None
14 | 
15 | 
def extract_function_index(string):
    """Find ``name(...)`` calls that are followed by a comparison character.

    Every match spans from the function name through the last comparison
    character (one of ``> = < !``) that the greedy pattern can reach.

    Returns:
        The matched expressions joined with ``', '``, or ``False`` when
        nothing matches.
    """
    call_with_operator = r'\b(\w+(\(.*\).*[>=<!=<>]))'
    # Group 1 is the whole expression; group 2 is only the part after the name.
    expressions = [hit.group(1) for hit in re.finditer(call_with_operator, string)]
    return ', '.join(expressions) if expressions else False
27 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/sql_format_class.py:
--------------------------------------------------------------------------------
 1 | import sqlparse
 2 | 
 3 | class SQLFormatter:
 4 |     def format_sql(self, sql_query):
 5 |         """
 6 |         格式化 SQL 查询语句
 7 |         """
 8 |         formatted_sql = sqlparse.format(sql_query, reindent=True, keyword_case='upper')
 9 | 
10 |         return formatted_sql
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/sql_index.py:
--------------------------------------------------------------------------------
  1 | import textwrap
  2 | from tabulate import tabulate
  3 | import pymysql
  4 | 
  5 | def execute_index_query(mysql_settings, database, table_name, index_columns):
  6 |     index_columns = index_columns
  7 |     index_columns = index_columns.split(',')
  8 |     updated_columns = [f"'{column.strip()}'" for column in index_columns]
  9 |     final_columns = ', '.join(updated_columns)
 10 |     sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns})"
 11 |     #print(sql)
 12 |     try:
 13 |         conn = pymysql.connect(**mysql_settings)
 14 |         cur = conn.cursor()
 15 |         cur.execute(sql)
 16 |         index_result = cur.fetchall()
 17 | 
 18 |         if not index_result:
 19 |             print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。")
 20 | 
 21 |         # 提取列名
 22 |         e_column_names = [desc[0] for desc in cur.description]
 23 | 
 24 |         # 提取结果值并进行自动换行处理
 25 |         e_result_values = []
 26 |         for row in index_result:
 27 |             values = list(row.values())
 28 |             wrapped_values = [textwrap.fill(str(value), width=30) for value in values]
 29 |             e_result_values.append(wrapped_values)
 30 | 
 31 |         # 将结果格式化为表格（包含竖线）
 32 |         e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left")
 33 | 
 34 |         return e_table
 35 | 
 36 |     except pymysql.err.ProgrammingError as e:
 37 |         print("MySQL 内部错误：",e)
 38 |         return None
 39 |     except Exception as e:
 40 |         print("MySQL 内部错误：",e)
 41 |         return None
 42 |     finally:
 43 |         if cur:
 44 |             cur.close()
 45 |         if conn:
 46 |             conn.close()
 47 | 
 48 | #########################################################
 49 | 
 50 | def check_index_exist(mysql_settings, table_name, index_column):
 51 |     show_index_sql = f"show index from {table_name} where Column_name = '{index_column}'"
 52 |     try:
 53 |         conn = pymysql.connect(**mysql_settings)
 54 |         cur = conn.cursor()
 55 |         cur.execute(show_index_sql)
 56 |         index_result = cur.fetchall()
 57 | 
 58 |         #if not index_result:
 59 |             #print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。")
 60 | 
 61 |         return index_result
 62 | 
 63 |     except pymysql.err.ProgrammingError as e:
 64 |         print("MySQL 内部错误：",e)
 65 |         return None
 66 |     except Exception as e:
 67 |         print("MySQL 内部错误：",e)
 68 |         return None
 69 |     finally:
 70 |         if cur:
 71 |             cur.close()
 72 |         if conn:
 73 |             conn.close()
 74 | 
 75 | #########################################################
 76 | 
 77 | def check_index_exist_multi(mysql_settings, database, table_name, index_columns, index_number):
 78 |     index_columns = index_columns
 79 |     index_columns = index_columns.split(',')
 80 |     updated_columns = [f"'{column.strip()}'" for column in index_columns]
 81 |     final_columns = ', '.join(updated_columns)
 82 |     sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns}) GROUP BY INDEX_NAME HAVING COUNT(INDEX_NAME) = {index_number}"
 83 |     #print(sql)
 84 |     try:
 85 |         conn = pymysql.connect(**mysql_settings)
 86 |         cur = conn.cursor()
 87 |         cur.execute(sql)
 88 |         index_result = cur.fetchall()
 89 | 
 90 |         if not index_result:
 91 |             return None
 92 | 
 93 |         return index_result
 94 | 
 95 |     except pymysql.err.ProgrammingError as e:
 96 |         print("MySQL 内部错误：",e)
 97 |         return None
 98 |     except Exception as e:
 99 |         print("MySQL 内部错误：",e)
100 |         return None
101 |     finally:
102 |         if cur:
103 |             cur.close()
104 |         if conn:
105 |             conn.close()
106 | 
107 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/templates/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="zh">
  3 | <head>
  4 |     <meta charset="UTF-8">
  5 |     <title>SQLAI Helper 工具</title>
  6 |     <link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism.min.css" rel="stylesheet" />
  7 |     <style>
  8 |         body {
  9 |             font-family: 'Arial', sans-serif;
 10 |             background: #f1f1f1;
 11 |             margin: 0;
 12 |             padding: 0;
 13 |             display: flex;
 14 |             justify-content: center;
 15 |             align-items: flex-start;
 16 |             height: 100vh;
 17 |             color: #333;
 18 |             overflow-y: auto;
 19 |         }
 20 |         .container {
 21 |             background: #fff;
 22 |             border-radius: 8px;
 23 |             box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
 24 |             width: 80%;
 25 |             max-width: 900px;
 26 |             padding: 30px;
 27 |             margin-top: -40px;
 28 |         }
 29 |         h1 {
 30 |             text-align: center;
 31 |             color: #007bff;
 32 |             margin-bottom: 20px;
 33 |         }
 34 |         .config-box {
 35 |             background: #fafafa;
 36 |             padding: 20px;
 37 |             border-radius: 8px;
 38 |             margin-bottom: 30px;
 39 |             box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 40 |         }
 41 |         .config-box label {
 42 |             font-weight: bold;
 43 |             margin-top: 5px;
 44 |             display: block;
 45 |         }
 46 |         .inline-inputs {
 47 |             display: flex;
 48 |             flex-wrap: wrap;
 49 |             gap: 10px;
 50 |         }
 51 |         .inline-inputs input {
 52 |             flex: 1;
 53 |             min-width: 200px;
 54 |         }
 55 |         .config-box textarea {
 56 |             width: 100%;
 57 |             height: 150px;
 58 |             padding: 12px;
 59 |             margin: 8px 0;
 60 |             border-radius: 5px;
 61 |             border: 1px solid #ddd;
 62 |             font-size: 16px;
 63 |             resize: vertical;
 64 |         }
 65 |         .button-container {
 66 |             text-align: center;
 67 |             margin-top: 20px;
 68 |         }
 69 |         button {
 70 |             padding: 12px 24px;
 71 |             background-color: #007bff;
 72 |             border: none;
 73 |             color: white;
 74 |             font-size: 16px;
 75 |             border-radius: 5px;
 76 |             cursor: pointer;
 77 |             transition: background-color 0.3s;
 78 |         }
 79 |         button:hover {
 80 |             background-color: #0056b3;
 81 |         }
 82 |         .output-box {
 83 |             background: #f9f9f9;
 84 |             border-radius: 8px;
 85 |             padding: 15px;
 86 |             margin-top: 10px;
 87 |             box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
 88 |             overflow-x: auto;
 89 |         }
 90 |         .section-title {
 91 |             font-weight: bold;
 92 |             font-size: 18px;
 93 |             color: #333;
 94 |             margin-bottom: 10px;
 95 |         }
 96 |         .suggestion, .ai-suggestion, .error {
 97 |             white-space: pre-wrap;
 98 |             word-wrap: break-word;
 99 |         }
100 |         .error {
101 |             color: red;
102 |         }
103 |         .ai-suggestion {
104 |             color: #007bff;
105 |         }
106 |         .success-suggestion {
107 |             background: #d4edda;
108 |             color: #155724;
109 |             padding: 10px;
110 |             border-radius: 5px;
111 |         }
112 |         pre.language-sql {
113 |             background: #f9f9f9;
114 |             padding: 15px;
115 |             border-radius: 8px;
116 |             overflow-x: auto;
117 |         }
118 |         pre.language-sql code {
119 |             display: block;
120 |             white-space: pre-wrap;
121 |             word-wrap: break-word;
122 |             line-height: 1.2;
123 |         }
124 |         table {
125 |             width: 100%;
126 |             border-collapse: collapse;
127 |             margin: 10px 0;
128 |         }
129 |         th, td {
130 |             border: 1px solid #ddd;
131 |             padding: 8px;
132 |             text-align: left;
133 |         }
134 |         th {
135 |             background-color: #f2f2f2;
136 |             font-weight: bold;
137 |         }
138 |         .ai-sql-table td {
139 |             white-space: pre-wrap;
140 |             word-wrap: break-word;
141 |             font-family: monospace;
142 |             color: #0056b3;
143 |         }
144 |         #loading {
145 |             display: none;
146 |             text-align: center;
147 |             margin-top: 20px;
148 |             position: fixed;
149 |             top: 50%;
150 |             left: 50%;
151 |             transform: translate(-50%, -50%);
152 |             background: rgba(255, 255, 255, 0.8);
153 |             padding: 20px;
154 |             border-radius: 5px;
155 |             z-index: 1000;
156 |         }
157 |         .progress-bar {
158 |             width: 300px;
159 |             height: 10px;
160 |             background-color: #f0f0f0;
161 |             border-radius: 5px;
162 |             overflow: hidden;
163 |         }
164 |         .progress-bar-fill {
165 |             width: 0%;
166 |             height: 100%;
167 |             background-color: #007bff;
168 |             animation: progress-animation 2s linear infinite; /* 无限循环动画 */
169 |         }
170 |         @keyframes progress-animation {
171 |             0% { width: 0%; }
172 |             100% { width: 100%; }
173 |         }
174 |     </style>
175 | </head>
176 | <body>
177 |     <div class="container">
178 |         <h1>SQLAI Helper 工具</h1>
179 | 
180 |         <div class="config-box">
181 |             <form method="post" id="sqlForm">
182 |                 <label for="sql_query">输入 SQL 自动给出索引优化建议 + SQL重写建议:</label>
183 |                 <textarea id="sql_query" name="sql_query" placeholder="请输入 SQL 查询语句" required>{{ sql_query or '' }}</textarea>
184 | 
185 |                 <p><strong>数据库连接配置</strong> (手动填写数据库参数)</p>
186 | 
187 |                 <div class="inline-inputs">
188 |                     <div>
189 |                         <label for="host">MySQL 主机名:</label>
190 |                         <input type="text" id="host" name="host" placeholder="MySQL 主机名" value="{{ host or '' }}" required>
191 |                     </div>
192 |                     <div>
193 |                         <label for="port">MySQL 端口:</label>
194 |                         <input type="number" id="port" name="port" placeholder="MySQL 端口 (默认: 3306)" value="{{ port or 3306 }}" required>
195 |                     </div>
196 |                     <div>
197 |                         <label for="user">MySQL 用户名:</label>
198 |                         <input type="text" id="user" name="user" placeholder="MySQL 用户名" value="{{ user or '' }}" required>
199 |                     </div>
200 |                     <div>
201 |                         <label for="password">MySQL 密码:</label>
202 |                         <input type="password" id="password" name="password" placeholder="MySQL 密码" value="{{ password or '' }}" required>
203 |                     </div>
204 |                     <div>
205 |                         <label for="database">数据库名:</label>
206 |                         <input type="text" id="database" name="database" placeholder="数据库名" value="{{ database or '' }}" required>
207 |                     </div>
208 |                 </div>
209 | 
210 |                 <div class="button-container">
211 |                     <br>
212 |                     <button type="submit" id="submitButton">分析 SQL</button>
213 |                 </div>
214 |             </form>
215 |         </div>
216 | 
217 |         <!-- 加载动画和进度条 -->
218 |         <div id="loading" style="display: none;">
219 |             <p>正在分析，请稍候...</p>
220 |             <div class="progress-bar">
221 |                 <div class="progress-bar-fill"></div>
222 |             </div>
223 |         </div>
224 | 
225 |         {% if error %}
226 |         <div class="output-box error" id="output-start">
227 |             <p class="section-title">错误信息:</p>
228 |             <pre class="language-sql"><code>{{ error|default('<span class="red-text">未检测到错误</span>')|safe }}</code></pre>
229 |         </div>
230 |         {% endif %}
231 | 
232 |         {% if formatted_sql %}
233 |         <div class="output-box" id="output-start">
234 |             <p class="section-title">1) 你刚才输入的 SQL 语句是:</p>
235 |             <pre class="language-sql"><code>{{ formatted_sql|default('<span class="red-text">无输入 SQL</span>')|safe }}</code></pre>
236 |         </div>
237 |         {% endif %}
238 | 
239 |         {% if explain_table %}
240 |         <div class="output-box">
241 |             <p class="section-title">2) EXPLAIN 执行计划:</p>
242 |             {{ explain_table|safe }}
243 |         </div>
244 |         {% endif %}
245 | 
246 |         {% if index_suggestions %}
247 |         <div class="output-box">
248 |             <p class="section-title">3) 索引优化建议:</p>
249 |             <pre class="language-sql"><code>{{ index_suggestions|default('<span class="red-text">无索引优化建议</span>')|safe }}</code></pre>
250 |         </div>
251 |         {% endif %}
252 | 
253 |         {% if extra_suggestions %}
254 |         <div class="output-box">
255 |             <p class="section-title">4) 额外的建议:</p>
256 |             <pre class="language-sql"><code>{{ extra_suggestions|default('<span class="red-text">无额外建议</span>')|safe }}</code></pre>
257 |         </div>
258 |         {% endif %}
259 | 
260 |         {% if ai_suggestions %}
261 |         <div class="output-box">
262 |             <p class="section-title">5) DeepSeek 的建议:</p>
263 |             <table class="ai-sql-table">
264 |                 <thead>
265 |                     <tr>
266 |                         <th>优化后的 SQL 语句</th>
267 |                     </tr>
268 |                 </thead>
269 |                 <tbody>
270 |                     <tr>
271 |                         <td>{{ ai_suggestions|default('暂无 AI 建议')|safe }}</td>
272 |                     </tr>
273 |                 </tbody>
274 |             </table>
275 |         </div>
276 |         {% endif %}
277 | 
278 |         {% if no_suggestions and formatted_sql and explain_table %}
279 |         <div class="output-box success-suggestion">
280 |             <p class="section-title">分析结果:</p>
281 |             <pre>SQL 语句分析完成，当前 SQL 语句执行计划良好，没有额外的优化建议！</pre>
282 |         </div>
283 |         {% endif %}
284 |     </div>
285 | 
286 |     <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/prism.min.js"></script>
287 |     <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/components/prism-sql.min.js"></script>
288 |     <script>
289 |         // 显示加载动画和进度条
290 |         function showLoading() {
291 |             document.getElementById('loading').style.display = 'block';
292 |             document.getElementById('submitButton').disabled = true;
293 |             sessionStorage.setItem('isAnalyzing', 'true'); // 设置分析状态
294 |         }
295 | 
296 |         // 隐藏加载动画和进度条
297 |         function hideLoading() {
298 |             document.getElementById('loading').style.display = 'none';
299 |             document.getElementById('submitButton').disabled = false;
300 |             sessionStorage.removeItem('isAnalyzing'); // 清除分析状态
301 |         }
302 | 
303 |         // 页面加载时检查状态
304 |         window.onload = function() {
305 |             Prism.highlightAll();
306 |             // 如果正在分析，显示进度条
307 |             if (sessionStorage.getItem('isAnalyzing') === 'true') {
308 |                 showLoading();
309 |             } else {
310 |                 hideLoading();
311 |             }
312 | 
313 |             // 检查是否有结果输出，如果有则隐藏进度条
314 |             var outputStart = document.getElementById('output-start');
315 |             if (outputStart && outputStart.innerHTML.trim() !== '') {
316 |                 hideLoading(); // 结果已展示，隐藏进度条
317 |             } else if (sessionStorage.getItem('isAnalyzing') === 'true') {
318 |                 // 如果没有结果但仍在分析状态，保持进度条显示
319 |                 showLoading();
320 |             }
321 |         };
322 | 
323 |         // 表单提交时触发加载动画
324 |         document.getElementById('sqlForm').addEventListener('submit', function(event) {
325 |             showLoading();
326 |         });
327 | 
328 |         // 确保错误信息加载后滚动到可见区域并关闭加载动画
329 |         {% if error %}
330 |             window.onload = function() {
331 |                 Prism.highlightAll();
332 |                 hideLoading(); // 错误时关闭加载动画
333 |                 var outputStart = document.getElementById('output-start');
334 |                 if (outputStart) {
335 |                     outputStart.scrollIntoView({ behavior: 'smooth' });
336 |                 }
337 |             };
338 |         {% endif %}
339 |     </script>
340 | </body>
341 | </html>
342 | 


--------------------------------------------------------------------------------
/deepseek_flash_src/where_clause.py:
--------------------------------------------------------------------------------
 1 | import sqlparse
 2 | 
 3 | def parse_where_condition(sql, column):
 4 |     parsed = sqlparse.parse(sql)
 5 |     stmt = parsed[0]
 6 | 
 7 |     where_column = ""
 8 |     where_expression = ""
 9 |     where_value = ""
10 |     where_clause = ""
11 |     found = False
12 |     result = ""
13 | 
14 |     for token in stmt.tokens:
15 |         if isinstance(token, sqlparse.sql.Where):
16 |             where_clause = token.value
17 |             conditions = []
18 |             for cond_tok in token.tokens:
19 |                 if isinstance(cond_tok, sqlparse.sql.Comparison):
20 |                     left_token = cond_tok.left.value.strip()
21 |                     if column == left_token:
22 |                         #return f"比较运算符: {cond_tok.value.strip()}"
23 |                         return cond_tok.value.strip()
24 | 
25 |                 if isinstance(cond_tok, sqlparse.sql.Identifier) and cond_tok.value == column:
26 |                     found = True
27 | 
28 |                 if found:
29 |                     if isinstance(cond_tok, sqlparse.sql.Token) and cond_tok.value.upper() in ["OR","AND"]:
30 |                         break
31 |                     else:
32 |                         result += cond_tok.value
33 | 
34 |                 if isinstance(cond_tok, sqlparse.sql.Parenthesis) and found:
35 |                     break
36 | 
37 |     if len(result) != 0:
38 |         #return f"逻辑运算符: {result.strip()}"
39 |         return result.strip()
40 |     else:
41 |         #return "没有找到该字段的条件表达式"
42 |         return None
43 | 
44 | 
def parse_where_condition_full(sql):
    """Return the text of the first statement from its WHERE clause to the end.

    The WHERE token's text and the text of every top-level token after it are
    concatenated and stripped.

    Returns:
        The concatenated text, or ``None`` when the statement has no WHERE clause.
    """
    statement = sqlparse.parse(sql)[0]

    pieces = []
    where_seen = False
    for tok in statement.tokens:
        if where_seen:
            pieces.append(tok.value)
        if isinstance(tok, sqlparse.sql.Where):
            where_seen = True
            pieces.append(tok.value)

    text = "".join(pieces)
    if not text:
        return None
    return text.strip()
60 | 
61 | 


--------------------------------------------------------------------------------
/src/sql_alias.py:
--------------------------------------------------------------------------------
 1 | def has_table_alias(table_alias):
 2 |     if isinstance(table_alias, dict):
 3 |         table_alias = {k.lower(): v.lower() for k, v in table_alias.items()}
 4 |         if 'join' in table_alias or 'on' in table_alias or 'where' in table_alias or 'group by' in table_alias or 'order by' in table_alias or 'limit' in table_alias or not table_alias:
 5 |            return False # 没有别名
 6 |         else:
 7 |            return True #有别名
 8 |     elif isinstance(table_alias, list):
 9 |         return False # 没有别名
10 |     else:
11 |         pass
12 | 


--------------------------------------------------------------------------------
/src/sql_count_value.py:
--------------------------------------------------------------------------------
  1 | import pymysql
  2 | from rich.progress import Progress, TimeElapsedColumn, TextColumn, BarColumn
  3 | 
  4 | def count_column_value(table_name, field_name, mysql_settings, sample_size):
  5 |     with pymysql.connect(**mysql_settings) as conn:
  6 |         with conn.cursor() as cursor:
  7 |             """
  8 |             在这个查询中，使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000，则使用
  9 |             (SELECT COUNT(*) FROM {table_name}) / 2
 10 |             作为阈值，即表的实际大小除以2；否则使用 {sample_size} / 2 作为阈值。
 11 |             """
 12 | 
 13 |             # 如果你的数据库是MySQL 8.0，那么推荐用 CTE（公共表达式）的形式
 14 |             '''
 15 |             sql = f"""
 16 |             WITH subquery AS (
 17 |                 SELECT {field_name}
 18 |                 FROM {table_name}
 19 |                 LIMIT {sample_size}
 20 |             )
 21 |             SELECT COUNT(*) as count
 22 |             FROM subquery
 23 |             GROUP BY {field_name}
 24 |             HAVING COUNT(*) >= 
 25 |                 CASE 
 26 |                     WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 
 27 |                     ELSE {sample_size} / 2 
 28 |                 END;
 29 |             """
 30 |             '''
 31 | 
 32 |             # 默认采用子查询兼容MySQL 5.7版本
 33 |             sql = f"""
 34 |             SELECT COUNT(*) as count
 35 |             FROM (
 36 |                 SELECT {field_name}
 37 |                 FROM {table_name}
 38 |                 LIMIT {sample_size}
 39 |             ) AS subquery
 40 |             GROUP BY {field_name}
 41 |             HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 
 42 |             THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END;
 43 |             """
 44 | 
 45 |             # print(sql)
 46 |             with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress:
 47 |                 task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{field_name}[/bold magenta]...", total=1)
 48 |                 cursor.execute(sql)
 49 |                 progress.update(task, completed=1)
 50 | 
 51 |             results = cursor.fetchall()
 52 | 
 53 |     if results:
 54 |         # 如果有超过半数的重复数据
 55 |         return results
 56 |     else:
 57 |         return False
 58 | 
 59 | 
 60 | def count_column_clause_value(table_name, field_name, where_clause_value, mysql_settings, sample_size):
 61 |     with pymysql.connect(**mysql_settings) as conn:
 62 |         with conn.cursor() as cursor:
 63 |             
 64 |             """
 65 |             在这个查询中，使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000，则使用
 66 |             (SELECT COUNT(*) FROM {table_name}) / 2
 67 |             作为阈值，即表的实际大小除以2；否则使用 {sample_size} / 2 作为阈值。
 68 |             """
 69 | 
 70 |             # 如果你的数据库是MySQL 8.0，那么推荐用 CTE（公共表达式）的形式
 71 |             '''
 72 |             sql = f"""
 73 |             WITH subquery AS (
 74 |                 SELECT {field_name}
 75 |                 FROM {table_name}
 76 |                 LIMIT {sample_size}
 77 |             )
 78 |             SELECT COUNT(*) as count
 79 |             FROM subquery
 80 |             GROUP BY {field_name}
 81 |             HAVING COUNT(*) >= 
 82 |                 CASE 
 83 |                     WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 
 84 |                     ELSE {sample_size} / 2 
 85 |                 END;
 86 |             """
 87 |             '''
 88 | 
 89 |             # 默认采用子查询兼容MySQL 5.7版本
 90 |             sql = f"""
 91 |             SELECT COUNT(*) as count
 92 |             FROM (
 93 |                 SELECT {field_name}
 94 |                 FROM {table_name}
 95 |                 WHERE {where_clause_value}
 96 |                 LIMIT {sample_size}
 97 |             ) AS subquery
 98 |             GROUP BY {field_name}
 99 |             HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 
100 |             THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END;
101 |             """
102 | 
103 |             #print(sql)
104 |             with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress:
105 |                 task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{where_clause_value}[/bold magenta]...", total=1)
106 |                 cursor.execute(sql)
107 |                 progress.update(task, completed=1)
108 | 
109 |             results = cursor.fetchall()
110 | 
111 |     if results:
112 |         # 如果有超过半数的重复数据
113 |         return results
114 |     else:
115 |         return False
116 | 


--------------------------------------------------------------------------------
/src/sql_extra.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | def check_percent_position(string):
 4 |     string = string.lower()
 5 |     pattern = r".*like\s+'%|.*like\s+concat\(+'%|.*regexp\s+"
 6 |     matches = re.findall(pattern, string)
 7 |     if matches:
 8 |         like_pattern = r"like\s+(?:concat\(.*?\)|'%%'|\'.*?%(?:.*?)?\')"
 9 |         like_match = re.search(like_pattern, string)
10 |         if like_match:
11 |             return True, like_match.group()
12 |         #return True
13 |     return False, None
14 | 
15 | 
def extract_function_index(string):
    """Collect ``name(...) <operator>`` fragments found in a WHERE clause.

    Each hit is a word immediately followed by a parenthesised part and, later
    on the same line, one of the characters ``>``, ``=``, ``<`` or ``!``.

    Args:
        string: WHERE-clause text to scan.

    Returns:
        The matched fragments joined with ``", "``, or ``False`` when nothing
        matched.
    """
    call_then_operator = r'\b(\w+(\(.*\).*[>=<!=<>]))'
    # Group 1 is the whole fragment; group 2 (the part after the name) is unused.
    fragments = [hit.group(1) for hit in re.finditer(call_then_operator, string)]
    return ', '.join(fragments) if fragments else False
27 | 


--------------------------------------------------------------------------------
/src/sql_format_class.py:
--------------------------------------------------------------------------------
 1 | import sqlparse
 2 | 
 3 | class SQLFormatter:
 4 |     def format_sql(self, sql_query):
 5 |         """
 6 |         格式化 SQL 查询语句
 7 |         """
 8 |         formatted_sql = sqlparse.format(sql_query, reindent=True, keyword_case='upper')
 9 | 
10 |         return formatted_sql
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/src/sql_index.py:
--------------------------------------------------------------------------------
  1 | import textwrap
  2 | from tabulate import tabulate
  3 | import pymysql
  4 | 
  5 | def execute_index_query(mysql_settings, database, table_name, index_columns):
  6 |     index_columns = index_columns
  7 |     index_columns = index_columns.split(',')
  8 |     updated_columns = [f"'{column.strip()}'" for column in index_columns]
  9 |     final_columns = ', '.join(updated_columns)
 10 |     sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns})"
 11 |     #print(sql)
 12 |     try:
 13 |         conn = pymysql.connect(**mysql_settings)
 14 |         cur = conn.cursor()
 15 |         cur.execute(sql)
 16 |         index_result = cur.fetchall()
 17 | 
 18 |         if not index_result:
 19 |             print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。")
 20 | 
 21 |         # 提取列名
 22 |         e_column_names = [desc[0] for desc in cur.description]
 23 | 
 24 |         # 提取结果值并进行自动换行处理
 25 |         e_result_values = []
 26 |         for row in index_result:
 27 |             values = list(row.values())
 28 |             wrapped_values = [textwrap.fill(str(value), width=30) for value in values]
 29 |             e_result_values.append(wrapped_values)
 30 | 
 31 |         # 将结果格式化为表格（包含竖线）
 32 |         e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left")
 33 | 
 34 |         return e_table
 35 | 
 36 |     except pymysql.err.ProgrammingError as e:
 37 |         print("MySQL 内部错误：",e)
 38 |         return None
 39 |     except Exception as e:
 40 |         print("MySQL 内部错误：",e)
 41 |         return None
 42 |     finally:
 43 |         if cur:
 44 |             cur.close()
 45 |         if conn:
 46 |             conn.close()
 47 | 
 48 | #########################################################
 49 | 
 50 | def check_index_exist(mysql_settings, table_name, index_column):
 51 |     show_index_sql = f"show index from {table_name} where Column_name = '{index_column}'"
 52 |     try:
 53 |         conn = pymysql.connect(**mysql_settings)
 54 |         cur = conn.cursor()
 55 |         cur.execute(show_index_sql)
 56 |         index_result = cur.fetchall()
 57 | 
 58 |         #if not index_result:
 59 |             #print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。")
 60 | 
 61 |         return index_result
 62 | 
 63 |     except pymysql.err.ProgrammingError as e:
 64 |         print("MySQL 内部错误：",e)
 65 |         return None
 66 |     except Exception as e:
 67 |         print("MySQL 内部错误：",e)
 68 |         return None
 69 |     finally:
 70 |         if cur:
 71 |             cur.close()
 72 |         if conn:
 73 |             conn.close()
 74 | 
 75 | #########################################################
 76 | 
 77 | def check_index_exist_multi(mysql_settings, database, table_name, index_columns, index_number):
 78 |     index_columns = index_columns
 79 |     index_columns = index_columns.split(',')
 80 |     updated_columns = [f"'{column.strip()}'" for column in index_columns]
 81 |     final_columns = ', '.join(updated_columns)
 82 |     sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns}) GROUP BY INDEX_NAME HAVING COUNT(INDEX_NAME) = {index_number}"
 83 |     #print(sql)
 84 |     try:
 85 |         conn = pymysql.connect(**mysql_settings)
 86 |         cur = conn.cursor()
 87 |         cur.execute(sql)
 88 |         index_result = cur.fetchall()
 89 | 
 90 |         if not index_result:
 91 |             return None
 92 | 
 93 |         return index_result
 94 | 
 95 |     except pymysql.err.ProgrammingError as e:
 96 |         print("MySQL 内部错误：",e)
 97 |         return None
 98 |     except Exception as e:
 99 |         print("MySQL 内部错误：",e)
100 |         return None
101 |     finally:
102 |         if cur:
103 |             cur.close()
104 |         if conn:
105 |             conn.close()
106 | 
107 | 


--------------------------------------------------------------------------------
/src/sqlai.py:
--------------------------------------------------------------------------------
 1 | from vanna.remote import VannaDefault
 2 | 
 3 | """
 4 | pip3 install vanna==0.0.36 -i "http://mirrors.aliyun.com/pypi/simple" --trusted-host "mirrors.aliyun.com"
 5 | """
 6 | 
 7 | def optimize_sql(original_sql):
 8 |     # 创建 VannaDefault 实例
 9 |     vn = VannaDefault(model='sql_helper', api_key='xxxxxxxxxxxxxxxxxxxx')
10 | 
11 |     # 输出调用信息
12 |     print('以下是调用的vanna.ai LLM接口.')
13 |     print('优化前的SQL是：')
14 |     print(original_sql)
15 |     print('-' * 55)
16 | 
17 |     # 输出优化后的 SQL
18 |     print('优化后的SQL是：')
19 | 
20 |     # 调用 VannaDefault 的 ask 方法
21 |     vn.ask('How to optimize this SQL : {}'.format(original_sql))
22 | 


--------------------------------------------------------------------------------
/src/sqlai_helper.py:
--------------------------------------------------------------------------------
  1 | import sys, re
  2 | import textwrap
  3 | from tabulate import tabulate
  4 | import pymysql
  5 | from sql_metadata import Parser
  6 | from sql_format_class import SQLFormatter
  7 | from sql_alias import has_table_alias
  8 | from sql_count_value import count_column_value, count_column_clause_value
  9 | from sql_index import execute_index_query, check_index_exist, check_index_exist_multi
 10 | from where_clause import * # 1.1版本-新增where条件表达式值
 11 | from sql_extra import *
 12 | import yaml
 13 | import argparse
 14 | from sqlai import *
 15 | 
 16 | # 创建命令行参数解析器
 17 | parser = argparse.ArgumentParser()
 18 | 
 19 | #------------
 20 | # 添加-f/--file参数，用于指定db.yaml文件的路径
 21 | parser.add_argument("-f", "--file", help="Path to db.yaml file")
 22 | 
 23 | # 添加参数，用于指定MySQL的主机名
 24 | parser.add_argument("-H", "--host", help="MySQL host")
 25 | 
 26 | # 添加参数，用于指定MySQL的端口号
 27 | parser.add_argument("-P", "--port", default=3306, type=int, help="MySQL port(default: 3306)")
 28 | 
 29 | # 添加参数，用于指定MySQL的用户名
 30 | parser.add_argument("-u", "--user", help="MySQL user")
 31 | 
 32 | # 添加参数，用于指定MySQL的密码
 33 | parser.add_argument("-p", "--password", help="MySQL password")
 34 | 
 35 | # 添加参数，用于指定MySQL的数据库名
 36 | parser.add_argument("-d", "--database", help="MySQL database name")
 37 | 
 38 | #-----------
 39 | 
 40 | # 添加--sql参数
 41 | parser.add_argument("-q", "--sql", required=True, help="SQL query")
 42 | 
 43 | # 添加--sample参数，默认值为100000，表示10万行
 44 | parser.add_argument("--sample", default=100000, type=int, help="Number of rows to sample (default: 100000)")
 45 | 
 46 | # 添加版本号参数
 47 | parser.add_argument('-v', '--version', action='version', version='sqlai_helper工具版本号: 2.1.3，更新日期：2024-10-10 <-> 支持SQL改写，合并LLM模型接口')
 48 | 
 49 | # 解析命令行参数
 50 | args = parser.parse_args()
 51 | 
 52 | # 获取样本数据行数
 53 | #sample_size = args.sample
 54 | 
 55 | if args.file:
 56 |     # 如果使用了-f参数，则必须只用-f参数且不能再使用其他MySQL配置参数
 57 |     if args.host or args.user or args.password or args.database:
 58 |         parser.error("-f/--file参数与其他MySQL配置参数不能共存。")
 59 |     
 60 |     # 获取传入的db.yaml文件路径
 61 |     file_path = args.file
 62 | 
 63 |     # 从外部的db.yaml文件加载配置
 64 |     with open(file_path, "r") as f:
 65 |         db_config = yaml.safe_load(f)
 66 | 
 67 |     # 使用加载的配置赋值给mysql_settings
 68 |     mysql_settings = {
 69 |         "host": db_config["host"],
 70 |         "port": db_config["port"],
 71 |         "user": db_config["user"],
 72 |         "passwd": db_config["passwd"],
 73 |         "database": db_config["database"],
 74 |         "cursorclass": pymysql.cursors.DictCursor
 75 |     }
 76 | else:
 77 |     # 如果没有使用-f参数，则必须指定所有MySQL配置参数
 78 |     if not all([args.host, args.user, args.password, args.database]):
 79 |         parser.error("必须指定所有MySQL配置参数，包括-H/--host、-P/--port、-u/--user、-p/--password和-d/--database。")
 80 | 
 81 |     # 获取MySQL的配置信息
 82 |     mysql_settings = {
 83 |         "host": args.host,
 84 |         "port": args.port,
 85 |         "user": args.user,
 86 |         "passwd": args.password,
 87 |         "database": args.database,
 88 |         "cursorclass": pymysql.cursors.DictCursor
 89 |     }
 90 | 
 91 | ####################################################################
 92 | # 获取样本数据行数
 93 | sample_size = args.sample
 94 | 
 95 | # 获取传入的sql_query的值
 96 | sql_query = args.sql
 97 | 
 98 | print("\n1) 你刚才输入的SQL语句是：")
 99 | print("-" * 100)
100 | # 美化SQL
101 | formatter = SQLFormatter()
102 | formatted_sql = formatter.format_sql(sql_query)
103 | print(formatted_sql)
104 | print("-" * 100)
105 | 
106 | ###########################################################################
107 | # 解析SQL，识别出表名和字段名
108 | try:
109 |     parser = Parser(sql_query)
110 |     table_names = parser.tables
111 |     # print(f"表名是: {table_names}")
112 |     table_aliases = parser.tables_aliases
113 |     data = parser.columns_dict
114 |     select_fields = data.get('select', [])
115 |     join_fields = data.get('join', [])
116 |     where_fields = data.get('where', [])
117 |     # print(f"WHERE字段是：{where_fields}")
118 |     order_by_fields = data.get('order_by', [])
119 |     group_by_fields = data.get('group_by', [])
120 |     if 'SELECT' not in sql_query.upper():
121 |         print("sql_helper工具仅支持select语句")
122 |         sys.exit(1)
123 | except Exception as e:
124 |     print("解析 SQL 出现语法错误：", str(e))
125 |     sys.exit(2)
126 | 
###########################################################################
# Run EXPLAIN for the statement on a connection that stays open for the
# index checks further below.
conn = pymysql.connect(**mysql_settings)
cur = conn.cursor()

sql = f"EXPLAIN {sql_query}"
try:
    cur.execute(sql)
except Exception as e:
    # Covers pymysql.err.ProgrammingError as well; both are reported the same way.
    print("MySQL 内部错误：", e)
    sys.exit(1)
explain_result = cur.fetchall()

# Headers are the keys of the first row (rows are dicts with DictCursor).
e_column_names = list(explain_result[0].keys())

# Wrap every cell to 20 characters so the grid stays narrow.
e_result_values = [
    [textwrap.fill(str(cell), width=20) for cell in plan_row.values()]
    for plan_row in explain_result
]

# Grid layout draws the vertical bars between columns.
e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left")

print("\n")
print("2) EXPLAIN执行计划:")
print(e_table)
print("\n")
print("3) 索引优化建议：")
print("-" * 100)
161 | ###########################################################################
162 | 
# Note whether any WHERE column is written as "table.column".
if where_fields:
    contains_dot = any('.' in name for name in where_fields)
else:
    contains_dot = False
    print(f"你的SQL没有where条件.")

# When the statement joins tables, every column used in ON must be indexed.
if join_fields:
    # table name -> columns of that table used in join conditions
    table_field_dict = {}

    for qualified_name in join_fields:
        parts = qualified_name.split('.')
        # Only "table.column" names can be attributed to a table.
        if len(parts) != 2:
            continue
        owner, column = parts
        table_field_dict.setdefault(owner, []).append(column)

    for table_name, on_columns in table_field_dict.items():
        for on_column in on_columns:
            show_index_sql = f"show index from {table_name} where Column_name = '{on_column}'"
            cur.execute(show_index_sql)
            index_result = cur.fetchall()
            if index_result:
                continue
            print("join联表查询，on关联字段必须增加索引！")
            print(f"\033[91m需要添加索引：ALTER TABLE {table_name} ADD INDEX idx_{on_column}({on_column});\033[0m\n")
            print(f"【{table_name}】表 【{on_column}】字段，索引分析：")
            index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
                                               table_name=table_name, index_columns=on_column)
            print(index_static)
195 | 
# Walk the EXPLAIN rows and work out which columns should get an index.
for row in explain_result:
    # Table (or alias) this EXPLAIN row refers to.
    table_name = row['table']

    # EXPLAIN reports NULL for rows that touch no table and "<derivedN>" for
    # derived tables; neither can be indexed, so skip them.
    if not table_name or table_name.lower().startswith('<derived'):
        continue

    add_index_fields = []
    # Decide whether this row deserves index analysis.
    # 2023-08-22 update: fixed incomplete detection of WHERE columns after multi-table joins.
    if (len(join_fields) != 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1)) or (len(join_fields) == 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1000)):
        # Case 1: no table aliases and no "table.column" names in WHERE.
        if has_table_alias(table_aliases) is False and contains_dot is False:
            if len(where_fields) != 0:
                for where_field in where_fields:
                    # Since v1.1: sample with the full WHERE expression when it can be extracted.
                    where_clause_value = parse_where_condition(formatted_sql, where_field)
                    if where_clause_value is not None:
                        where_clause_value = where_clause_value.replace('\n', '').replace('\r', '')
                        where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
                        Cardinality = count_column_clause_value(table_name, where_field, where_clause_value, mysql_settings, sample_size)
                    else:
                        Cardinality = count_column_value(table_name, where_field, mysql_settings, sample_size)
                    if Cardinality:
                        count_value = Cardinality[0]['count']
                        if where_clause_value is not None:
                            print(
                                f"取出表 【{table_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                        else:
                            print(
                                f"取出表 【{table_name}】 where条件字段 【{where_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                    else:
                        add_index_fields.append(where_field)

            if group_by_fields is not None and len(group_by_fields) != 0:
                for group_field in group_by_fields:
                    Cardinality = count_column_value(table_name, group_field, mysql_settings, sample_size)
                    if Cardinality:
                        count_value = Cardinality[0]['count']
                        print(
                            f"取出表 【{table_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                    else:
                        add_index_fields.append(group_field)

            if len(order_by_fields) != 0:
                for order_field in order_by_fields:
                    Cardinality = count_column_value(table_name, order_field, mysql_settings, sample_size)
                    if Cardinality:
                        count_value = Cardinality[0]['count']
                        print(
                            f"取出表 【{table_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                    else:
                        add_index_fields.append(order_field)

            # Drop duplicate column names while keeping their first-seen order.
            add_index_fields = list(dict.fromkeys(add_index_fields))

            if len(add_index_fields) == 0:
                # index_result only exists as a global once an earlier step ran an
                # index lookup; stay silent when that lookup found nothing.
                if 'index_result' not in globals():
                    print(f"\n\u2192 \033[1;92m【{table_name}】 表，无需添加任何索引。\033[0m\n")
                elif index_result:
                    print(f"\n\u2192 \033[1;92m【{table_name}】 表，无需添加任何索引。\033[0m\n")
                else:
                    pass
            elif len(add_index_fields) == 1:
                index_name = add_index_fields[0]
                index_columns = add_index_fields[0]
                index_result = check_index_exist(mysql_settings, table_name=table_name, index_column=index_columns)
                if not index_result:
                    if row['key'] is None:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m")
                    elif row['key'] is not None and row['rows'] >= 1:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m")
                else:
                    print(f"\n\u2192 \033[1;92m【{table_name}】表 【{index_columns}】字段，索引已经存在，无需添加任何索引。\033[0m")
                print(f"\n【{table_name}】表 【{index_columns}】字段，索引分析：")
                index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
                                                   table_name=table_name, index_columns=index_columns)
                print(index_static)
                print()
            else:
                # Several candidate columns: propose one composite index.
                merged_name = '_'.join(add_index_fields)
                merged_columns = ','.join(add_index_fields)
                index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"],
                                                            table_name=table_name, index_columns=merged_columns,
                                                            index_number=len(add_index_fields))
                if index_result_list is None:
                    if row['key'] is None:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m")
                    elif row['key'] is not None and row['rows'] >= 1:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m")
                else:
                    print(f"\n\u2192 \033[1;92m【{table_name}】表 【{merged_columns}】字段，联合索引已经存在，无需添加任何索引。\033[0m")
                print(f"\n【{table_name}】表 【{merged_columns}】字段，索引分析：")
                index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
                                                   table_name=table_name, index_columns=merged_columns)
                print(index_static)
                print()

        # Case 2: table aliases are used, or WHERE columns are written as "table.column".
        if has_table_alias(table_aliases) is True or contains_dot is True:
            if has_table_alias(table_aliases) is True:
                # Resolve the name shown by EXPLAIN to the real table. A table
                # without an alias is not in the mapping and already carries its
                # real name; "<unionM,N>" rows have no backing table at all.
                if table_name in table_aliases:
                    table_real_name = table_aliases[table_name]
                elif table_name.startswith('<union'):
                    table_real_name = ""
                else:
                    table_real_name = table_name
            else:
                table_real_name = table_name

            if len(where_fields) != 0:
                # Keep only the WHERE columns that belong to this table.
                where_matching_fields = []
                for field in where_fields:
                    if field.startswith(table_real_name + '.'):
                        where_matching_fields.append(field.split('.')[1])
                for where_field in where_matching_fields:
                    # Since v1.1: sample with the full WHERE expression when it can be extracted.
                    talia_clause = table_name + '.' + where_field
                    where_clause_value = parse_where_condition(formatted_sql, talia_clause)
                    if where_clause_value is not None:
                        where_clause_value = where_clause_value.replace('\n', '').replace('\r', '')
                        where_clause_value = re.sub(r'\s+', ' ', where_clause_value)
                        # Strip the leading "alias." so the predicate can run against the bare table.
                        prefix = where_clause_value.split('.')[0]
                        where_clause_value = where_clause_value.replace(prefix + '.', '')
                        if "." in where_clause_value:
                            # A dot is still present after stripping; fall back to plain column sampling.
                            Cardinality = count_column_value(table_real_name, where_field, mysql_settings, sample_size)
                        else:
                            Cardinality = count_column_clause_value(table_real_name, where_field, where_clause_value, mysql_settings, sample_size)
                    else:
                        Cardinality = count_column_value(table_real_name, where_field, mysql_settings, sample_size)
                    if Cardinality:
                        count_value = Cardinality[0]['count']
                        if where_clause_value is not None:
                            print(
                                f"取出表 【{table_real_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                        else:
                            print(
                                f"取出表 【{table_real_name}】 where条件字段 【{where_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                    else:
                        add_index_fields.append(where_field)

            if group_by_fields is not None and len(group_by_fields) != 0:
                group_matching_fields = []
                for field in group_by_fields:
                    if field.startswith(table_real_name + '.'):
                        group_matching_fields.append(field.split('.')[1])
                for group_field in group_matching_fields:
                    Cardinality = count_column_value(table_real_name, group_field, mysql_settings, sample_size)
                    if Cardinality:
                        count_value = Cardinality[0]['count']
                        print(
                            f"取出表 【{table_real_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                    else:
                        add_index_fields.append(group_field)

            if len(order_by_fields) != 0:
                order_matching_fields = []
                for field in order_by_fields:
                    if field.startswith(table_real_name + '.'):
                        order_matching_fields.append(field.split('.')[1])
                for order_field in order_matching_fields:
                    Cardinality = count_column_value(table_real_name, order_field, mysql_settings, sample_size)
                    if Cardinality:
                        count_value = Cardinality[0]['count']
                        print(
                            f"取出表 【{table_real_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录，重复的数据有：【{count_value}】 条，没有必要为该字段创建索引。")
                    else:
                        add_index_fields.append(order_field)

            # Drop duplicate column names while keeping their first-seen order.
            add_index_fields = list(dict.fromkeys(add_index_fields))

            if len(add_index_fields) == 0:
                # Same rule as in case 1: stay silent when an earlier index lookup found nothing.
                if 'index_result' not in globals():
                    print(f"\n\u2192 \033[1;92m【{table_real_name}】 表，无需添加任何索引。\033[0m\n")
                elif index_result:
                    print(f"\n\u2192 \033[1;92m【{table_real_name}】 表，无需添加任何索引。\033[0m\n")
                else:
                    pass
            elif len(add_index_fields) == 1:
                index_name = add_index_fields[0]
                index_columns = add_index_fields[0]
                index_result = check_index_exist(mysql_settings, table_name=table_real_name, index_column=index_columns)
                if not index_result:
                    if row['key'] is None:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\033[0m")
                    elif row['key'] is not None and row['rows'] >= 1:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\033[0m")
                else:
                    print(f"\n\u2192 \033[1;92m【{table_real_name}】表 【{index_columns}】字段，索引已经存在，无需添加任何索引。\033[0m")
                print(f"\n【{table_real_name}】表 【{index_columns}】字段，索引分析：")
                index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
                                                   table_name=table_real_name, index_columns=index_columns)
                print(index_static)
                print()
            else:
                # Several candidate columns: propose one composite index.
                merged_name = '_'.join(add_index_fields)
                merged_columns = ','.join(add_index_fields)
                index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"],
                                                            table_name=table_real_name, index_columns=merged_columns,
                                                            index_number=len(add_index_fields))
                if index_result_list is None:
                    if row['key'] is None:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m")
                    elif row['key'] is not None and row['rows'] >= 1:
                        print(
                            f"\033[93m建议添加索引：ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m")
                else:
                    print(f"\n\u2192 \033[1;92m【{table_real_name}】表 【{merged_columns}】字段，联合索引已经存在，无需添加任何索引。\033[0m")
                print(f"\n【{table_real_name}】表 【{merged_columns}】字段，索引分析：")
                index_static = execute_index_query(mysql_settings, database=mysql_settings["database"],
                                                   table_name=table_real_name, index_columns=merged_columns)
                print(index_static)
                print()
424 | 
425 | # 关闭游标和连接
426 | cur.close()
427 | conn.close()
428 | 
429 | print("\n")
430 | print("4) 额外的建议：")
431 | print("-" * 100)
432 | 
433 | where_clause = parse_where_condition_full(formatted_sql)
434 | #print(f"where子句：{where_clause}")
435 | if where_clause:
436 |     like_r, like_expression = check_percent_position(where_clause)
437 |     if like_r is True:
438 |         print(f"like模糊匹配，百分号在首位，【{like_expression}】是不能用到索引的，例如like '%张三%'，可以考虑改成like '张三%'，这样是可以用到索引的，如果业务上不能改，可以考虑用全文索引。\n")
439 | 
440 |     function_r = extract_function_index(where_clause)
441 |     if function_r is not False:
442 |         print(f"索引列使用了函数作计算：【{function_r}】，会导致索引失效。"
443 |               f"如果你是MySQL 8.0可以考虑创建函数索引；如果你是MySQL 5.7，你要更改你的SQL逻辑了。\n")
444 | 
445 | print("\n")
446 | print("\033[1m5) AI的建议：\033[0m")
447 | print("-" * 100)
448 | optimized_sql = optimize_sql(formatted_sql)
449 | 


--------------------------------------------------------------------------------
/src/where_clause.py:
--------------------------------------------------------------------------------
 1 | import sqlparse
 2 | 
 3 | def parse_where_condition(sql, column):
 4 |     parsed = sqlparse.parse(sql)
 5 |     stmt = parsed[0]
 6 | 
 7 |     where_column = ""
 8 |     where_expression = ""
 9 |     where_value = ""
10 |     where_clause = ""
11 |     found = False
12 |     result = ""
13 | 
14 |     for token in stmt.tokens:
15 |         if isinstance(token, sqlparse.sql.Where):
16 |             where_clause = token.value
17 |             conditions = []
18 |             for cond_tok in token.tokens:
19 |                 if isinstance(cond_tok, sqlparse.sql.Comparison):
20 |                     left_token = cond_tok.left.value.strip()
21 |                     if column == left_token:
22 |                         #return f"比较运算符: {cond_tok.value.strip()}"
23 |                         return cond_tok.value.strip()
24 | 
25 |                 if isinstance(cond_tok, sqlparse.sql.Identifier) and cond_tok.value == column:
26 |                     found = True
27 | 
28 |                 if found:
29 |                     if isinstance(cond_tok, sqlparse.sql.Token) and cond_tok.value.upper() in ["OR","AND"]:
30 |                         break
31 |                     else:
32 |                         result += cond_tok.value
33 | 
34 |                 if isinstance(cond_tok, sqlparse.sql.Parenthesis) and found:
35 |                     break
36 | 
37 |     if len(result) != 0:
38 |         #return f"逻辑运算符: {result.strip()}"
39 |         return result.strip()
40 |     else:
41 |         #return "没有找到该字段的条件表达式"
42 |         return None
43 | 
44 | 
def parse_where_condition_full(sql):
    """Return the text of ``sql`` from its first WHERE clause to the end.

    The first statement of ``sql`` is parsed with sqlparse; the text of the
    first top-level ``Where`` group and of every token that follows it is
    concatenated in order.

    Args:
        sql: SQL text to inspect.

    Returns:
        The stripped text starting at the WHERE clause, or ``None`` when
        ``sql`` is empty or has no top-level WHERE clause.
    """
    parsed = sqlparse.parse(sql)
    if not parsed:
        # Empty / whitespace-only input yields no statements; indexing
        # parsed[0] would raise IndexError.
        return None

    tokens = parsed[0].tokens
    for position, token in enumerate(tokens):
        if isinstance(token, sqlparse.sql.Where):
            # Slice from the first Where onwards so each token is emitted
            # exactly once, even if a later token is itself a Where group.
            where_clause = "".join(tok.value for tok in tokens[position:])
            return where_clause.strip() if where_clause else None

    return None
60 | 
61 | 


--------------------------------------------------------------------------------
/test.yaml:
--------------------------------------------------------------------------------
1 | host: 192.168.198.239
2 | port: 3306
3 | user: admin
4 | passwd: 123456
5 | database: test
6 | 


--------------------------------------------------------------------------------
/web/sql_helper/README.md:
--------------------------------------------------------------------------------
 1 | ### sql_helper Web端接口（适用于Centos7 系统）
 2 | 
 3 | 一、环境搭建
 4 | 
 5 | ```
 6 | shell> yum install httpd php php-mysqlnd
 7 | shell> systemctl restart httpd.service
 8 | ```
 9 | 
10 | 1） 把 https://github.com/hcymysql/sql_helper/archive/refs/heads/main.zip 安装包解压缩到 /var/www/html/目录下
11 | 
12 | 目录结构如下：
13 | ```
14 | sql_helper
15 | ├── conn.php
16 | ├── css
17 | │   └── bootstrap.min.css
18 | ├── schema
19 | │   └── sql_helper_schema.sql
20 | ├── sql_helper_args
21 | ├── sql_helper.php
22 | └── sql_helper_result.php
23 | ```
24 | 
25 | 2）赋予sql_helper_args文件可执行权限
26 | ```
27 | shell> cd /var/www/html/sql_helper/
28 | shell> chmod 755 sql_helper_args
29 | ```
30 | 
31 | 二、sql_helper 网页端部署
32 | 
33 | 1）导入sql_helper工具表结构
34 | ```
35 | shell> mysql -uroot -p123456 < ./schema/sql_helper_schema.sql
36 | ```
37 | 
38 | 2）录入你线上的数据库信息（你可以填写从库信息）
39 | ```
40 | mysql> insert into sql_helper.dbinfo values(1,'192.168.0.11','test','admin','123456',3306);
41 | ```
42 | 
43 | 3）配置conn.php文件
44 | 
45 | -- 改成你的sql_helper库连接信息
46 | 
47 | 4）页面访问
48 | 
49 | http://yourIP/sql_helper/sql_helper.php
50 | 
51 | 加一个超链接，可方便地接入你们的自动化运维平台里。
52 | 
53 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/5657b325-0e0e-460f-a9c7-9891f9730bb3)
54 | 
55 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/1b9dfc6f-4542-406a-9cc4-0c61c44e80be)
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/web/sql_helper/conn.php:
--------------------------------------------------------------------------------
<?php

// Opens the shared mysqli connection ($conn) to the sql_helper metadata
// database. Pages that include this file use $conn to read the dbinfo table.
//
// On failure there is no link to pass to mysqli_error(), so the reason is
// read with mysqli_connect_error() instead.
// NOTE(review): PHP 8.1+ makes mysqli throw on connection errors by default,
// in which case the "or die" branch is never reached - confirm target version.
$conn = mysqli_connect("127.0.0.1", "admin", "123456", "sql_helper", 3306)
    or die("数据库链接错误" . mysqli_connect_error());

// Set the connection character set through the API rather than "set names",
// so the client library is aware of the encoding as well.
mysqli_set_charset($conn, "utf8");

?>
7 | 


--------------------------------------------------------------------------------
/web/sql_helper/schema/sql_helper_schema.sql:
--------------------------------------------------------------------------------
 1 | CREATE DATABASE IF NOT EXISTS `sql_helper`;
 2 | 
 3 | USE `sql_helper`;
 4 | 
 5 | DROP TABLE IF EXISTS `dbinfo`;
 6 | 
 7 | CREATE TABLE `dbinfo` (
 8 |   `id` int(11) NOT NULL AUTO_INCREMENT,
 9 |   `ip` varchar(100) DEFAULT NULL,
10 |   `dbname` varchar(100) DEFAULT NULL,
11 |   `user` varchar(500) DEFAULT NULL,
12 |   `pwd` varchar(500) DEFAULT NULL,
13 |   `port` int(11) DEFAULT NULL,
14 |   PRIMARY KEY (`id`),
15 |   KEY dbname (`dbname`)
16 | ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;
17 | 


--------------------------------------------------------------------------------
/web/sql_helper/sql_helper.php:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 |     <meta charset="utf-8">
 4 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 5 |     <title>SQL自助查询优化</title>
 6 |     <meta name="description" content="Ela Admin - HTML5 Admin Template">
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 8 | 
 9 | 
10 | <link rel="stylesheet" href="css/bootstrap.min.css">
11 | 
12 | </head>
13 | <body>
14 |                     <div class="col-lg-10" >		
15 |                         <div class="card" >
16 | 			<p><font color="#0000FF">注：选择你的数据库，然后提交SQL，平台会自动返回优化建议。例如你可以将线上跑的比较慢的SQL，拿到平台里执行，平台会根据你的SQL自动反馈优化建议。</font></p>
17 |                             <div class="card-header" align="center">
18 |                                 <strong>SQL自助查询优化（自动反馈优化建议）</strong> 
19 |                             </div>
20 |                             <div class="card-body card-block"  align="center">
21 | 					<form action="sql_helper_result.php" method="post" enctype="multipart/form-data" class="form-horizontal">
22 | 					<div class="row form-group">
23 |                                         <div class="col col-md-3"><label for="select" class=" form-control-label">选择你的数据库</label></div>
24 |                                         <div class="col-12 col-md-9">
25 | 
26 | <script>
// Invoked from the submit button's onclick: shows an alert when the database
// drop-down still has its empty placeholder value. It returns nothing, so it
// does not by itself cancel the form submission.
function select_db() {
    var chosenDatabase = document.getElementById("select").value;
    if (chosenDatabase) {
        return;
    }
    alert("请选择数据库！");
}
34 | </script>						
35 |                                             <select name="select_yourdb" id="select" class="form-control">
36 | 						<option value="">Please select</option>
37 | 	<?php
38 |                 require 'conn.php';
39 | 		$result = mysqli_query($conn,"SELECT dbname FROM dbinfo order by dbname ASC");
40 | 		while($row = mysqli_fetch_array($result)){
41 | 			echo "<option value=\"".$row[0]."\">".$row[0]."</option>"."<br>";
42 |     		}
43 |     	?>
44 |                                             </select>
45 |                                         </div>
46 |                                     </div>				
47 | 					
48 | 					<div class="row form-group" align="center">
49 |                                         <div class="col col-md-3"><label for="textarea-input" class=" form-control-label">输入你想优化的SQL</label></div>
50 |                                         <div class="col-12 col-md-9"><textarea name="input_yoursql" id="textarea-input" rows="9" placeholder="Content..." class="form-control"></textarea></div>
51 |                                     </div>
52 | 									
53 |                                 <div class="form-actions form-group" align="center">
54 |                                     <button type="submit" class="btn btn-primary btn-sm" onclick="select_db()">反馈优化建议</button>
55 |                                 </div>
56 | 					</div>
57 | 					</div>
58 | 					</div>
59 | 					</div>
60 | </body>
61 | </html>
62 | 


--------------------------------------------------------------------------------
/web/sql_helper/sql_helper_result.php:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 |     <meta charset="utf-8">
 4 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
 5 |     <title>SQL自助查询优化</title>
 6 |     <meta name="description" content="Ela Admin - HTML5 Admin Template">
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1">
 8 | 
 9 | 
10 | <link rel="stylesheet" href="css/bootstrap.min.css">
11 | 
12 | </head>
13 | <body>
14 | 
<?php

// Handles the form posted by sql_helper.php: looks up the connection details
// of the chosen database in dbinfo, runs the sqlai_helper command-line tool
// against it with the submitted SQL, and prints the tool's output.

// Accept only plain string fields; anything else is treated as missing.
$get_dbname = (isset($_POST['select_yourdb']) && is_string($_POST['select_yourdb'])) ? $_POST['select_yourdb'] : '';
$get_sql = (isset($_POST['input_yoursql']) && is_string($_POST['input_yoursql'])) ? $_POST['input_yoursql'] : '';
$get_sql = str_replace('`', '', $get_sql);

if (empty($get_dbname)) {
    // HTML has already been emitted above this block, so header() only works
    // when output buffering is active; otherwise redirect on the client side.
    if (!headers_sent()) {
        header("Location:sql_helper.php");
    } else {
        echo '<script>window.location.replace("sql_helper.php");</script>';
    }
    // Stop here so that no lookup or command runs without a database name.
    echo '</body></html>';
    exit;
}

require_once 'conn.php';

// Look the database up with a prepared statement: the name comes straight
// from the request and must never be concatenated into the SQL text.
$ip = $dbname = $user = $pwd = $port = null;
$db_found = false;
$stmt = mysqli_prepare($conn, "select ip,dbname,user,pwd,port from dbinfo where dbname=?");
if ($stmt) {
    mysqli_stmt_bind_param($stmt, 's', $get_dbname);
    if (mysqli_stmt_execute($stmt)) {
        mysqli_stmt_bind_result($stmt, $ip, $dbname, $user, $pwd, $port);
        // Only the first matching row is used.
        $db_found = (mysqli_stmt_fetch($stmt) === true);
    }
    mysqli_stmt_close($stmt);
}

if (!$db_found) {
    $output = '未找到所选数据库的连接信息。';
} else {
    // escapeshellarg() drops multibyte characters unless LC_CTYPE is a UTF-8
    // locale, which would corrupt SQL containing non-ASCII literals.
    setlocale(LC_CTYPE, 'en_US.UTF-8', 'C.UTF-8');

    // Every argument is quoted individually so that neither the stored
    // connection fields nor the submitted SQL can inject shell syntax.
    // The more rows sampled, the more reliable the add-index decision;
    // 100000 rows is the default.
    $command = './sqlai_helper'
        . ' -H ' . escapeshellarg((string)$ip)
        . ' -P ' . escapeshellarg((string)(int)$port)
        . ' -u ' . escapeshellarg((string)$user)
        . ' -p ' . escapeshellarg((string)$pwd)
        . ' -d ' . escapeshellarg((string)$dbname)
        . ' -q ' . escapeshellarg($get_sql)
        . ' --sample 100000';

    // Run the command and capture its output.
    $output = shell_exec($command);

    if (!is_string($output) || $output === '') {
        // shell_exec() yields no string when the command fails or prints nothing.
        $output = '命令执行失败或没有任何输出。';
    } else {
        // The helper colours its terminal output with ANSI escape sequences,
        // which would show up as garbage inside <pre>; strip them.
        $output = preg_replace('/\x1b\[[0-9;]*m/', '', $output);
    }
}

echo '<div class="card-header">SQL 查询结果</div>';

// <pre> keeps the original layout of the output; the text is HTML-escaped
// because it echoes back user-supplied SQL.
echo '<div class="container">';
echo '<br>';
echo '<pre>' . htmlspecialchars((string)$output, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</pre>';

echo '<br><h6><a href="javascript:history.back(-1);">点击此处返回</a></h6><br>';
echo '</div>';

echo "<br>";
echo "<hr />";

?>
52 | 
53 | </body>
54 | </html>
55 | 


--------------------------------------------------------------------------------