├── Flask_src ├── app.py ├── requirements.txt ├── sql_alias.py ├── sql_count_value.py ├── sql_extra.py ├── sql_format_class.py ├── sql_index.py ├── sqlai.py ├── templates │ └── index.html └── where_clause.py ├── README.md ├── deepseek_flash_src ├── app.py ├── requirements.txt ├── sql_alias.py ├── sql_count_value.py ├── sql_deepseek.py ├── sql_extra.py ├── sql_format_class.py ├── sql_index.py ├── templates │ └── index.html └── where_clause.py ├── src ├── sql_alias.py ├── sql_count_value.py ├── sql_extra.py ├── sql_format_class.py ├── sql_index.py ├── sqlai.py ├── sqlai_helper.py └── where_clause.py ├── test.yaml └── web └── sql_helper ├── README.md ├── conn.php ├── css └── bootstrap.min.css ├── schema └── sql_helper_schema.sql ├── sql_helper.php └── sql_helper_result.php /Flask_src/app.py: -------------------------------------------------------------------------------- 1 | import sys, re 2 | import textwrap 3 | from tabulate import tabulate 4 | import pymysql 5 | from sql_metadata import Parser 6 | from sql_format_class import SQLFormatter 7 | from sql_alias import has_table_alias 8 | from sql_count_value import count_column_value, count_column_clause_value 9 | from sql_index import execute_index_query, check_index_exist, check_index_exist_multi 10 | from where_clause import * 11 | from sql_extra import * 12 | import yaml 13 | import argparse 14 | from sqlai import * 15 | from flask import Flask, request, render_template 16 | 17 | app = Flask(__name__) 18 | app.config['JSON_AS_ASCII'] = False 19 | app.config['JSON_SORT_KEYS'] = False 20 | app.config['TEMPLATES_AUTO_RELOAD'] = True 21 | app.config['DEFAULT_CHARSET'] = 'utf-8' 22 | 23 | 24 | def analyze_sql(sql_query, db_config, sample_size=100000): 25 | mysql_settings = { 26 | "host": db_config["host"], 27 | "port": db_config["port"], 28 | "user": db_config["user"], 29 | "passwd": db_config["passwd"], 30 | "database": db_config["database"], 31 | "cursorclass": pymysql.cursors.DictCursor, 32 | "charset": 'utf8mb4' 33 | } 34 | 35 | try: 36 | formatted_sql = SQLFormatter().format_sql(sql_query) 37 | except UnicodeDecodeError as e: 38 | print(f"格式化 SQL 时出错: {e}, sql_query: {sql_query}") 39 | return {"error": f"格式化 SQL 时出错: {e}"} 40 | 41 | try: 42 | parser = Parser(sql_query) 43 | table_names = parser.tables 44 | table_aliases = parser.tables_aliases 45 | data = parser.columns_dict 46 | select_fields = data.get('select', []) 47 | join_fields = data.get('join', []) 48 | where_fields = data.get('where', []) 49 | order_by_fields = data.get('order_by', []) 50 | group_by_fields = data.get('group_by', []) 51 | if 'SELECT' not in sql_query.upper(): 52 | return {"error": "sql_helper工具仅支持select语句"} 53 | except Exception as e: 54 | return {"error": f"解析 SQL 出现语法错误:{str(e)}"} 55 | 56 | conn = None 57 | try: 58 | conn = pymysql.connect(**mysql_settings) 59 | cur = conn.cursor() 60 | 61 | sql = f"EXPLAIN {sql_query}" 62 | try: 63 | cur.execute(sql) 64 | except pymysql.err.ProgrammingError as e: 65 | return {"error": f"MySQL 内部错误:{e}"} 66 | except Exception as e: 67 | return {"error": f"MySQL 内部错误:{e}"} 68 | explain_result = cur.fetchall() 69 | 70 | e_column_names = list(explain_result[0].keys()) 71 | e_result_values = [] 72 | for row in explain_result: 73 | values = list(row.values()) 74 | wrapped_values = [textwrap.fill(str(value), width=20) for value in values] 75 | e_result_values.append(wrapped_values) 76 | e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left") 77 | except Exception as e: 78 | return {"error": f"执行 EXPLAIN 或处理结果时出错: {e}"} 79 | finally: 80 | if conn: 81 | conn.close() 82 | 83 | index_suggestions = [] 84 | contains_dot = False 85 | if len(where_fields) == 0: 86 | index_suggestions.append(f"你的SQL没有where条件.") 87 | else: 88 | contains_dot = any('.' in field for field in where_fields) 89 | 90 | if len(join_fields) != 0: 91 | table_field_dict = {} 92 | for field in join_fields: 93 | table_field = field.split('.') 94 | if len(table_field) == 2: 95 | table_name = table_field[0] 96 | field_name = table_field[1] 97 | if table_name not in table_field_dict: 98 | table_field_dict[table_name] = [] 99 | table_field_dict[table_name].append(field_name) 100 | 101 | for table_name, on_columns in table_field_dict.items(): 102 | for on_column in on_columns: 103 | try: 104 | show_index_sql = f"show index from {table_name} where Column_name = '{on_column}'" 105 | conn = pymysql.connect(**mysql_settings) 106 | cur = conn.cursor() 107 | cur.execute(show_index_sql) 108 | index_result = cur.fetchall() 109 | if not index_result: 110 | suggestion_text = "join联表查询,on关联字段必须增加索引!\n" 111 | suggestion_text += f"\033[91m需要添加索引:ALTER TABLE {table_name} ADD INDEX idx_{on_column}({on_column});\033[0m\n" 112 | suggestion_text += f"【{table_name}】表 【{on_column}】字段,索引分析:\n" 113 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 114 | table_name=table_name, index_columns=on_column) 115 | suggestion_text += index_static + "\n" 116 | index_suggestions.append(suggestion_text) 117 | except Exception as e: 118 | print(f"join查询分析出错: {e}") 119 | index_suggestions.append(f"join查询分析出错: {e}") 120 | finally: 121 | if conn: 122 | conn.close() 123 | 124 | 125 | for row in explain_result: 126 | table_name = row['table'] 127 | if table_name.lower().startswith('= 1)) or (len(join_fields) == 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1000)): 134 | if has_table_alias(table_aliases) is False and contains_dot is False: 135 | if len(where_fields) != 0: 136 | for where_field in where_fields: 137 | where_clause_value = parse_where_condition(formatted_sql, where_field) 138 | if where_clause_value is not None: 139 | where_clause_value = where_clause_value.replace('\n', '').replace('\r', '') 140 | where_clause_value = re.sub(r'\s+', ' ', where_clause_value) 141 | where_clause_value = re.sub(r'\s+', ' ', where_clause_value) 142 | Cardinality = count_column_clause_value(table_name, where_field, where_clause_value, mysql_settings, sample_size) 143 | else: 144 | Cardinality = count_column_value(table_name, where_field, mysql_settings, sample_size) 145 | if Cardinality: 146 | count_value = Cardinality[0]['count'] 147 | if where_clause_value is not None: 148 | suggestion_text = f"取出表 【{table_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 149 | index_suggestions.append(suggestion_text) 150 | else: 151 | suggestion_text = f"取出表 【{table_name}】 where条件字段 【{where_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 152 | index_suggestions.append(suggestion_text) 153 | else: 154 | add_index_fields.append(where_field) 155 | 156 | if group_by_fields is not None and len(group_by_fields) != 0: 157 | for group_field in group_by_fields: 158 | Cardinality = count_column_value(table_name, group_field, mysql_settings, sample_size) 159 | if Cardinality: 160 | count_value = Cardinality[0]['count'] 161 | suggestion_text = f"取出表 【{table_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 162 | index_suggestions.append(suggestion_text) 163 | else: 164 | add_index_fields.append(group_field) 165 | 166 | if len(order_by_fields) != 0: 167 | for order_field in order_by_fields: 168 | Cardinality = count_column_value(table_name, order_field, mysql_settings, sample_size) 169 | if Cardinality: 170 | count_value = Cardinality[0]['count'] 171 | suggestion_text = f"取出表 【{table_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 172 | index_suggestions.append(suggestion_text) 173 | else: 174 | add_index_fields.append(order_field) 175 | 176 | add_index_fields = list(dict.fromkeys(add_index_fields).keys()) 177 | 178 | if len(add_index_fields) == 0: 179 | if not index_suggestions: 180 | index_suggestions.append(f"\n\u2192 \033[1;92m【{table_name}】 表,无需添加任何索引。\033[0m\n") 181 | else: 182 | index_suggestions.append(f"\n\u2192 \033[1;92m【{table_name}】 表,无需添加任何索引。\033[0m\n") 183 | elif len(add_index_fields) == 1: 184 | index_name = add_index_fields[0] 185 | index_columns = add_index_fields[0] 186 | try: 187 | conn = pymysql.connect(**mysql_settings) 188 | cur = conn.cursor() 189 | index_result = check_index_exist(mysql_settings, table_name=table_name, index_column=index_columns) 190 | if not index_result: 191 | suggestion_text = "" 192 | if row['key'] is None: 193 | suggestion_text += f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m\n" 194 | elif row['key'] is not None and row['rows'] >= 1: 195 | suggestion_text += f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m\n" 196 | suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段,索引分析:\n" 197 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 198 | table_name=table_name, index_columns=index_columns) 199 | suggestion_text += index_static + "\n" 200 | index_suggestions.append(suggestion_text) 201 | else: 202 | suggestion_text = f"\n\u2192 \033[1;92m【{table_name}】表 【{index_columns}】字段,索引已经存在,无需添加任何索引。\033[0m\n" 203 | suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段,索引分析:\n" 204 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 205 | table_name=table_name, index_columns=index_columns) 206 | suggestion_text += index_static + "\n" 207 | index_suggestions.append(suggestion_text) 208 | except Exception as e: 209 | print(f"单个索引分析出错: {e}") 210 | index_suggestions.append(f"单个索引分析出错: {e}") 211 | finally: 212 | if conn: 213 | conn.close() 214 | 215 | else: 216 | merged_name = '_'.join(add_index_fields) 217 | merged_columns = ','.join(add_index_fields) 218 | try: 219 | conn = pymysql.connect(**mysql_settings) 220 | cur = conn.cursor() 221 | index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"], 222 | table_name=table_name, index_columns=merged_columns, 223 | index_number=len(add_index_fields)) 224 | if index_result_list is None: 225 | suggestion_text = "" 226 | if row['key'] is None: 227 | suggestion_text += f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n" 228 | elif row['key'] is not None and row['rows'] >= 1: 229 | suggestion_text += f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n" 230 | suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段,索引分析:\n" 231 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 232 | table_name=table_name, index_columns=merged_columns) 233 | suggestion_text += index_static + "\n" 234 | index_suggestions.append(suggestion_text) 235 | else: 236 | suggestion_text = f"\n\u2192 \033[1;92m【{table_name}】表 【{merged_columns}】字段,联合索引已经存在,无需添加任何索引。\033[0m\n" 237 | suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段,索引分析:\n" 238 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 239 | table_name=table_name, index_columns=merged_columns) 240 | suggestion_text += index_static + "\n" 241 | index_suggestions.append(suggestion_text) 242 | except Exception as e: 243 | print(f"联合索引分析出错: {e}") 244 | index_suggestions.append(f"联合索引分析出错: {e}") 245 | finally: 246 | if conn: 247 | conn.close() 248 | 249 | 250 | if has_table_alias(table_aliases) is True or contains_dot is True: 251 | if has_table_alias(table_aliases) is True: 252 | try: 253 | table_real_name = table_aliases[table_name] 254 | except KeyError: 255 | if table_name.startswith('= 1: 336 | suggestion_text += f"\033[93m建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\033[0m\n" 337 | suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段,索引分析:\n" 338 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 339 | table_name=table_real_name, index_columns=index_columns) 340 | suggestion_text += index_static + "\n" 341 | index_suggestions.append(suggestion_text) 342 | else: 343 | suggestion_text = f"\n\u2192 \033[1;92m【{table_real_name}】表 【{index_columns}】字段,索引已经存在,无需添加任何索引。\033[0m\n" 344 | suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段,索引分析:\n" 345 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 346 | table_name=table_real_name, index_columns=index_columns) 347 | suggestion_text += index_static + "\n" 348 | index_suggestions.append(suggestion_text) 349 | except Exception as e: 350 | print(f"单个别名表索引分析出错: {e}") 351 | index_suggestions.append(f"单个别名表索引分析出错: {e}") 352 | finally: 353 | if conn: 354 | conn.close() 355 | else: 356 | merged_name = '_'.join(add_index_fields) 357 | merged_columns = ','.join(add_index_fields) 358 | try: 359 | conn = pymysql.connect(**mysql_settings) 360 | cur = conn.cursor() 361 | index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"], 362 | table_name=table_real_name, index_columns=merged_columns, 363 | index_number=len(add_index_fields)) 364 | if index_result_list is None: 365 | suggestion_text = "" 366 | if row['key'] is None: 367 | suggestion_text += f"\033[93m建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n" 368 | elif row['key'] is not None and row['rows'] >= 1: 369 | suggestion_text += f"\033[93m建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m\n" 370 | suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段,索引分析:\n" 371 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 372 | table_name=table_real_name, index_columns=merged_columns) 373 | suggestion_text += index_static + "\n" 374 | index_suggestions.append(suggestion_text) 375 | else: 376 | suggestion_text = f"\n\u2192 \033[1;92m【{table_real_name}】表 【{merged_columns}】字段,联合索引已经存在,无需添加任何索引。\033[0m\n" 377 | suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段,索引分析:\n" 378 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 379 | table_name=table_real_name, index_columns=merged_columns) 380 | suggestion_text += index_static + "\n" 381 | index_suggestions.append(suggestion_text) 382 | except Exception as e: 383 | print(f"联合别名表索引分析出错: {e}") 384 | index_suggestions.append(f"联合别名表索引分析出错: {e}") 385 | finally: 386 | if conn: 387 | conn.close() 388 | 389 | extra_suggestions = [] 390 | where_clause = parse_where_condition_full(formatted_sql) 391 | if where_clause: 392 | like_r, like_expression = check_percent_position(where_clause) 393 | if like_r is True: 394 | extra_suggestions.append(f"like模糊匹配,百分号在首位,【{like_expression}】是不能用到索引的,例如like '%张三%',可以考虑改成like '张三%',这样是可以用到索引的,如果业务上不能改,可以考虑用全文索引。\n") 395 | 396 | function_r = extract_function_index(where_clause) 397 | if function_r is not False: 398 | extra_suggestions.append(f"索引列使用了函数作计算:【{function_r}】,会导致索引失效。" 399 | f"如果你是MySQL 8.0可以考虑创建函数索引;如果你是MySQL 5.7,你要更改你的SQL逻辑了。\n") 400 | 401 | try: 402 | ai_suggestions = optimize_sql(formatted_sql) 403 | if not isinstance(ai_suggestions, str): 404 | ai_suggestions = str(ai_suggestions) 405 | #ai_suggestions = ai_suggestions.replace("If you want to run the SQL query, connect to a database first. See here: https://vanna.ai/docs/databases.html", "").strip() 406 | #ai_suggestions = ai_suggestions.replace("-------------------------------------------------------", "").strip() 407 | except Exception as e: 408 | ai_suggestions = f"调用 AI 优化出错: {e}" 409 | print(f"调用 AI 优化出错: {e}") 410 | 411 | return { 412 | "formatted_sql": formatted_sql, 413 | "explain_table": e_table, 414 | "index_suggestions": "".join(index_suggestions), 415 | "extra_suggestions": "".join(extra_suggestions), 416 | "ai_suggestions": ai_suggestions, 417 | "no_suggestions": not index_suggestions and not extra_suggestions and not ai_suggestions 418 | } 419 | 420 | 421 | @app.route("/", methods=["GET", "POST"]) 422 | def index(): 423 | if request.method == "POST": 424 | sql_query = request.form.get("sql_query") 425 | host = request.form.get("host") 426 | port = request.form.get("port", type=int, default=3306) 427 | user = request.form.get("user") 428 | password = request.form.get("password") 429 | database = request.form.get("database") 430 | 431 | db_config = {} 432 | config_source = "" 433 | 434 | if not all([host, user, password, database]): 435 | return render_template("index.html", error="请提供完整的 MySQL 数据库连接信息 (请手动填写所有数据库参数).", 436 | sql_query=sql_query, 437 | host=host, port=port, user=user, database=database) 438 | db_config = { 439 | "host": host, 440 | "port": port, 441 | "user": user, 442 | "passwd": password, 443 | "database": database 444 | } 445 | config_source = "params" 446 | 447 | analysis_result = analyze_sql(sql_query, db_config) 448 | 449 | if "error" in analysis_result: 450 | return render_template("index.html", error=analysis_result["error"], 451 | sql_query=sql_query, 452 | host=host, port=port, user=user, database=database, 453 | config_source=config_source) 454 | 455 | return render_template("index.html", 456 | formatted_sql=analysis_result["formatted_sql"], 457 | explain_table=analysis_result["explain_table"], 458 | index_suggestions=analysis_result["index_suggestions"], 459 | extra_suggestions=analysis_result["extra_suggestions"], 460 | ai_suggestions=analysis_result["ai_suggestions"], 461 | no_suggestions=analysis_result["no_suggestions"], 462 | sql_query=sql_query, 463 | host=host, port=port, user=user, database=database, 464 | config_source=config_source 465 | ) 466 | 467 | return render_template("index.html") 468 | 469 | if __name__ == "__main__": 470 | app.run(host='0.0.0.0', debug=True) 471 | -------------------------------------------------------------------------------- /Flask_src/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | PyMySQL 3 | sql-metadata 4 | tabulate 5 | rich 6 | PyYAML 7 | vanna==0.0.36 8 | sqlparse 9 | -------------------------------------------------------------------------------- /Flask_src/sql_alias.py: -------------------------------------------------------------------------------- 1 | def has_table_alias(table_alias): 2 | if isinstance(table_alias, dict): 3 | table_alias = {k.lower(): v.lower() for k, v in table_alias.items()} 4 | if 'join' in table_alias or 'on' in table_alias or 'where' in table_alias or 'group by' in table_alias or 'order by' in table_alias or 'limit' in table_alias or not table_alias: 5 | return False # 没有别名 6 | else: 7 | return True #有别名 8 | elif isinstance(table_alias, list): 9 | return False # 没有别名 10 | else: 11 | pass 12 | -------------------------------------------------------------------------------- /Flask_src/sql_count_value.py: -------------------------------------------------------------------------------- 1 | import pymysql 2 | from rich.progress import Progress, TimeElapsedColumn, TextColumn, BarColumn 3 | 4 | def count_column_value(table_name, field_name, mysql_settings, sample_size): 5 | with pymysql.connect(**mysql_settings) as conn: 6 | with conn.cursor() as cursor: 7 | """ 8 | 在这个查询中,使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000,则使用 9 | (SELECT COUNT(*) FROM {table_name}) / 2 10 | 作为阈值,即表的实际大小除以2;否则使用 {sample_size} / 2 作为阈值。 11 | """ 12 | 13 | # 如果你的数据库是MySQL 8.0,那么推荐用 CTE(公共表达式)的形式 14 | ''' 15 | sql = f""" 16 | WITH subquery AS ( 17 | SELECT {field_name} 18 | FROM {table_name} 19 | LIMIT {sample_size} 20 | ) 21 | SELECT COUNT(*) as count 22 | FROM subquery 23 | GROUP BY {field_name} 24 | HAVING COUNT(*) >= 25 | CASE 26 | WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 27 | ELSE {sample_size} / 2 28 | END; 29 | """ 30 | ''' 31 | 32 | # 默认采用子查询兼容MySQL 5.7版本 33 | sql = f""" 34 | SELECT COUNT(*) as count 35 | FROM ( 36 | SELECT {field_name} 37 | FROM {table_name} 38 | LIMIT {sample_size} 39 | ) AS subquery 40 | GROUP BY {field_name} 41 | HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 42 | THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END; 43 | """ 44 | 45 | # print(sql) 46 | with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress: 47 | task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{field_name}[/bold magenta]...", total=1) 48 | cursor.execute(sql) 49 | progress.update(task, completed=1) 50 | 51 | results = cursor.fetchall() 52 | 53 | if results: 54 | # 如果有超过半数的重复数据 55 | return results 56 | else: 57 | return False 58 | 59 | 60 | def count_column_clause_value(table_name, field_name, where_clause_value, mysql_settings, sample_size): 61 | with pymysql.connect(**mysql_settings) as conn: 62 | with conn.cursor() as cursor: 63 | 64 | """ 65 | 在这个查询中,使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000,则使用 66 | (SELECT COUNT(*) FROM {table_name}) / 2 67 | 作为阈值,即表的实际大小除以2;否则使用 {sample_size} / 2 作为阈值。 68 | """ 69 | 70 | # 如果你的数据库是MySQL 8.0,那么推荐用 CTE(公共表达式)的形式 71 | ''' 72 | sql = f""" 73 | WITH subquery AS ( 74 | SELECT {field_name} 75 | FROM {table_name} 76 | LIMIT {sample_size} 77 | ) 78 | SELECT COUNT(*) as count 79 | FROM subquery 80 | GROUP BY {field_name} 81 | HAVING COUNT(*) >= 82 | CASE 83 | WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 84 | ELSE {sample_size} / 2 85 | END; 86 | """ 87 | ''' 88 | 89 | # 默认采用子查询兼容MySQL 5.7版本 90 | sql = f""" 91 | SELECT COUNT(*) as count 92 | FROM ( 93 | SELECT {field_name} 94 | FROM {table_name} 95 | WHERE {where_clause_value} 96 | LIMIT {sample_size} 97 | ) AS subquery 98 | GROUP BY {field_name} 99 | HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 100 | THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END; 101 | """ 102 | 103 | #print(sql) 104 | with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress: 105 | task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{where_clause_value}[/bold magenta]...", total=1) 106 | cursor.execute(sql) 107 | progress.update(task, completed=1) 108 | 109 | results = cursor.fetchall() 110 | 111 | if results: 112 | # 如果有超过半数的重复数据 113 | return results 114 | else: 115 | return False 116 | -------------------------------------------------------------------------------- /Flask_src/sql_extra.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def check_percent_position(string): 4 | string = string.lower() 5 | pattern = r".*like\s+'%|.*like\s+concat\(+'%|.*regexp\s+" 6 | matches = re.findall(pattern, string) 7 | if matches: 8 | like_pattern = r"like\s+(?:concat\(.*?\)|'%%'|\'.*?%(?:.*?)?\')" 9 | like_match = re.search(like_pattern, string) 10 | if like_match: 11 | return True, like_match.group() 12 | #return True 13 | return False, None 14 | 15 | 16 | def extract_function_index(string): 17 | #pattern = r'\b(\w+)\(' 18 | #pattern = r'\b(\w+)\(.*\).*[>=]' 19 | pattern = r'\b(\w+(\(.*\).*[>=]))' 20 | matches = re.findall(pattern, string) 21 | #function_indexes = set(matches) 22 | function_indexes = [match[0] for match in matches] 23 | if function_indexes: 24 | return ', '.join(function_indexes) 25 | else: 26 | return False 27 | -------------------------------------------------------------------------------- /Flask_src/sql_format_class.py: -------------------------------------------------------------------------------- 1 | import sqlparse 2 | 3 | class SQLFormatter: 4 | def format_sql(self, sql_query): 5 | """ 6 | 格式化 SQL 查询语句 7 | """ 8 | formatted_sql = sqlparse.format(sql_query, reindent=True, keyword_case='upper') 9 | 10 | return formatted_sql 11 | 12 | 13 | -------------------------------------------------------------------------------- /Flask_src/sql_index.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from tabulate import tabulate 3 | import pymysql 4 | 5 | def execute_index_query(mysql_settings, database, table_name, index_columns): 6 | index_columns = index_columns 7 | index_columns = index_columns.split(',') 8 | updated_columns = [f"'{column.strip()}'" for column in index_columns] 9 | final_columns = ', '.join(updated_columns) 10 | sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns})" 11 | #print(sql) 12 | try: 13 | conn = pymysql.connect(**mysql_settings) 14 | cur = conn.cursor() 15 | cur.execute(sql) 16 | index_result = cur.fetchall() 17 | 18 | if not index_result: 19 | print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。") 20 | 21 | # 提取列名 22 | e_column_names = [desc[0] for desc in cur.description] 23 | 24 | # 提取结果值并进行自动换行处理 25 | e_result_values = [] 26 | for row in index_result: 27 | values = list(row.values()) 28 | wrapped_values = [textwrap.fill(str(value), width=30) for value in values] 29 | e_result_values.append(wrapped_values) 30 | 31 | # 将结果格式化为表格(包含竖线) 32 | e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left") 33 | 34 | return e_table 35 | 36 | except pymysql.err.ProgrammingError as e: 37 | print("MySQL 内部错误:",e) 38 | return None 39 | except Exception as e: 40 | print("MySQL 内部错误:",e) 41 | return None 42 | finally: 43 | if cur: 44 | cur.close() 45 | if conn: 46 | conn.close() 47 | 48 | ######################################################### 49 | 50 | def check_index_exist(mysql_settings, table_name, index_column): 51 | show_index_sql = f"show index from {table_name} where Column_name = '{index_column}'" 52 | try: 53 | conn = pymysql.connect(**mysql_settings) 54 | cur = conn.cursor() 55 | cur.execute(show_index_sql) 56 | index_result = cur.fetchall() 57 | 58 | #if not index_result: 59 | #print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。") 60 | 61 | return index_result 62 | 63 | except pymysql.err.ProgrammingError as e: 64 | print("MySQL 内部错误:",e) 65 | return None 66 | except Exception as e: 67 | print("MySQL 内部错误:",e) 68 | return None 69 | finally: 70 | if cur: 71 | cur.close() 72 | if conn: 73 | conn.close() 74 | 75 | ######################################################### 76 | 77 | def check_index_exist_multi(mysql_settings, database, table_name, index_columns, index_number): 78 | index_columns = index_columns 79 | index_columns = index_columns.split(',') 80 | updated_columns = [f"'{column.strip()}'" for column in index_columns] 81 | final_columns = ', '.join(updated_columns) 82 | sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns}) GROUP BY INDEX_NAME HAVING COUNT(INDEX_NAME) = {index_number}" 83 | #print(sql) 84 | try: 85 | conn = pymysql.connect(**mysql_settings) 86 | cur = conn.cursor() 87 | cur.execute(sql) 88 | index_result = cur.fetchall() 89 | 90 | if not index_result: 91 | return None 92 | 93 | return index_result 94 | 95 | except pymysql.err.ProgrammingError as e: 96 | print("MySQL 内部错误:",e) 97 | return None 98 | except Exception as e: 99 | print("MySQL 内部错误:",e) 100 | return None 101 | finally: 102 | if cur: 103 | cur.close() 104 | if conn: 105 | conn.close() 106 | 107 | -------------------------------------------------------------------------------- /Flask_src/sqlai.py: -------------------------------------------------------------------------------- 1 | from vanna.remote import VannaDefault 2 | 3 | def optimize_sql(original_sql): 4 | """ 5 | 调用 vanna.ai LLM 接口优化 SQL 查询语句,并返回优化后的 SQL 字符串。 6 | 7 | Args: 8 | original_sql (str): 原始的 SQL 查询语句。 9 | 10 | Returns: 11 | str: 优化后的 SQL 查询语句。 如果调用过程中发生错误,则返回包含错误信息的字符串。 12 | """ 13 | try: 14 | # 创建 VannaDefault 实例 15 | vn = VannaDefault(model='sql_helper', api_key='xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') 16 | 17 | # 输出调用信息 (可选,仅用于调试) 18 | print('\033[94m以下是调用的vanna.ai LLM接口.\033[0m') 19 | print('优化前的SQL是:') 20 | print(original_sql) 21 | print('-' * 55) 22 | 23 | # 获取优化后的 SQL 24 | #vn.ask('How to optimize this SQL : {}'.format(original_sql)) 25 | optimized_sql = vn.generate_sql('How to optimize this SQL : {}'.format(original_sql)) 26 | 27 | # 输出优化后的 SQL (可选,仅用于调试) 28 | print('\033[92m优化后的SQL是:\033[0m') 29 | 30 | return optimized_sql 31 | 32 | except Exception as e: 33 | # 捕获异常,返回包含错误信息的字符串 34 | error_message = f"调用 vanna.ai 优化出错: {e}" 35 | print(error_message) # 打印错误信息到控制台 (可选) 36 | return error_message 37 | -------------------------------------------------------------------------------- /Flask_src/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SQLAI Helper 工具 6 | 7 | 145 | 146 | 147 |
148 |

SQLAI Helper 工具

149 | 150 |
151 |
152 | 153 | 154 | 155 |

数据库连接配置 (手动填写数据库参数)

156 | 157 |
158 |
159 | 160 | 161 |
162 |
163 | 164 | 165 |
166 |
167 | 168 | 169 |
170 |
171 | 172 | 173 |
174 |
175 | 176 | 177 |
178 |
179 | 180 |
181 |
182 | 183 |
184 |
185 |
186 | 187 | 188 | 192 | 193 | {% if error %} 194 |
195 |

错误信息:

196 |
{{ error|default('未检测到错误')|safe }}
197 |
198 | {% endif %} 199 | 200 | {% if formatted_sql %} 201 |
202 |

1) 你刚才输入的 SQL 语句是:

203 |
{{ formatted_sql|default('无输入 SQL')|safe }}
204 |
205 | {% endif %} 206 | 207 | {% if explain_table %} 208 |
209 |

2) EXPLAIN 执行计划:

210 | {{ explain_table|safe }} 211 |
212 | {% endif %} 213 | 214 | {% if index_suggestions %} 215 |
216 |

3) 索引优化建议:

217 |
{{ index_suggestions|default('无索引优化建议')|safe }}
218 |
219 | {% endif %} 220 | 221 | {% if extra_suggestions %} 222 |
223 |

4) 额外的建议:

224 |
{{ extra_suggestions|default('无额外建议')|safe }}
225 |
226 | {% endif %} 227 | 228 | {% if ai_suggestions %} 229 |
230 |

5) DeepSeek 的建议:

231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 |
优化后的 SQL 语句
{{ ai_suggestions|default('暂无 AI 建议')|safe }}
243 |
244 | {% endif %} 245 | 246 | {% if no_suggestions and formatted_sql and explain_table %} 247 |
248 |

分析结果:

249 |
SQL 语句分析完成,当前 SQL 语句执行计划良好,没有额外的优化建议!
250 |
251 | {% endif %} 252 |
253 | 254 | 255 | 256 | 291 | 292 | 293 | -------------------------------------------------------------------------------- /Flask_src/where_clause.py: -------------------------------------------------------------------------------- 1 | import sqlparse 2 | 3 | def parse_where_condition(sql, column): 4 | parsed = sqlparse.parse(sql) 5 | stmt = parsed[0] 6 | 7 | where_column = "" 8 | where_expression = "" 9 | where_value = "" 10 | where_clause = "" 11 | found = False 12 | result = "" 13 | 14 | for token in stmt.tokens: 15 | if isinstance(token, sqlparse.sql.Where): 16 | where_clause = token.value 17 | conditions = [] 18 | for cond_tok in token.tokens: 19 | if isinstance(cond_tok, sqlparse.sql.Comparison): 20 | left_token = cond_tok.left.value.strip() 21 | if column == left_token: 22 | #return f"比较运算符: {cond_tok.value.strip()}" 23 | return cond_tok.value.strip() 24 | 25 | if isinstance(cond_tok, sqlparse.sql.Identifier) and cond_tok.value == column: 26 | found = True 27 | 28 | if found: 29 | if isinstance(cond_tok, sqlparse.sql.Token) and cond_tok.value.upper() in ["OR","AND"]: 30 | break 31 | else: 32 | result += cond_tok.value 33 | 34 | if isinstance(cond_tok, sqlparse.sql.Parenthesis) and found: 35 | break 36 | 37 | if len(result) != 0: 38 | #return f"逻辑运算符: {result.strip()}" 39 | return result.strip() 40 | else: 41 | #return "没有找到该字段的条件表达式" 42 | return None 43 | 44 | 45 | def parse_where_condition_full(sql): 46 | parsed = sqlparse.parse(sql) 47 | stmt = parsed[0] 48 | 49 | where_clause = "" 50 | found = False 51 | 52 | for token in stmt.tokens: 53 | if found: 54 | where_clause += token.value 55 | if isinstance(token, sqlparse.sql.Where): 56 | found = True 57 | where_clause += token.value 58 | 59 | return where_clause.strip() if where_clause else None 60 | 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sql_helper - 输入SQL自动给出索引优化建议+SQL重写建议 2 | 3 | #### 2025年2月26日更新:sqlai_helper 接入DeepSeek 4 | ``` 5 | 注:openai库需要调用ssl,由于python3.10之后版本不在支持libressl使用ssl,需要用openssl1.1.1版本或者更高版本 6 | 7 | 参见:python3.10编译安装报SSL失败解决方法 8 | https://blog.csdn.net/mdh17322249/article/details/123966953 9 | ``` 10 | ``` 11 | 第一步、先去 https://platform.deepseek.com/api_keys 申请密钥并充值1元,替换sql_deepseek.py文件里的密钥。 12 | 第二步,运行: 13 | shell> cd sql_helper_1.1/deepseek_flash_src/ 14 | shell> pip3 install -r requirements.txt -i "http://mirrors.aliyun.com/pypi/simple" --trusted-host "mirrors.aliyun.com" 15 | shell> python3 app.py 16 | * Running on http://192.168.137.131:5000 17 | ``` 18 | 19 | #### 2025年2月20日更新:sqlai_helper 增加 Flask,无需部署PHP,可直接访问。 20 | ``` 21 | shell> pip3 install -r requirements.txt -i "http://mirrors.aliyun.com/pypi/simple" --trusted-host "mirrors.aliyun.com" 22 | shell> python3 app.py 23 | * Running on http://192.168.137.131:5000 24 | ``` 25 | 26 | ![image](https://github.com/user-attachments/assets/f66d977b-8948-4990-be58-d9647a0f6906) 27 | 28 | #### sqlai_helper工具版本号: 2.1.3,更新日期:2024-10-10 <-> 支持SQL改写,合并LLM模型接口 29 | 链接: https://github.com/hcymysql/sql_helper/releases/tag/sqlai_helper_v2.1.3 30 | ``` 31 | #### ★贡献:ThinkSQL 类似 ThinkPHP 的数据库引擎,集成sql_helper 32 | #### ※ThinkSQL地址: https://pypi.org/project/think-sql/ 33 | 34 | #### 2023-09-05日更新:1.1.1版本-新增where条件表达式值判断,索引推荐更加精准。 35 | #### 2023-09-06日更新:1.1.2版本-新增额外的建议:like模糊匹配检查、索引列使用了函数作计算 36 | ``` 37 | 38 | 索引在数据库中非常重要,它可以加快查询速度并提高数据库性能。对于经常被用作查询条件的字段,添加索引可以显著改善查询效率。然而,索引的创建和维护需要考虑多个因素,包括数据量、查询频率、更新频率等。 39 | 40 | sql_helper 工具是一个开源项目,其主要功能是自动判断条件字段是否需要增加索引,适用于MySQL5.7/8.0和MariaDB数据库,并且旨在帮助开发人员优化数据库查询性能。通过分析SQL语句,该工具可以检测出哪些条件字段可以考虑添加索引来提高查询效率。 41 | 42 | ### 工作流程 43 | 44 | 第一步、通过SQL语法解析器,提炼出表名,别名,关联字段名,条件字段名,排序字段名,分组字段名。 45 | 46 | 第二步、检查是否有where条件,如没有则给出提示。 47 | 48 | 第三步、检测到a join b on a.id = b.id(关联查询时),通过查询表结构,检查关联字段是否有索引,如没有给出创建索引提示。 49 | 50 | 第四步、通过调用Explain执行计划,如果type值是ALL,或者rows大于1000,检查该表(如有别名,找到其对应的原始表名)和where条件字段的数据分布,工具默认会采样10万条数据作为样本,检查Cardinality基数,例如sex性别字段,有男女两个值,如果占比超过半数(50%),则不建议对该字段创建索引。 51 | 52 | 第五步、检查group by和order by字段(同样的算法),之后与where条件字段合并,组合成联合索引。 53 | 54 | 第六步、检查这些字段之前是否创建过索引,如果没有给与提示创建,如果之前就有索引,不提示。 55 | 56 | 需要注意的是:sql_helper工具假定您的sql语句条件表达式都为and的前提下,提示创建联合索引。 57 | 58 | 如果是or,sql解析器解析起来会有些困难(sql灵活多变,且不固定,无法用通用的算法组合字段)。 59 | 60 | 例如where c1 = 1 or c2 = 2 61 | 62 | 工具会提示(c1,c2)创建一个联合索引,但实际上应该单独对c1和c2创建一个独立索引。 63 | 64 | 即select ... from t where c1 = 1 65 | union all 66 | select ... from t where c2 = 2 67 | 68 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/ca3d23a7-f2d3-4a14-80af-3688e2bb061e) 69 | 70 | 演示:https://www.douyin.com/video/7277857326072122676 71 | 72 | ### 命令行方式使用 | [web端接口使用](https://github.com/hcymysql/sql_helper/blob/main/web/sql_helper/README.md) 73 | ``` 74 | shell> chmod 755 sql_helper 75 | shell> ./sql_helper -f test.yaml -q "select * from sbtest1 limit 1;" 76 | 或者 77 | shell> sql_helper -f test.yaml -q "select(SQL太长可以直接回车分割) 78 | > * from sbtest1 limit 10" 79 | ``` 80 | 81 | 注:test.yaml为MySQL配置文件,如果SQL里包含反引号,请直接去掉反引号。 82 | 83 | --sample参数:默认采样10万条数据(你可以在从库上获取样本数据),根据你的实际情况,适当增加采样数据,比如100-1000万行,这样工具会更精准的判断是否添加索引。 84 | 85 | 仅支持SELECT查询(主要针对慢日志里的SQL) 86 | 87 | ### Docker方式使用 88 | ``` 89 | shell> vim Dockerfile 90 | FROM centos:7 91 | 92 | COPY sqlai_helper /root/ 93 | RUN chmod 755 /root/sqlai_helper 94 | 95 | shell> docker build -t sqlai_helper . 96 | shell> docker run -itd --name sqlai_helper sqlai_helper /bin/bash 97 | shell> docker exec -it sqlai_helper /root/sqlai_helper -f /root/test.yaml -q "select * from t1 where cid=11" 98 | ``` 99 | 100 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/a603a7fd-7163-4c05-a5fd-4e605f02acc5) 101 | 102 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/39da7b69-aebb-4c27-ac18-f0abc497064d) 103 | 104 | 请注意,自动判断是否增加索引只是一个辅助功能,最终的决策还应该根据具体的业务需求和数据库性能优化的考虑来进行。此外,索引的创建和维护需要谨慎操作,需要考虑数据量、查询频率、更新频率等因素,以避免对数据库性能产生负面影响。 105 | 106 | 工具适用于Centos7 系统 107 | -------------------------------------------------------------------------------- /deepseek_flash_src/app.py: -------------------------------------------------------------------------------- 1 | import sys, re 2 | import textwrap 3 | from tabulate import tabulate 4 | import pymysql 5 | from sql_metadata import Parser 6 | from sql_format_class import SQLFormatter 7 | from sql_alias import has_table_alias 8 | from sql_count_value import count_column_value, count_column_clause_value 9 | from sql_index import execute_index_query, check_index_exist, check_index_exist_multi 10 | from where_clause import * 11 | from sql_extra import * 12 | import yaml 13 | import argparse 14 | from sql_deepseek import * 15 | from flask import Flask, request, render_template 16 | 17 | app = Flask(__name__) 18 | app.config['JSON_AS_ASCII'] = False 19 | app.config['JSON_SORT_KEYS'] = False 20 | app.config['TEMPLATES_AUTO_RELOAD'] = True 21 | app.config['DEFAULT_CHARSET'] = 'utf-8' 22 | 23 | def analyze_sql(sql_query, db_config, sample_size=100000): 24 | mysql_settings = { 25 | "host": db_config["host"], 26 | "port": db_config["port"], 27 | "user": db_config["user"], 28 | "passwd": db_config["passwd"], 29 | "database": db_config["database"], 30 | "cursorclass": pymysql.cursors.DictCursor, 31 | "charset": 'utf8mb4' 32 | } 33 | 34 | try: 35 | formatted_sql = SQLFormatter().format_sql(sql_query) 36 | except UnicodeDecodeError as e: 37 | print(f"格式化 SQL 时出错: {e}, sql_query: {sql_query}") 38 | return {"error": f"格式化 SQL 时出错: {e}", "is_processing": False} 39 | 40 | try: 41 | parser = Parser(sql_query) 42 | table_names = parser.tables 43 | table_aliases = parser.tables_aliases 44 | data = parser.columns_dict 45 | select_fields = data.get('select', []) 46 | join_fields = data.get('join', []) 47 | where_fields = data.get('where', []) 48 | order_by_fields = data.get('order_by', []) 49 | group_by_fields = data.get('group_by', []) 50 | if 'SELECT' not in sql_query.upper(): 51 | return {"error": "sql_helper工具仅支持select语句", "is_processing": False} 52 | except Exception as e: 53 | return {"error": f"解析 SQL 出现语法错误:{str(e)}", "is_processing": False} 54 | 55 | conn = None 56 | try: 57 | conn = pymysql.connect(**mysql_settings) 58 | cur = conn.cursor() 59 | 60 | sql = f"EXPLAIN {sql_query}" 61 | try: 62 | cur.execute(sql) 63 | except pymysql.err.ProgrammingError as e: 64 | return {"error": f"MySQL 内部错误:{e}", "is_processing": False} 65 | except Exception as e: 66 | return {"error": f"MySQL 内部错误:{e}", "is_processing": False} 67 | explain_result = cur.fetchall() 68 | 69 | e_column_names = list(explain_result[0].keys()) 70 | e_result_values = [] 71 | for row in explain_result: 72 | values = [str(value) for value in row.values()] # 直接转为字符串 73 | e_result_values.append(values) 74 | e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="html", numalign="left") 75 | except Exception as e: 76 | return {"error": f"执行 EXPLAIN 或处理结果时出错: {e}", "is_processing": False} 77 | finally: 78 | if conn: 79 | conn.close() 80 | 81 | index_suggestions = [] 82 | contains_dot = False 83 | if len(where_fields) == 0: 84 | index_suggestions.append(f"你的SQL没有where条件.") 85 | else: 86 | contains_dot = any('.' in field for field in where_fields) 87 | 88 | if len(join_fields) != 0: 89 | table_field_dict = {} 90 | for field in join_fields: 91 | table_field = field.split('.') 92 | if len(table_field) == 2: 93 | table_name = table_field[0] 94 | field_name = table_field[1] 95 | if table_name not in table_field_dict: 96 | table_field_dict[table_name] = [] 97 | table_field_dict[table_name].append(field_name) 98 | 99 | for table_name, on_columns in table_field_dict.items(): 100 | for on_column in on_columns: 101 | try: 102 | show_index_sql = f"show index from {table_name} where Column_name = '{on_column}'" 103 | conn = pymysql.connect(**mysql_settings) 104 | cur = conn.cursor() 105 | cur.execute(show_index_sql) 106 | index_result = cur.fetchall() 107 | if not index_result: 108 | suggestion_text = "join联表查询,on关联字段必须增加索引!\n" 109 | suggestion_text += f"\n需要添加索引:ALTER TABLE {table_name} ADD INDEX idx_{on_column}({on_column});\n" 110 | suggestion_text += f"【{table_name}】表 【{on_column}】字段,索引分析:\n" 111 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 112 | table_name=table_name, index_columns=on_column) 113 | suggestion_text += index_static + "\n" 114 | index_suggestions.append(suggestion_text) 115 | except Exception as e: 116 | print(f"join查询分析出错: {e}") 117 | index_suggestions.append(f"join查询分析出错: {e}") 118 | finally: 119 | if conn: 120 | conn.close() 121 | 122 | for row in explain_result: 123 | # 修改处:防止 table_name 为 None 124 | #table_name = row.get('table', '') # 使用 get() 设置默认值为空字符串 125 | table_name = row.get('table') or '' 126 | if table_name.lower().startswith('= 1)) or \ 133 | (len(join_fields) == 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1000)): 134 | if has_table_alias(table_aliases) is False and contains_dot is False: 135 | if len(where_fields) != 0: 136 | for where_field in where_fields: 137 | where_clause_value = parse_where_condition(formatted_sql, where_field) 138 | if where_clause_value is not None: 139 | where_clause_value = where_clause_value.replace('\n', '').replace('\r', '') 140 | where_clause_value = re.sub(r'\s+', ' ', where_clause_value) 141 | where_clause_value = re.sub(r'\s+', ' ', where_clause_value) 142 | Cardinality = count_column_clause_value(table_name, where_field, where_clause_value, mysql_settings, sample_size) 143 | else: 144 | Cardinality = count_column_value(table_name, where_field, mysql_settings, sample_size) 145 | if Cardinality: 146 | count_value = Cardinality[0]['count'] 147 | if where_clause_value is not None: 148 | suggestion_text = f"\n取出表 【{table_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 149 | index_suggestions.append(suggestion_text) 150 | else: 151 | suggestion_text = f"\n取出表 【{table_name}】 where条件字段 【{where_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 152 | index_suggestions.append(suggestion_text) 153 | else: 154 | add_index_fields.append(where_field) 155 | 156 | if group_by_fields is not None and len(group_by_fields) != 0: 157 | for group_field in group_by_fields: 158 | Cardinality = count_column_value(table_name, group_field, mysql_settings, sample_size) 159 | if Cardinality: 160 | count_value = Cardinality[0]['count'] 161 | suggestion_text = f"\n取出表 【{table_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 162 | index_suggestions.append(suggestion_text) 163 | else: 164 | add_index_fields.append(group_field) 165 | 166 | if len(order_by_fields) != 0: 167 | for order_field in order_by_fields: 168 | Cardinality = count_column_value(table_name, order_field, mysql_settings, sample_size) 169 | if Cardinality: 170 | count_value = Cardinality[0]['count'] 171 | suggestion_text = f"\n取出表 【{table_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。\n" 172 | index_suggestions.append(suggestion_text) 173 | else: 174 | add_index_fields.append(order_field) 175 | 176 | add_index_fields = list(dict.fromkeys(add_index_fields).keys()) 177 | 178 | if len(add_index_fields) == 0: 179 | if not index_suggestions: 180 | index_suggestions.append(f"\n→ 【{table_name}】 表,无需添加任何索引。\n") 181 | else: 182 | index_suggestions.append(f"\n→ 【{table_name}】 表,无需添加任何索引。\n") 183 | elif len(add_index_fields) == 1: 184 | index_name = add_index_fields[0] 185 | index_columns = add_index_fields[0] 186 | try: 187 | conn = pymysql.connect(**mysql_settings) 188 | cur = conn.cursor() 189 | index_result = check_index_exist(mysql_settings, table_name=table_name, index_column=index_columns) 190 | if not index_result: 191 | suggestion_text = "" 192 | if row['key'] is None: 193 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\n" 194 | elif row['key'] is not None and row['rows'] >= 1: 195 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\n" 196 | suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段,索引分析:\n" 197 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 198 | table_name=table_name, index_columns=index_columns) 199 | suggestion_text += index_static + "\n" 200 | index_suggestions.append(suggestion_text) 201 | else: 202 | suggestion_text = f"\n→ 【{table_name}】表 【{index_columns}】字段,索引已经存在,无需添加任何索引。\n" 203 | suggestion_text += f"\n【{table_name}】表 【{index_columns}】字段,索引分析:\n" 204 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 205 | table_name=table_name, index_columns=index_columns) 206 | suggestion_text += index_static + "\n" 207 | index_suggestions.append(suggestion_text) 208 | except Exception as e: 209 | print(f"单个索引分析出错: {e}") 210 | index_suggestions.append(f"单个索引分析出错: {e}") 211 | finally: 212 | if conn: 213 | conn.close() 214 | 215 | else: 216 | merged_name = '_'.join(add_index_fields) 217 | merged_columns = ','.join(add_index_fields) 218 | try: 219 | conn = pymysql.connect(**mysql_settings) 220 | cur = conn.cursor() 221 | index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"], 222 | table_name=table_name, index_columns=merged_columns, 223 | index_number=len(add_index_fields)) 224 | if index_result_list is None: 225 | suggestion_text = "" 226 | if row['key'] is None: 227 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\n" 228 | elif row['key'] is not None and row['rows'] >= 1: 229 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\n" 230 | suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段,索引分析:\n" 231 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 232 | table_name=table_name, index_columns=merged_columns) 233 | suggestion_text += index_static + "\n" 234 | index_suggestions.append(suggestion_text) 235 | else: 236 | suggestion_text = f"\n→ 【{table_name}】表 【{merged_columns}】字段,联合索引已经存在,无需添加任何索引。\n" 237 | suggestion_text += f"\n【{table_name}】表 【{merged_columns}】字段,索引分析:\n" 238 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 239 | table_name=table_name, index_columns=merged_columns) 240 | suggestion_text += index_static + "\n" 241 | index_suggestions.append(suggestion_text) 242 | except Exception as e: 243 | print(f"联合索引分析出错: {e}") 244 | index_suggestions.append(f"联合索引分析出错: {e}") 245 | finally: 246 | if conn: 247 | conn.close() 248 | 249 | if has_table_alias(table_aliases) is True or contains_dot is True: 250 | if has_table_alias(table_aliases) is True: 251 | try: 252 | table_real_name = table_aliases[table_name] 253 | except KeyError: 254 | if table_name.startswith('→ 【{table_real_name}】 表,无需添加任何索引。\n") 323 | elif len(add_index_fields) == 1: 324 | index_name = add_index_fields[0] 325 | index_columns = add_index_fields[0] 326 | try: 327 | conn = pymysql.connect(**mysql_settings) 328 | cur = conn.cursor() 329 | index_result = check_index_exist(mysql_settings, table_name=table_real_name, index_column=index_columns) 330 | if not index_result: 331 | suggestion_text = "" 332 | if row['key'] is None: 333 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\n" 334 | elif row['key'] is not None and row['rows'] >= 1: 335 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\n" 336 | suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段,索引分析:\n" 337 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 338 | table_name=table_real_name, index_columns=index_columns) 339 | suggestion_text += index_static + "\n" 340 | index_suggestions.append(suggestion_text) 341 | else: 342 | suggestion_text = f"\n→ 【{table_real_name}】表 【{index_columns}】字段,索引已经存在,无需添加任何索引。\n" 343 | suggestion_text += f"\n【{table_real_name}】表 【{index_columns}】字段,索引分析:\n" 344 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 345 | table_name=table_real_name, index_columns=index_columns) 346 | suggestion_text += index_static + "\n" 347 | index_suggestions.append(suggestion_text) 348 | except Exception as e: 349 | print(f"单个别名表索引分析出错: {e}") 350 | index_suggestions.append(f"单个别名表索引分析出错: {e}") 351 | finally: 352 | if conn: 353 | conn.close() 354 | else: 355 | merged_name = '_'.join(add_index_fields) 356 | merged_columns = ','.join(add_index_fields) 357 | try: 358 | conn = pymysql.connect(**mysql_settings) 359 | cur = conn.cursor() 360 | index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"], 361 | table_name=table_real_name, index_columns=merged_columns, 362 | index_number=len(add_index_fields)) 363 | if index_result_list is None: 364 | suggestion_text = "" 365 | if row['key'] is None: 366 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\n" 367 | elif row['key'] is not None and row['rows'] >= 1: 368 | suggestion_text += f"\n建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\n" 369 | suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段,索引分析:\n" 370 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 371 | table_name=table_real_name, index_columns=merged_columns) 372 | suggestion_text += index_static + "\n" 373 | index_suggestions.append(suggestion_text) 374 | else: 375 | suggestion_text = f"\n→ 【{table_real_name}】表 【{merged_columns}】字段,联合索引已经存在,无需添加任何索引。\n" 376 | suggestion_text += f"\n【{table_real_name}】表 【{merged_columns}】字段,索引分析:\n" 377 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 378 | table_name=table_real_name, index_columns=merged_columns) 379 | suggestion_text += index_static + "\n" 380 | index_suggestions.append(suggestion_text) 381 | except Exception as e: 382 | print(f"联合别名表索引分析出错: {e}") 383 | index_suggestions.append(f"联合别名表索引分析出错: {e}") 384 | finally: 385 | if conn: 386 | conn.close() 387 | 388 | extra_suggestions = [] 389 | where_clause = parse_where_condition_full(formatted_sql) 390 | if where_clause: 391 | like_r, like_expression = check_percent_position(where_clause) 392 | if like_r is True: 393 | extra_suggestions.append(f"like模糊匹配,百分号在首位,【{like_expression}】是不能用到索引的,例如like '%张三%',可以考虑改成like '张三%',这样是可以用到索引的,如果业务上不能改,可以考虑用全文索引。\n") 394 | 395 | function_r = extract_function_index(where_clause) 396 | if function_r is not False: 397 | extra_suggestions.append(f"索引列使用了函数作计算:【{function_r}】,会导致索引失效。" 398 | f"如果你是MySQL 8.0可以考虑创建函数索引;如果你是MySQL 5.7,你要更改你的SQL逻辑了。\n") 399 | 400 | try: 401 | # 开始处理 AI 优化,设置 is_processing=True 402 | ai_suggestions = optimize_sql(formatted_sql) 403 | if not isinstance(ai_suggestions, str): 404 | ai_suggestions = str(ai_suggestions) 405 | ai_suggestions = re.sub(r'```html\s*|\s*```', '', ai_suggestions, flags=re.MULTILINE).strip() 406 | except Exception as e: 407 | ai_suggestions = f"调用 AI 优化出错: {e}" 408 | print(f"调用 AI 优化出错: {e}") 409 | 410 | return { 411 | "formatted_sql": formatted_sql, 412 | "explain_table": e_table, 413 | "index_suggestions": "".join(index_suggestions), 414 | "extra_suggestions": "".join(extra_suggestions), 415 | "ai_suggestions": ai_suggestions, 416 | "no_suggestions": not index_suggestions and not extra_suggestions and not ai_suggestions, 417 | "is_processing": False # 处理完成 418 | } 419 | 420 | @app.route("/", methods=["GET", "POST"]) 421 | def index(): 422 | if request.method == "POST": 423 | sql_query = request.form.get("sql_query") 424 | host = request.form.get("host") 425 | port = request.form.get("port", type=int, default=3306) 426 | user = request.form.get("user") 427 | password = request.form.get("password") 428 | database = request.form.get("database") 429 | 430 | db_config = {} 431 | config_source = "" 432 | 433 | if not all([host, user, password, database]): 434 | return render_template("index.html", error="请提供完整的 MySQL 数据库连接信息 (请手动填写所有数据库参数).", 435 | sql_query=sql_query, 436 | host=host, port=port, user=user, database=database, 437 | is_processing=False) 438 | 439 | db_config = { 440 | "host": host, 441 | "port": port, 442 | "user": user, 443 | "passwd": password, 444 | "database": database 445 | } 446 | config_source = "params" 447 | 448 | # 在分析开始时,显示加载动画 449 | analysis_result = analyze_sql(sql_query, db_config) 450 | 451 | if "error" in analysis_result: 452 | return render_template("index.html", error=analysis_result["error"], 453 | sql_query=sql_query, 454 | host=host, port=port, user=user, database=database, 455 | config_source=config_source, 456 | is_processing=False) 457 | 458 | return render_template("index.html", 459 | formatted_sql=analysis_result["formatted_sql"], 460 | explain_table=analysis_result["explain_table"], 461 | index_suggestions=analysis_result["index_suggestions"], 462 | extra_suggestions=analysis_result["extra_suggestions"], 463 | ai_suggestions=analysis_result["ai_suggestions"], 464 | no_suggestions=analysis_result["no_suggestions"], 465 | sql_query=sql_query, 466 | host=host, port=port, user=user, database=database, 467 | config_source=config_source, 468 | is_processing=False) # 处理完成 469 | 470 | return render_template("index.html", is_processing=False) 471 | 472 | if __name__ == "__main__": 473 | app.run(host='0.0.0.0', debug=True) 474 | -------------------------------------------------------------------------------- /deepseek_flash_src/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | PyMySQL 3 | sql-metadata 4 | tabulate 5 | rich 6 | PyYAML 7 | openai 8 | sqlparse 9 | -------------------------------------------------------------------------------- /deepseek_flash_src/sql_alias.py: -------------------------------------------------------------------------------- 1 | def has_table_alias(table_alias): 2 | if isinstance(table_alias, dict): 3 | table_alias = {k.lower(): v.lower() for k, v in table_alias.items()} 4 | if 'join' in table_alias or 'on' in table_alias or 'where' in table_alias or 'group by' in table_alias or 'order by' in table_alias or 'limit' in table_alias or not table_alias: 5 | return False # 没有别名 6 | else: 7 | return True #有别名 8 | elif isinstance(table_alias, list): 9 | return False # 没有别名 10 | else: 11 | pass 12 | -------------------------------------------------------------------------------- /deepseek_flash_src/sql_count_value.py: -------------------------------------------------------------------------------- 1 | import pymysql 2 | from rich.progress import Progress, TimeElapsedColumn, TextColumn, BarColumn 3 | 4 | def count_column_value(table_name, field_name, mysql_settings, sample_size): 5 | with pymysql.connect(**mysql_settings) as conn: 6 | with conn.cursor() as cursor: 7 | """ 8 | 在这个查询中,使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000,则使用 9 | (SELECT COUNT(*) FROM {table_name}) / 2 10 | 作为阈值,即表的实际大小除以2;否则使用 {sample_size} / 2 作为阈值。 11 | """ 12 | 13 | # 如果你的数据库是MySQL 8.0,那么推荐用 CTE(公共表达式)的形式 14 | ''' 15 | sql = f""" 16 | WITH subquery AS ( 17 | SELECT {field_name} 18 | FROM {table_name} 19 | LIMIT {sample_size} 20 | ) 21 | SELECT COUNT(*) as count 22 | FROM subquery 23 | GROUP BY {field_name} 24 | HAVING COUNT(*) >= 25 | CASE 26 | WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 27 | ELSE {sample_size} / 2 28 | END; 29 | """ 30 | ''' 31 | 32 | # 默认采用子查询兼容MySQL 5.7版本 33 | sql = f""" 34 | SELECT COUNT(*) as count 35 | FROM ( 36 | SELECT {field_name} 37 | FROM {table_name} 38 | LIMIT {sample_size} 39 | ) AS subquery 40 | GROUP BY {field_name} 41 | HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 42 | THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END; 43 | """ 44 | 45 | # print(sql) 46 | with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress: 47 | task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{field_name}[/bold magenta]...", total=1) 48 | cursor.execute(sql) 49 | progress.update(task, completed=1) 50 | 51 | results = cursor.fetchall() 52 | 53 | if results: 54 | # 如果有超过半数的重复数据 55 | return results 56 | else: 57 | return False 58 | 59 | 60 | def count_column_clause_value(table_name, field_name, where_clause_value, mysql_settings, sample_size): 61 | with pymysql.connect(**mysql_settings) as conn: 62 | with conn.cursor() as cursor: 63 | 64 | """ 65 | 在这个查询中,使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000,则使用 66 | (SELECT COUNT(*) FROM {table_name}) / 2 67 | 作为阈值,即表的实际大小除以2;否则使用 {sample_size} / 2 作为阈值。 68 | """ 69 | 70 | # 如果你的数据库是MySQL 8.0,那么推荐用 CTE(公共表达式)的形式 71 | ''' 72 | sql = f""" 73 | WITH subquery AS ( 74 | SELECT {field_name} 75 | FROM {table_name} 76 | LIMIT {sample_size} 77 | ) 78 | SELECT COUNT(*) as count 79 | FROM subquery 80 | GROUP BY {field_name} 81 | HAVING COUNT(*) >= 82 | CASE 83 | WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 84 | ELSE {sample_size} / 2 85 | END; 86 | """ 87 | ''' 88 | 89 | # 默认采用子查询兼容MySQL 5.7版本 90 | sql = f""" 91 | SELECT COUNT(*) as count 92 | FROM ( 93 | SELECT {field_name} 94 | FROM {table_name} 95 | WHERE {where_clause_value} 96 | LIMIT {sample_size} 97 | ) AS subquery 98 | GROUP BY {field_name} 99 | HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 100 | THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END; 101 | """ 102 | 103 | #print(sql) 104 | with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress: 105 | task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{where_clause_value}[/bold magenta]...", total=1) 106 | cursor.execute(sql) 107 | progress.update(task, completed=1) 108 | 109 | results = cursor.fetchall() 110 | 111 | if results: 112 | # 如果有超过半数的重复数据 113 | return results 114 | else: 115 | return False 116 | -------------------------------------------------------------------------------- /deepseek_flash_src/sql_deepseek.py: -------------------------------------------------------------------------------- 1 | # https://platform.deepseek.com/api_keys 申请密钥并充值1元 2 | 3 | from openai import OpenAI 4 | import re 5 | 6 | def optimize_sql(original_sql): 7 | """ 8 | 调用 deepseek V3 接口优化 SQL 查询语句,并返回优化后的纯 SQL 字符串。 9 | 10 | Args: 11 | original_sql (str): 原始的 SQL 查询语句。 12 | 13 | Returns: 14 | str: 优化后的纯 SQL 查询语句。如果调用过程中发生错误,则返回包含错误信息的字符串。 15 | """ 16 | try: 17 | client = OpenAI(api_key="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", base_url="https://api.deepseek.com") 18 | 19 | # 输出调用信息 (可选,仅用于调试) 20 | print('\033[94m以下是调用的deepseek V3接口.\033[0m') 21 | print('优化前的SQL是:') 22 | print(original_sql) 23 | print('-' * 55) 24 | 25 | # 获取优化后的 SQL,不要求返回 HTML 格式 26 | response = client.chat.completions.create( 27 | model="deepseek-chat", 28 | messages=[ 29 | {"role": "system", "content": "你是MySQL专家!你精通SQL优化!也是高级程序员!"}, 30 | {"role": "user", "content": f"帮我优化这个SQL,返回纯 SQL 语句,不要添加任何标记或格式:{original_sql}"}, 31 | ], 32 | stream=False 33 | ) 34 | 35 | # 获取优化后的 SQL 内容 36 | optimized_sql = response.choices[0].message.content 37 | 38 | # 清理可能的标记(例如 ```sql 或 ```),确保返回纯 SQL 39 | optimized_sql = re.sub(r'```sql|```|<\w+>.*?', '', optimized_sql, flags=re.MULTILINE).strip() 40 | 41 | # 输出优化后的 SQL (可选,仅用于调试) 42 | print('\033[92m优化后的SQL是:\033[0m') 43 | print(optimized_sql) 44 | 45 | return optimized_sql 46 | 47 | except Exception as e: 48 | # 捕获异常,返回纯文本错误信息 49 | error_message = f"调用 deepseek API 接口出错: {str(e)}" 50 | print(error_message) # 打印错误信息到控制台 (可选) 51 | return error_message 52 | -------------------------------------------------------------------------------- /deepseek_flash_src/sql_extra.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def check_percent_position(string): 4 | string = string.lower() 5 | pattern = r".*like\s+'%|.*like\s+concat\(+'%|.*regexp\s+" 6 | matches = re.findall(pattern, string) 7 | if matches: 8 | like_pattern = r"like\s+(?:concat\(.*?\)|'%%'|\'.*?%(?:.*?)?\')" 9 | like_match = re.search(like_pattern, string) 10 | if like_match: 11 | return True, like_match.group() 12 | #return True 13 | return False, None 14 | 15 | 16 | def extract_function_index(string): 17 | #pattern = r'\b(\w+)\(' 18 | #pattern = r'\b(\w+)\(.*\).*[>=]' 19 | pattern = r'\b(\w+(\(.*\).*[>=]))' 20 | matches = re.findall(pattern, string) 21 | #function_indexes = set(matches) 22 | function_indexes = [match[0] for match in matches] 23 | if function_indexes: 24 | return ', '.join(function_indexes) 25 | else: 26 | return False 27 | -------------------------------------------------------------------------------- /deepseek_flash_src/sql_format_class.py: -------------------------------------------------------------------------------- 1 | import sqlparse 2 | 3 | class SQLFormatter: 4 | def format_sql(self, sql_query): 5 | """ 6 | 格式化 SQL 查询语句 7 | """ 8 | formatted_sql = sqlparse.format(sql_query, reindent=True, keyword_case='upper') 9 | 10 | return formatted_sql 11 | 12 | 13 | -------------------------------------------------------------------------------- /deepseek_flash_src/sql_index.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from tabulate import tabulate 3 | import pymysql 4 | 5 | def execute_index_query(mysql_settings, database, table_name, index_columns): 6 | index_columns = index_columns 7 | index_columns = index_columns.split(',') 8 | updated_columns = [f"'{column.strip()}'" for column in index_columns] 9 | final_columns = ', '.join(updated_columns) 10 | sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns})" 11 | #print(sql) 12 | try: 13 | conn = pymysql.connect(**mysql_settings) 14 | cur = conn.cursor() 15 | cur.execute(sql) 16 | index_result = cur.fetchall() 17 | 18 | if not index_result: 19 | print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。") 20 | 21 | # 提取列名 22 | e_column_names = [desc[0] for desc in cur.description] 23 | 24 | # 提取结果值并进行自动换行处理 25 | e_result_values = [] 26 | for row in index_result: 27 | values = list(row.values()) 28 | wrapped_values = [textwrap.fill(str(value), width=30) for value in values] 29 | e_result_values.append(wrapped_values) 30 | 31 | # 将结果格式化为表格(包含竖线) 32 | e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left") 33 | 34 | return e_table 35 | 36 | except pymysql.err.ProgrammingError as e: 37 | print("MySQL 内部错误:",e) 38 | return None 39 | except Exception as e: 40 | print("MySQL 内部错误:",e) 41 | return None 42 | finally: 43 | if cur: 44 | cur.close() 45 | if conn: 46 | conn.close() 47 | 48 | ######################################################### 49 | 50 | def check_index_exist(mysql_settings, table_name, index_column): 51 | show_index_sql = f"show index from {table_name} where Column_name = '{index_column}'" 52 | try: 53 | conn = pymysql.connect(**mysql_settings) 54 | cur = conn.cursor() 55 | cur.execute(show_index_sql) 56 | index_result = cur.fetchall() 57 | 58 | #if not index_result: 59 | #print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。") 60 | 61 | return index_result 62 | 63 | except pymysql.err.ProgrammingError as e: 64 | print("MySQL 内部错误:",e) 65 | return None 66 | except Exception as e: 67 | print("MySQL 内部错误:",e) 68 | return None 69 | finally: 70 | if cur: 71 | cur.close() 72 | if conn: 73 | conn.close() 74 | 75 | ######################################################### 76 | 77 | def check_index_exist_multi(mysql_settings, database, table_name, index_columns, index_number): 78 | index_columns = index_columns 79 | index_columns = index_columns.split(',') 80 | updated_columns = [f"'{column.strip()}'" for column in index_columns] 81 | final_columns = ', '.join(updated_columns) 82 | sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns}) GROUP BY INDEX_NAME HAVING COUNT(INDEX_NAME) = {index_number}" 83 | #print(sql) 84 | try: 85 | conn = pymysql.connect(**mysql_settings) 86 | cur = conn.cursor() 87 | cur.execute(sql) 88 | index_result = cur.fetchall() 89 | 90 | if not index_result: 91 | return None 92 | 93 | return index_result 94 | 95 | except pymysql.err.ProgrammingError as e: 96 | print("MySQL 内部错误:",e) 97 | return None 98 | except Exception as e: 99 | print("MySQL 内部错误:",e) 100 | return None 101 | finally: 102 | if cur: 103 | cur.close() 104 | if conn: 105 | conn.close() 106 | 107 | -------------------------------------------------------------------------------- /deepseek_flash_src/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SQLAI Helper 工具 6 | 7 | 175 | 176 | 177 |
178 |

SQLAI Helper 工具

179 | 180 |
181 |
182 | 183 | 184 | 185 |

数据库连接配置 (手动填写数据库参数)

186 | 187 |
188 |
189 | 190 | 191 |
192 |
193 | 194 | 195 |
196 |
197 | 198 | 199 |
200 |
201 | 202 | 203 |
204 |
205 | 206 | 207 |
208 |
209 | 210 |
211 |
212 | 213 |
214 |
215 |
216 | 217 | 218 | 224 | 225 | {% if error %} 226 |
227 |

错误信息:

228 |
{{ error|default('未检测到错误')|safe }}
229 |
230 | {% endif %} 231 | 232 | {% if formatted_sql %} 233 |
234 |

1) 你刚才输入的 SQL 语句是:

235 |
{{ formatted_sql|default('无输入 SQL')|safe }}
236 |
237 | {% endif %} 238 | 239 | {% if explain_table %} 240 |
241 |

2) EXPLAIN 执行计划:

242 | {{ explain_table|safe }} 243 |
244 | {% endif %} 245 | 246 | {% if index_suggestions %} 247 |
248 |

3) 索引优化建议:

249 |
{{ index_suggestions|default('无索引优化建议')|safe }}
250 |
251 | {% endif %} 252 | 253 | {% if extra_suggestions %} 254 |
255 |

4) 额外的建议:

256 |
{{ extra_suggestions|default('无额外建议')|safe }}
257 |
258 | {% endif %} 259 | 260 | {% if ai_suggestions %} 261 |
262 |

5) DeepSeek 的建议:

263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 |
优化后的 SQL 语句
{{ ai_suggestions|default('暂无 AI 建议')|safe }}
275 |
276 | {% endif %} 277 | 278 | {% if no_suggestions and formatted_sql and explain_table %} 279 |
280 |

分析结果:

281 |
SQL 语句分析完成,当前 SQL 语句执行计划良好,没有额外的优化建议!
282 |
283 | {% endif %} 284 |
285 | 286 | 287 | 288 | 340 | 341 | 342 | -------------------------------------------------------------------------------- /deepseek_flash_src/where_clause.py: -------------------------------------------------------------------------------- 1 | import sqlparse 2 | 3 | def parse_where_condition(sql, column): 4 | parsed = sqlparse.parse(sql) 5 | stmt = parsed[0] 6 | 7 | where_column = "" 8 | where_expression = "" 9 | where_value = "" 10 | where_clause = "" 11 | found = False 12 | result = "" 13 | 14 | for token in stmt.tokens: 15 | if isinstance(token, sqlparse.sql.Where): 16 | where_clause = token.value 17 | conditions = [] 18 | for cond_tok in token.tokens: 19 | if isinstance(cond_tok, sqlparse.sql.Comparison): 20 | left_token = cond_tok.left.value.strip() 21 | if column == left_token: 22 | #return f"比较运算符: {cond_tok.value.strip()}" 23 | return cond_tok.value.strip() 24 | 25 | if isinstance(cond_tok, sqlparse.sql.Identifier) and cond_tok.value == column: 26 | found = True 27 | 28 | if found: 29 | if isinstance(cond_tok, sqlparse.sql.Token) and cond_tok.value.upper() in ["OR","AND"]: 30 | break 31 | else: 32 | result += cond_tok.value 33 | 34 | if isinstance(cond_tok, sqlparse.sql.Parenthesis) and found: 35 | break 36 | 37 | if len(result) != 0: 38 | #return f"逻辑运算符: {result.strip()}" 39 | return result.strip() 40 | else: 41 | #return "没有找到该字段的条件表达式" 42 | return None 43 | 44 | 45 | def parse_where_condition_full(sql): 46 | parsed = sqlparse.parse(sql) 47 | stmt = parsed[0] 48 | 49 | where_clause = "" 50 | found = False 51 | 52 | for token in stmt.tokens: 53 | if found: 54 | where_clause += token.value 55 | if isinstance(token, sqlparse.sql.Where): 56 | found = True 57 | where_clause += token.value 58 | 59 | return where_clause.strip() if where_clause else None 60 | 61 | -------------------------------------------------------------------------------- /src/sql_alias.py: -------------------------------------------------------------------------------- 1 | def has_table_alias(table_alias): 2 | if isinstance(table_alias, dict): 3 | table_alias = {k.lower(): v.lower() for k, v in table_alias.items()} 4 | if 'join' in table_alias or 'on' in table_alias or 'where' in table_alias or 'group by' in table_alias or 'order by' in table_alias or 'limit' in table_alias or not table_alias: 5 | return False # 没有别名 6 | else: 7 | return True #有别名 8 | elif isinstance(table_alias, list): 9 | return False # 没有别名 10 | else: 11 | pass 12 | -------------------------------------------------------------------------------- /src/sql_count_value.py: -------------------------------------------------------------------------------- 1 | import pymysql 2 | from rich.progress import Progress, TimeElapsedColumn, TextColumn, BarColumn 3 | 4 | def count_column_value(table_name, field_name, mysql_settings, sample_size): 5 | with pymysql.connect(**mysql_settings) as conn: 6 | with conn.cursor() as cursor: 7 | """ 8 | 在这个查询中,使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000,则使用 9 | (SELECT COUNT(*) FROM {table_name}) / 2 10 | 作为阈值,即表的实际大小除以2;否则使用 {sample_size} / 2 作为阈值。 11 | """ 12 | 13 | # 如果你的数据库是MySQL 8.0,那么推荐用 CTE(公共表达式)的形式 14 | ''' 15 | sql = f""" 16 | WITH subquery AS ( 17 | SELECT {field_name} 18 | FROM {table_name} 19 | LIMIT {sample_size} 20 | ) 21 | SELECT COUNT(*) as count 22 | FROM subquery 23 | GROUP BY {field_name} 24 | HAVING COUNT(*) >= 25 | CASE 26 | WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 27 | ELSE {sample_size} / 2 28 | END; 29 | """ 30 | ''' 31 | 32 | # 默认采用子查询兼容MySQL 5.7版本 33 | sql = f""" 34 | SELECT COUNT(*) as count 35 | FROM ( 36 | SELECT {field_name} 37 | FROM {table_name} 38 | LIMIT {sample_size} 39 | ) AS subquery 40 | GROUP BY {field_name} 41 | HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 42 | THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END; 43 | """ 44 | 45 | # print(sql) 46 | with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress: 47 | task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{field_name}[/bold magenta]...", total=1) 48 | cursor.execute(sql) 49 | progress.update(task, completed=1) 50 | 51 | results = cursor.fetchall() 52 | 53 | if results: 54 | # 如果有超过半数的重复数据 55 | return results 56 | else: 57 | return False 58 | 59 | 60 | def count_column_clause_value(table_name, field_name, where_clause_value, mysql_settings, sample_size): 61 | with pymysql.connect(**mysql_settings) as conn: 62 | with conn.cursor() as cursor: 63 | 64 | """ 65 | 在这个查询中,使用了CASE语句来判断数据行数是否小于100000。如果数据行数小于100000,则使用 66 | (SELECT COUNT(*) FROM {table_name}) / 2 67 | 作为阈值,即表的实际大小除以2;否则使用 {sample_size} / 2 作为阈值。 68 | """ 69 | 70 | # 如果你的数据库是MySQL 8.0,那么推荐用 CTE(公共表达式)的形式 71 | ''' 72 | sql = f""" 73 | WITH subquery AS ( 74 | SELECT {field_name} 75 | FROM {table_name} 76 | LIMIT {sample_size} 77 | ) 78 | SELECT COUNT(*) as count 79 | FROM subquery 80 | GROUP BY {field_name} 81 | HAVING COUNT(*) >= 82 | CASE 83 | WHEN (SELECT COUNT(*) FROM subquery) < {sample_size} THEN (SELECT COUNT(*) FROM {table_name}) / 2 84 | ELSE {sample_size} / 2 85 | END; 86 | """ 87 | ''' 88 | 89 | # 默认采用子查询兼容MySQL 5.7版本 90 | sql = f""" 91 | SELECT COUNT(*) as count 92 | FROM ( 93 | SELECT {field_name} 94 | FROM {table_name} 95 | WHERE {where_clause_value} 96 | LIMIT {sample_size} 97 | ) AS subquery 98 | GROUP BY {field_name} 99 | HAVING COUNT(*) >= CASE WHEN (SELECT COUNT(*) FROM {table_name} LIMIT {sample_size}) < {sample_size} 100 | THEN (SELECT COUNT(*) FROM {table_name}) / 2 ELSE {sample_size} / 2 END; 101 | """ 102 | 103 | #print(sql) 104 | with Progress(TextColumn("[progress.description]{task.description}", justify="right"), BarColumn(), TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), TimeElapsedColumn()) as progress: 105 | task = progress.add_task(f"[cyan]Executing SQL query [bold magenta]{table_name}[/bold magenta] [cyan]where_clause [bold magenta]{where_clause_value}[/bold magenta]...", total=1) 106 | cursor.execute(sql) 107 | progress.update(task, completed=1) 108 | 109 | results = cursor.fetchall() 110 | 111 | if results: 112 | # 如果有超过半数的重复数据 113 | return results 114 | else: 115 | return False 116 | -------------------------------------------------------------------------------- /src/sql_extra.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def check_percent_position(string): 4 | string = string.lower() 5 | pattern = r".*like\s+'%|.*like\s+concat\(+'%|.*regexp\s+" 6 | matches = re.findall(pattern, string) 7 | if matches: 8 | like_pattern = r"like\s+(?:concat\(.*?\)|'%%'|\'.*?%(?:.*?)?\')" 9 | like_match = re.search(like_pattern, string) 10 | if like_match: 11 | return True, like_match.group() 12 | #return True 13 | return False, None 14 | 15 | 16 | def extract_function_index(string): 17 | #pattern = r'\b(\w+)\(' 18 | #pattern = r'\b(\w+)\(.*\).*[>=]' 19 | pattern = r'\b(\w+(\(.*\).*[>=]))' 20 | matches = re.findall(pattern, string) 21 | #function_indexes = set(matches) 22 | function_indexes = [match[0] for match in matches] 23 | if function_indexes: 24 | return ', '.join(function_indexes) 25 | else: 26 | return False 27 | -------------------------------------------------------------------------------- /src/sql_format_class.py: -------------------------------------------------------------------------------- 1 | import sqlparse 2 | 3 | class SQLFormatter: 4 | def format_sql(self, sql_query): 5 | """ 6 | 格式化 SQL 查询语句 7 | """ 8 | formatted_sql = sqlparse.format(sql_query, reindent=True, keyword_case='upper') 9 | 10 | return formatted_sql 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/sql_index.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from tabulate import tabulate 3 | import pymysql 4 | 5 | def execute_index_query(mysql_settings, database, table_name, index_columns): 6 | index_columns = index_columns 7 | index_columns = index_columns.split(',') 8 | updated_columns = [f"'{column.strip()}'" for column in index_columns] 9 | final_columns = ', '.join(updated_columns) 10 | sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns})" 11 | #print(sql) 12 | try: 13 | conn = pymysql.connect(**mysql_settings) 14 | cur = conn.cursor() 15 | cur.execute(sql) 16 | index_result = cur.fetchall() 17 | 18 | if not index_result: 19 | print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。") 20 | 21 | # 提取列名 22 | e_column_names = [desc[0] for desc in cur.description] 23 | 24 | # 提取结果值并进行自动换行处理 25 | e_result_values = [] 26 | for row in index_result: 27 | values = list(row.values()) 28 | wrapped_values = [textwrap.fill(str(value), width=30) for value in values] 29 | e_result_values.append(wrapped_values) 30 | 31 | # 将结果格式化为表格(包含竖线) 32 | e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left") 33 | 34 | return e_table 35 | 36 | except pymysql.err.ProgrammingError as e: 37 | print("MySQL 内部错误:",e) 38 | return None 39 | except Exception as e: 40 | print("MySQL 内部错误:",e) 41 | return None 42 | finally: 43 | if cur: 44 | cur.close() 45 | if conn: 46 | conn.close() 47 | 48 | ######################################################### 49 | 50 | def check_index_exist(mysql_settings, table_name, index_column): 51 | show_index_sql = f"show index from {table_name} where Column_name = '{index_column}'" 52 | try: 53 | conn = pymysql.connect(**mysql_settings) 54 | cur = conn.cursor() 55 | cur.execute(show_index_sql) 56 | index_result = cur.fetchall() 57 | 58 | #if not index_result: 59 | #print(f"没有检测到 {table_name} 表 字段 {final_columns} 有索引。") 60 | 61 | return index_result 62 | 63 | except pymysql.err.ProgrammingError as e: 64 | print("MySQL 内部错误:",e) 65 | return None 66 | except Exception as e: 67 | print("MySQL 内部错误:",e) 68 | return None 69 | finally: 70 | if cur: 71 | cur.close() 72 | if conn: 73 | conn.close() 74 | 75 | ######################################################### 76 | 77 | def check_index_exist_multi(mysql_settings, database, table_name, index_columns, index_number): 78 | index_columns = index_columns 79 | index_columns = index_columns.split(',') 80 | updated_columns = [f"'{column.strip()}'" for column in index_columns] 81 | final_columns = ', '.join(updated_columns) 82 | sql = f"SELECT TABLE_NAME,INDEX_NAME,COLUMN_NAME,CARDINALITY FROM information_schema.STATISTICS WHERE TABLE_SCHEMA = '{database}' AND TABLE_NAME = '{table_name}' AND COLUMN_NAME IN ({final_columns}) GROUP BY INDEX_NAME HAVING COUNT(INDEX_NAME) = {index_number}" 83 | #print(sql) 84 | try: 85 | conn = pymysql.connect(**mysql_settings) 86 | cur = conn.cursor() 87 | cur.execute(sql) 88 | index_result = cur.fetchall() 89 | 90 | if not index_result: 91 | return None 92 | 93 | return index_result 94 | 95 | except pymysql.err.ProgrammingError as e: 96 | print("MySQL 内部错误:",e) 97 | return None 98 | except Exception as e: 99 | print("MySQL 内部错误:",e) 100 | return None 101 | finally: 102 | if cur: 103 | cur.close() 104 | if conn: 105 | conn.close() 106 | 107 | -------------------------------------------------------------------------------- /src/sqlai.py: -------------------------------------------------------------------------------- 1 | from vanna.remote import VannaDefault 2 | 3 | """ 4 | pip3 install vanna==0.0.36 -i "http://mirrors.aliyun.com/pypi/simple" --trusted-host "mirrors.aliyun.com" 5 | """ 6 | 7 | def optimize_sql(original_sql): 8 | # 创建 VannaDefault 实例 9 | vn = VannaDefault(model='sql_helper', api_key='xxxxxxxxxxxxxxxxxxxx') 10 | 11 | # 输出调用信息 12 | print('以下是调用的vanna.ai LLM接口.') 13 | print('优化前的SQL是:') 14 | print(original_sql) 15 | print('-' * 55) 16 | 17 | # 输出优化后的 SQL 18 | print('优化后的SQL是:') 19 | 20 | # 调用 VannaDefault 的 ask 方法 21 | vn.ask('How to optimize this SQL : {}'.format(original_sql)) 22 | -------------------------------------------------------------------------------- /src/sqlai_helper.py: -------------------------------------------------------------------------------- 1 | import sys, re 2 | import textwrap 3 | from tabulate import tabulate 4 | import pymysql 5 | from sql_metadata import Parser 6 | from sql_format_class import SQLFormatter 7 | from sql_alias import has_table_alias 8 | from sql_count_value import count_column_value, count_column_clause_value 9 | from sql_index import execute_index_query, check_index_exist, check_index_exist_multi 10 | from where_clause import * # 1.1版本-新增where条件表达式值 11 | from sql_extra import * 12 | import yaml 13 | import argparse 14 | from sqlai import * 15 | 16 | # 创建命令行参数解析器 17 | parser = argparse.ArgumentParser() 18 | 19 | #------------ 20 | # 添加-f/--file参数,用于指定db.yaml文件的路径 21 | parser.add_argument("-f", "--file", help="Path to db.yaml file") 22 | 23 | # 添加参数,用于指定MySQL的主机名 24 | parser.add_argument("-H", "--host", help="MySQL host") 25 | 26 | # 添加参数,用于指定MySQL的端口号 27 | parser.add_argument("-P", "--port", default=3306, type=int, help="MySQL port(default: 3306)") 28 | 29 | # 添加参数,用于指定MySQL的用户名 30 | parser.add_argument("-u", "--user", help="MySQL user") 31 | 32 | # 添加参数,用于指定MySQL的密码 33 | parser.add_argument("-p", "--password", help="MySQL password") 34 | 35 | # 添加参数,用于指定MySQL的数据库名 36 | parser.add_argument("-d", "--database", help="MySQL database name") 37 | 38 | #----------- 39 | 40 | # 添加--sql参数 41 | parser.add_argument("-q", "--sql", required=True, help="SQL query") 42 | 43 | # 添加--sample参数,默认值为100000,表示10万行 44 | parser.add_argument("--sample", default=100000, type=int, help="Number of rows to sample (default: 100000)") 45 | 46 | # 添加版本号参数 47 | parser.add_argument('-v', '--version', action='version', version='sqlai_helper工具版本号: 2.1.3,更新日期:2024-10-10 <-> 支持SQL改写,合并LLM模型接口') 48 | 49 | # 解析命令行参数 50 | args = parser.parse_args() 51 | 52 | # 获取样本数据行数 53 | #sample_size = args.sample 54 | 55 | if args.file: 56 | # 如果使用了-f参数,则必须只用-f参数且不能再使用其他MySQL配置参数 57 | if args.host or args.user or args.password or args.database: 58 | parser.error("-f/--file参数与其他MySQL配置参数不能共存。") 59 | 60 | # 获取传入的db.yaml文件路径 61 | file_path = args.file 62 | 63 | # 从外部的db.yaml文件加载配置 64 | with open(file_path, "r") as f: 65 | db_config = yaml.safe_load(f) 66 | 67 | # 使用加载的配置赋值给mysql_settings 68 | mysql_settings = { 69 | "host": db_config["host"], 70 | "port": db_config["port"], 71 | "user": db_config["user"], 72 | "passwd": db_config["passwd"], 73 | "database": db_config["database"], 74 | "cursorclass": pymysql.cursors.DictCursor 75 | } 76 | else: 77 | # 如果没有使用-f参数,则必须指定所有MySQL配置参数 78 | if not all([args.host, args.user, args.password, args.database]): 79 | parser.error("必须指定所有MySQL配置参数,包括-H/--host、-P/--port、-u/--user、-p/--password和-d/--database。") 80 | 81 | # 获取MySQL的配置信息 82 | mysql_settings = { 83 | "host": args.host, 84 | "port": args.port, 85 | "user": args.user, 86 | "passwd": args.password, 87 | "database": args.database, 88 | "cursorclass": pymysql.cursors.DictCursor 89 | } 90 | 91 | #################################################################### 92 | # 获取样本数据行数 93 | sample_size = args.sample 94 | 95 | # 获取传入的sql_query的值 96 | sql_query = args.sql 97 | 98 | print("\n1) 你刚才输入的SQL语句是:") 99 | print("-" * 100) 100 | # 美化SQL 101 | formatter = SQLFormatter() 102 | formatted_sql = formatter.format_sql(sql_query) 103 | print(formatted_sql) 104 | print("-" * 100) 105 | 106 | ########################################################################### 107 | # 解析SQL,识别出表名和字段名 108 | try: 109 | parser = Parser(sql_query) 110 | table_names = parser.tables 111 | # print(f"表名是: {table_names}") 112 | table_aliases = parser.tables_aliases 113 | data = parser.columns_dict 114 | select_fields = data.get('select', []) 115 | join_fields = data.get('join', []) 116 | where_fields = data.get('where', []) 117 | # print(f"WHERE字段是:{where_fields}") 118 | order_by_fields = data.get('order_by', []) 119 | group_by_fields = data.get('group_by', []) 120 | if 'SELECT' not in sql_query.upper(): 121 | print("sql_helper工具仅支持select语句") 122 | sys.exit(1) 123 | except Exception as e: 124 | print("解析 SQL 出现语法错误:", str(e)) 125 | sys.exit(2) 126 | 127 | ########################################################################### 128 | conn = pymysql.connect(**mysql_settings) 129 | cur = conn.cursor() 130 | 131 | sql = f"EXPLAIN {sql_query}" 132 | try: 133 | cur.execute(sql) 134 | except pymysql.err.ProgrammingError as e: 135 | print("MySQL 内部错误:", e) 136 | sys.exit(1) 137 | except Exception as e: 138 | print("MySQL 内部错误:", e) 139 | sys.exit(1) 140 | explain_result = cur.fetchall() 141 | 142 | # 提取列名 143 | e_column_names = list(explain_result[0].keys()) 144 | 145 | # 提取结果值并进行自动换行处理 146 | e_result_values = [] 147 | for row in explain_result: 148 | values = list(row.values()) 149 | wrapped_values = [textwrap.fill(str(value), width=20) for value in values] 150 | e_result_values.append(wrapped_values) 151 | 152 | # 将结果格式化为表格(包含竖线) 153 | e_table = tabulate(e_result_values, headers=e_column_names, tablefmt="grid", numalign="left") 154 | 155 | print("\n") 156 | print("2) EXPLAIN执行计划:") 157 | print(e_table) 158 | print("\n") 159 | print("3) 索引优化建议:") 160 | print("-" * 100) 161 | ########################################################################### 162 | 163 | contains_dot = False 164 | # 判断有无where条件 165 | if len(where_fields) == 0: 166 | print(f"你的SQL没有where条件.") 167 | else: 168 | contains_dot = any('.' in field for field in where_fields) 169 | 170 | # 判断如果SQL里包含on,检查on后面的字段是否有索引。 171 | if len(join_fields) != 0: 172 | table_field_dict = {} 173 | 174 | for field in join_fields: 175 | table_field = field.split('.') 176 | if len(table_field) == 2: 177 | table_name = table_field[0] 178 | field_name = table_field[1] 179 | if table_name not in table_field_dict: 180 | table_field_dict[table_name] = [] 181 | table_field_dict[table_name].append(field_name) 182 | 183 | for table_name, on_columns in table_field_dict.items(): 184 | for on_column in on_columns: 185 | show_index_sql = f"show index from {table_name} where Column_name = '{on_column}'" 186 | cur.execute(show_index_sql) 187 | index_result = cur.fetchall() 188 | if not index_result: 189 | print("join联表查询,on关联字段必须增加索引!") 190 | print(f"\033[91m需要添加索引:ALTER TABLE {table_name} ADD INDEX idx_{on_column}({on_column});\033[0m\n") 191 | print(f"【{table_name}】表 【{on_column}】字段,索引分析:") 192 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 193 | table_name=table_name, index_columns=on_column) 194 | print(index_static) 195 | 196 | # 解析执行计划,查找需要加索引的字段 197 | for row in explain_result: 198 | # 获取查询语句涉及的表和字段信息 199 | table_name = row['table'] 200 | if table_name.lower().startswith('= 1)) or (len(join_fields) == 0 and ((row['type'] == 'ALL' and row['key'] is None) or int(row['rows']) >= 1000)): 211 | # 判断表是否有别名,没有别名的情况: 212 | if has_table_alias(table_aliases) is False and contains_dot is False: 213 | if len(where_fields) != 0: 214 | for where_field in where_fields: 215 | # 1.1版本-新增where条件表达式值 216 | where_clause_value = parse_where_condition(formatted_sql, where_field) 217 | if where_clause_value is not None: 218 | where_clause_value = where_clause_value.replace('\n', '').replace('\r', '') 219 | where_clause_value = re.sub(r'\s+', ' ', where_clause_value) 220 | Cardinality = count_column_clause_value(table_name, where_field, where_clause_value, mysql_settings, sample_size) 221 | else: 222 | Cardinality = count_column_value(table_name, where_field, mysql_settings, sample_size) 223 | if Cardinality: 224 | count_value = Cardinality[0]['count'] 225 | if where_clause_value is not None: 226 | print( 227 | f"取出表 【{table_name}】 where条件表达式 【{where_clause_value}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。") 228 | else: 229 | print( 230 | f"取出表 【{table_name}】 where条件字段 【{where_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。") 231 | else: 232 | add_index_fields.append(where_field) 233 | 234 | if group_by_fields is not None and len(group_by_fields) != 0: 235 | for group_field in group_by_fields: 236 | #print(f"调试-表名:{table_name}, 分组字段:{group_field}") 237 | Cardinality = count_column_value(table_name, group_field, mysql_settings, sample_size) 238 | if Cardinality: 239 | count_value = Cardinality[0]['count'] 240 | print( 241 | f"取出表 【{table_name}】 group by条件字段 【{group_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。") 242 | else: 243 | add_index_fields.append(group_field) 244 | 245 | if len(order_by_fields) != 0: 246 | for order_field in order_by_fields: 247 | Cardinality = count_column_value(table_name, order_field, mysql_settings, sample_size) 248 | if Cardinality: 249 | count_value = Cardinality[0]['count'] 250 | print( 251 | f"取出表 【{table_name}】 order by条件字段 【{order_field}】 {sample_size} 条记录,重复的数据有:【{count_value}】 条,没有必要为该字段创建索引。") 252 | else: 253 | add_index_fields.append(order_field) 254 | 255 | # add_index_fields = list(set(add_index_fields)) # 字段名如果一样,则去重 256 | add_index_fields = list(dict.fromkeys(add_index_fields).keys()) # 字段名如果一样,则去重,并确保元素的位置不发生改变 257 | 258 | if len(add_index_fields) == 0: 259 | if 'index_result' not in globals(): 260 | print(f"\n\u2192 \033[1;92m【{table_name}】 表,无需添加任何索引。\033[0m\n") 261 | elif index_result: 262 | print(f"\n\u2192 \033[1;92m【{table_name}】 表,无需添加任何索引。\033[0m\n") 263 | else: 264 | pass 265 | elif len(add_index_fields) == 1: 266 | index_name = add_index_fields[0] 267 | index_columns = add_index_fields[0] 268 | index_result = check_index_exist(mysql_settings, table_name=table_name, index_column=index_columns) 269 | if not index_result: 270 | if row['key'] is None: 271 | print( 272 | f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m") 273 | elif row['key'] is not None and row['rows'] >= 1: 274 | print( 275 | f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{index_name}({index_columns});\033[0m") 276 | else: 277 | print(f"\n\u2192 \033[1;92m【{table_name}】表 【{index_columns}】字段,索引已经存在,无需添加任何索引。\033[0m") 278 | print(f"\n【{table_name}】表 【{index_columns}】字段,索引分析:") 279 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 280 | table_name=table_name, index_columns=index_columns) 281 | print(index_static) 282 | print() 283 | else: 284 | merged_name = '_'.join(add_index_fields) 285 | merged_columns = ','.join(add_index_fields) 286 | index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"], 287 | table_name=table_name, index_columns=merged_columns, 288 | index_number=len(add_index_fields)) 289 | if index_result_list is None: 290 | if row['key'] is None: 291 | print( 292 | f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m") 293 | elif row['key'] is not None and row['rows'] >= 1: 294 | print( 295 | f"\033[93m建议添加索引:ALTER TABLE {table_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m") 296 | else: 297 | print(f"\n\u2192 \033[1;92m【{table_name}】表 【{merged_columns}】字段,联合索引已经存在,无需添加任何索引。\033[0m") 298 | print(f"\n【{table_name}】表 【{merged_columns}】字段,索引分析:") 299 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 300 | table_name=table_name, index_columns=merged_columns) 301 | print(index_static) 302 | print() 303 | 304 | # 判断表是否有别名,有别名的情况: 305 | if has_table_alias(table_aliases) is True or contains_dot is True: 306 | if has_table_alias(table_aliases) is True: 307 | try: 308 | table_real_name = table_aliases[table_name] 309 | except KeyError: 310 | if table_name.startswith('= 1: 395 | print( 396 | f"\033[93m建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{index_name}({index_columns});\033[0m") 397 | else: 398 | print(f"\n\u2192 \033[1;92m【{table_real_name}】表 【{index_columns}】字段,索引已经存在,无需添加任何索引。\033[0m") 399 | print(f"\n【{table_real_name}】表 【{index_columns}】字段,索引分析:") 400 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 401 | table_name=table_real_name, index_columns=index_columns) 402 | print(index_static) 403 | print() 404 | else: 405 | merged_name = '_'.join(add_index_fields) 406 | merged_columns = ','.join(add_index_fields) 407 | index_result_list = check_index_exist_multi(mysql_settings, database=mysql_settings["database"], 408 | table_name=table_real_name, index_columns=merged_columns, 409 | index_number=len(add_index_fields)) 410 | if index_result_list is None: 411 | if row['key'] is None: 412 | print( 413 | f"\033[93m建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m") 414 | elif row['key'] is not None and row['rows'] >= 1: 415 | print( 416 | f"\033[93m建议添加索引:ALTER TABLE {table_real_name} ADD INDEX idx_{merged_name}({merged_columns});\033[0m") 417 | else: 418 | print(f"\n\u2192 \033[1;92m【{table_real_name}】表 【{merged_columns}】字段,联合索引已经存在,无需添加任何索引。\033[0m") 419 | print(f"\n【{table_real_name}】表 【{merged_columns}】字段,索引分析:") 420 | index_static = execute_index_query(mysql_settings, database=mysql_settings["database"], 421 | table_name=table_real_name, index_columns=merged_columns) 422 | print(index_static) 423 | print() 424 | 425 | # 关闭游标和连接 426 | cur.close() 427 | conn.close() 428 | 429 | print("\n") 430 | print("4) 额外的建议:") 431 | print("-" * 100) 432 | 433 | where_clause = parse_where_condition_full(formatted_sql) 434 | #print(f"where子句:{where_clause}") 435 | if where_clause: 436 | like_r, like_expression = check_percent_position(where_clause) 437 | if like_r is True: 438 | print(f"like模糊匹配,百分号在首位,【{like_expression}】是不能用到索引的,例如like '%张三%',可以考虑改成like '张三%',这样是可以用到索引的,如果业务上不能改,可以考虑用全文索引。\n") 439 | 440 | function_r = extract_function_index(where_clause) 441 | if function_r is not False: 442 | print(f"索引列使用了函数作计算:【{function_r}】,会导致索引失效。" 443 | f"如果你是MySQL 8.0可以考虑创建函数索引;如果你是MySQL 5.7,你要更改你的SQL逻辑了。\n") 444 | 445 | print("\n") 446 | print("\033[1m5) AI的建议:\033[0m") 447 | print("-" * 100) 448 | optimized_sql = optimize_sql(formatted_sql) 449 | -------------------------------------------------------------------------------- /src/where_clause.py: -------------------------------------------------------------------------------- 1 | import sqlparse 2 | 3 | def parse_where_condition(sql, column): 4 | parsed = sqlparse.parse(sql) 5 | stmt = parsed[0] 6 | 7 | where_column = "" 8 | where_expression = "" 9 | where_value = "" 10 | where_clause = "" 11 | found = False 12 | result = "" 13 | 14 | for token in stmt.tokens: 15 | if isinstance(token, sqlparse.sql.Where): 16 | where_clause = token.value 17 | conditions = [] 18 | for cond_tok in token.tokens: 19 | if isinstance(cond_tok, sqlparse.sql.Comparison): 20 | left_token = cond_tok.left.value.strip() 21 | if column == left_token: 22 | #return f"比较运算符: {cond_tok.value.strip()}" 23 | return cond_tok.value.strip() 24 | 25 | if isinstance(cond_tok, sqlparse.sql.Identifier) and cond_tok.value == column: 26 | found = True 27 | 28 | if found: 29 | if isinstance(cond_tok, sqlparse.sql.Token) and cond_tok.value.upper() in ["OR","AND"]: 30 | break 31 | else: 32 | result += cond_tok.value 33 | 34 | if isinstance(cond_tok, sqlparse.sql.Parenthesis) and found: 35 | break 36 | 37 | if len(result) != 0: 38 | #return f"逻辑运算符: {result.strip()}" 39 | return result.strip() 40 | else: 41 | #return "没有找到该字段的条件表达式" 42 | return None 43 | 44 | 45 | def parse_where_condition_full(sql): 46 | parsed = sqlparse.parse(sql) 47 | stmt = parsed[0] 48 | 49 | where_clause = "" 50 | found = False 51 | 52 | for token in stmt.tokens: 53 | if found: 54 | where_clause += token.value 55 | if isinstance(token, sqlparse.sql.Where): 56 | found = True 57 | where_clause += token.value 58 | 59 | return where_clause.strip() if where_clause else None 60 | 61 | -------------------------------------------------------------------------------- /test.yaml: -------------------------------------------------------------------------------- 1 | host: 192.168.198.239 2 | port: 3306 3 | user: admin 4 | passwd: 123456 5 | database: test 6 | -------------------------------------------------------------------------------- /web/sql_helper/README.md: -------------------------------------------------------------------------------- 1 | ### sql_helper Web端接口(适用于Centos7 系统) 2 | 3 | 一、环境搭建 4 | 5 | ``` 6 | shell> yum install httpd php php-mysqlnd 7 | shell> systemctl restart httpd.service 8 | ``` 9 | 10 | 1) 把 https://github.com/hcymysql/sql_helper/archive/refs/heads/main.zip 安装包解压缩到 /var/www/html/目录下 11 | 12 | 目录结构如下: 13 | ``` 14 | sql_helper 15 | ├── conn.php 16 | ├── css 17 | │   └── bootstrap.min.css 18 | ├── schema 19 | │   └── sql_helper_schema.sql 20 | ├── sql_helper_args 21 | ├── sql_helper.php 22 | └── sql_helper_result.php 23 | ``` 24 | 25 | 2)赋予sql_helper_args文件可执行权限 26 | ``` 27 | shell> cd /var/www/html/sql_helper/ 28 | shell> chmod 755 sql_helper_args 29 | ``` 30 | 31 | 二、sql_helper 网页端部署 32 | 33 | 1)导入sql_helper工具表结构 34 | ``` 35 | shell> mysql -uroot -p123456 < ./schema/sql_helper_schema.sql 36 | ``` 37 | 38 | 2)录入你线上的数据库信息(你可以填写从库信息) 39 | ``` 40 | mysql> insert into sql_helper.dbinfo values(1,'192.168.0.11','test','admin','123456',3306); 41 | ``` 42 | 43 | 3)配置conn.php文件 44 | 45 | -- 改成你的sql_helper库连接信息 46 | 47 | 4)页面访问 48 | 49 | http://yourIP/sql_helper/sql_helper.php 50 | 51 | 加一个超链接,可方便地接入你们的自动化运维平台里。 52 | 53 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/5657b325-0e0e-460f-a9c7-9891f9730bb3) 54 | 55 | ![image](https://github.com/hcymysql/sql_helper/assets/19261879/1b9dfc6f-4542-406a-9cc4-0c61c44e80be) 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /web/sql_helper/conn.php: -------------------------------------------------------------------------------- 1 | 7 | -------------------------------------------------------------------------------- /web/sql_helper/schema/sql_helper_schema.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE IF NOT EXISTS `sql_helper`; 2 | 3 | USE `sql_helper`; 4 | 5 | DROP TABLE IF EXISTS `dbinfo`; 6 | 7 | CREATE TABLE `dbinfo` ( 8 | `id` int(11) NOT NULL AUTO_INCREMENT, 9 | `ip` varchar(100) DEFAULT NULL, 10 | `dbname` varchar(100) DEFAULT NULL, 11 | `user` varchar(500) DEFAULT NULL, 12 | `pwd` varchar(500) DEFAULT NULL, 13 | `port` int(11) DEFAULT NULL, 14 | PRIMARY KEY (`id`), 15 | KEY dbname (`dbname`) 16 | ) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; 17 | -------------------------------------------------------------------------------- /web/sql_helper/sql_helper.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SQL自助查询优化 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 |
16 |

注:选择你的数据库,然后提交SQL,平台会自动返回优化建议。例如你可以将线上跑的比较慢的SQL,拿到平台里执行,平台会根据你的SQL自动反馈优化建议。

17 |
18 | SQL自助查询优化(自动反馈优化建议) 19 |
20 |
21 |
22 |
23 |
24 |
25 | 26 | 35 | 45 |
46 |
47 | 48 |
49 |
50 |
51 |
52 | 53 |
54 | 55 |
56 |
57 |
58 |
59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /web/sql_helper/sql_helper_result.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SQL自助查询优化 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | SQL 查询结果'; 39 | 40 | // 使用
 标签将输出保留原始格式
41 | echo '
'; 42 | echo '
'; 43 | echo '
' . $output . '
'; 44 | 45 | echo '
点击此处返回

'; 46 | echo '
'; 47 | 48 | echo "
"; 49 | echo "
"; 50 | 51 | ?> 52 | 53 | 54 | 55 | --------------------------------------------------------------------------------