├── Dataset
├── Dataset 1
│ ├── Benign
│ │ └── benign_1500_6_mixing.csv
│ ├── README.md
│ ├── Trojan
│ │ ├── AhMyth_wireshark_1500.csv
│ │ ├── AndroidRAT_wireshark_1500.csv
│ │ ├── AndroidTester_wireshark_1500.csv
│ │ ├── Droidjack_wireshark_1500.csv
│ │ ├── Hawkshaw_wireshark_1500.csv
│ │ ├── Spymax_wireshark_1500.csv
│ │ └── trojan_1500_6.csv
│ └── benign_trojan_18000_model_1_transformed.csv
└── Dataset 2
│ ├── README.md
│ ├── benign
│ └── benign_75_6_or_150_3_mixing.csv
│ └── trojan
│ ├── AhMyth_75.csv
│ ├── AndroidRAT_75.csv
│ ├── AndroidTester_75.csv
│ ├── DroidJack_75.csv
│ ├── HawkShaw_75.csv
│ └── SpyMax_75.csv
├── README.md
├── Remote Access Trojan Detection On Android Based On Network Traffic Observation Using Machine Learning.pdf
└── Testing Scratch
├── Dataset 1
├── Decision Tree
│ ├── Decision Tree Scratch.ipynb
│ ├── Decision Trees - Model 1 - 2021.rmp
│ ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv01.ipynb
│ ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv02.ipynb
│ ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv03.ipynb
│ └── Images
│ │ ├── Decision Tree - RapidMiner.PNG
│ │ ├── Decision Tree Result - Python (Jupyter) vs RapidMiner.PNG
│ │ ├── imagename.png
│ │ ├── imagenamev02.png
│ │ └── imagenamev03.png
├── Naive Bayes
│ ├── Images
│ │ ├── Naive Bayes - Rapid Miner.PNG
│ │ └── Naive Bayes Result - Python (Jupyter) vs RapidMiner.PNG
│ ├── Naive Bayes - Model 1 - 2021.rmp
│ ├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb
│ ├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb
│ └── NaiveBayesClassifier with sklearn - Dataset 1 - testingv03.ipynb
├── README.md
├── Random Forest
│ ├── Images
│ │ ├── Random Forest - Rapid Miner.PNG
│ │ └── Random Forest Result - Python (Jupyter) vs RapidMiner.PNG
│ ├── Random Forest - Model 1 - 2021.rmp
│ ├── RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb
│ ├── RandomForestClassifier with sklearn - Dataset 1 - testingv02.ipynb
│ └── RandomForestClassifier with sklearn - Dataset 1 - testingv03.ipynb
├── testingv01.csv
├── testingv02.csv
├── testingv03.csv
└── training.csv
└── Dataset 2
├── Decision Tree
├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv01.ipynb
├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv02.ipynb
├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv03.ipynb
└── Images
│ ├── Decision Tree Result dataset 2- Python (Jupyter) vs RapidMiner.PNG
│ ├── RapidMiner Process Model.PNG
│ ├── imagename01.png
│ ├── imagename02.png
│ └── imagename03.png
├── Naive Bayes
├── Images
│ ├── Naive Bayes result dataset2 - Python (Jupyter) vs RapidMiner.png
│ └── RapidMiner Naive Bayes Process Model.PNG
├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb
├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb
└── NaiveBayesClassifier with sklearn - Dataset 1 - testingv03.ipynb
├── README.md
├── Random Forest
├── Images
│ ├── Random Forest result dataset 2 Python (Jupyter) vs RapidMiner.PNG
│ └── RapidMiner Random Forest Model.PNG
├── RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb
├── RandomForestClassifier with sklearn - Dataset 1 - testingv02.ipynb
└── RandomForestClassifier with sklearn - Dataset 1 - testingv03.ipynb
├── testingv1.csv
├── testingv2.csv
├── testingv3.csv
└── training.csv
/Dataset/Dataset 1/README.md:
--------------------------------------------------------------------------------
1 | # csv
2 | All CSV files contain network traffic packets.
3 |
4 | benign_trojan_18000_model_1_transformed.csv
5 |
6 | # ==== Protocol ====
7 |
8 | - SMPP = 9
9 | - GQUIC = 8
10 | - KNXnet/IP = 7
11 | - WebSocket = 6
12 | - ICMP = 5
13 | - HTTP = 4
14 | - TLSv1.3 = 3
15 | - TLSv1.2 = 2
16 | - tcp = 1
17 | - udp = 0
18 |
19 | # ==== Type ====
20 |
21 | - out = 0
22 | - in = 1
23 |
24 | # ===== LABEL =====
25 |
26 | - Benign = 0
27 | - Trojan = 1
28 |
29 | # Source,Destination,Protocol,Length,Type,Duration,Label
30 |
31 | - Source is the packet's source port
32 | - Destination is the packet's destination port
33 | - Protocol
34 | - Length
35 | - Type is based on IP addresses: a packet going from a local source to an external destination is "out"
36 | - Duration is the time elapsed between the previous packet and the current packet (from A to B)
37 |
--------------------------------------------------------------------------------
/Dataset/Dataset 2/README.md:
--------------------------------------------------------------------------------
1 | # csv
2 | All CSV files contain network traffic aggregated at 20 packets per row.
3 |
4 | # example
5 | Every 20 rows of AhMyth packet traffic are transformed into 1 package row, so 75 package rows = 1500 packet rows.
--------------------------------------------------------------------------------
/Dataset/Dataset 2/trojan/AhMyth_75.csv:
--------------------------------------------------------------------------------
1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration
2 | 2217,993,158.3571429,165.5,0.534560643,0.5021735,10.49689
3 | 704,757,64,84.11111111,4.531191909,5.064115667,95.420152
4 | 555,532,55.5,53.2,10.8981632,4.3171529,152.153161
5 | 742,378,53,63,6.5037385,3.231654833,110.442268
6 | 6001,1556,545.5454545,172.8888889,1.026273818,6.662169889,71.248541
7 | 2153,1045,165.6153846,149.2857143,0.118072231,0.058974,1.947757
8 | 555,812,55.5,81.2,7.4964943,4.11955,116.160443
9 | 633,758,63.3,68.90909091,6.0886212,3.545336,89.667757
10 | 4436,471,369.6666667,58.875,6.266031917,2.392116875,94.329318
11 | 853,217,53.3125,54.25,4.929131875,3.90058925,94.468467
12 | 4570,508,380.8333333,63.5,4.184390417,4.225012625,84.012786
13 | 1562,641,173.5555556,58.27272727,8.441892444,6.549571636,148.02232
14 | 6008,1622,546.1818182,180.2222222,2.611064818,0.210755222,30.61851
15 | 4792,540,368.6153846,77.14285714,4.398488,4.701446,90.090466
16 | 18200,364,1400,52,0.000222692,0.000210286,0.004367
17 | 18200,364,1400,52,0.000252231,0.000224857,0.004853
18 | 19600,312,1400,52,0.000277786,0.000137833,0.004716
19 | 8792,471,799.2727273,52.33333333,0.513871727,0.149289667,6.996196
20 | 18200,364,1400,52,0.000197,0.000245571,0.00428
21 | 18200,364,1400,52,0.000232538,0.000224429,0.004594
22 | 18200,364,1400,52,0.000162846,0.000385714,0.004817
23 | 19600,312,1400,52,0.000189071,0.000329833,0.004626
24 | 7316,468,665.0909091,52,0.699906182,0.000281667,7.701503
25 | 18196,364,1399.692308,52,0.000237,0.436016571,3.055197
26 | 19600,312,1400,52,0.000253,0.0002155,0.004835
27 | 18200,364,1400,52,0.000176615,0.000352571,0.004764
28 | 18200,364,1400,52,0.000200692,0.000311,0.004786
29 | 7160,468,716,52,0.7576526,0.231896667,9.663731
30 | 16952,364,1304,52,0.000151385,0.000364286,0.004518
31 | 19600,312,1400,52,0.000179429,0.000379,0.004786
32 | 18200,364,1400,52,0.000222846,0.000268714,0.004778
33 | 16800,364,1400,52,0.000241083,0.000228286,0.004536
34 | 16852,364,1296.307692,52,0.016553769,0.055523857,0.603866
35 | 18196,364,1399.692308,52,0.000181615,0.000298571,0.004451
36 | 19600,312,1400,52,0.000215643,0.000323667,0.004961
37 | 18200,364,1400,52,0.000232462,0.000282,0.004996
38 | 15524,364,1194.153846,52,0.016619231,0.010003,0.286071
39 | 18180,364,1398.461538,52,0.000246846,0.000194143,0.004568
40 | 18200,364,1400,52,0.000156846,0.000371,0.004636
41 | 18200,364,1400,52,0.000162923,0.000437714,0.005182
42 | 12744,416,1062,52,0.137838167,0.025451875,1.857673
43 | 12756,364,1063,52,1.10413125,0.034432571,13.49066
44 | 18200,364,1400,52,0.000217385,0.000244,0.004534
45 | 18200,364,1400,52,0.000222923,0.000279429,0.004854
46 | 19600,312,1400,52,0.000182929,0.000357833,0.004708
47 | 8680,468,789.0909091,52,0.289482727,0.000323111,3.187218
48 | 16852,364,1296.307692,52,0.262063769,0.492147429,6.851861
49 | 18180,364,1398.461538,52,0.000208615,0.000332,0.005036
50 | 18200,364,1400,52,0.000236462,0.000286571,0.00508
51 | 19600,312,1400,52,0.000176143,0.0003105,0.00508
52 | 5856,532,585.6,53.2,1.611882,1.0054566,26.173386
53 | 19600,312,1400,52,0.000301929,0.000199833,0.005426
54 | 16880,364,1298.461538,52,0.000218692,0.000189,0.004166
55 | 19600,312,1400,52,0.000199071,0.000360833,0.004952
56 | 18200,364,1400,52,0.000227769,0.000210714,0.004436
57 | 4824,492,438.5454545,54.66666667,0.670273636,0.606590111,12.832321
58 | 1660,496,150.9090909,55.11111111,3.670141182,0.769408111,47.296226
59 | 2333,735,179.4615385,105,3.873147769,1.051354,57.710399
60 | 1824,2396,182.4,239.6,2.4950565,4.5186617,70.137182
61 | 1182,641,118.2,64.1,5.0205738,3.4850297,85.056035
62 | 1080,0,54,0,0.18626495,0,3.725299
63 | 1120,0,56,0,1.34162985,0,26.832597
64 | 1308,112,72.66666667,56,2.736482667,0.1291545,49.514997
65 | 1630,1076,125.3846154,179.3333333,1.920484769,0.6198325,28.693083
66 | 605,565,60.5,56.5,5.0509084,5.5872811,106.381895
67 | 614,483,55.81818182,53.66666667,13.66482609,1.591593667,164.63743
68 | 826,299,55.06666667,59.8,4.0892242,3.7293468,79.985097
69 | 666,434,55.5,54.25,10.60122108,2.32946075,145.850339
70 | 631,597,57.36363636,66.33333333,10.59965909,3.117168556,144.650767
71 | 838,504,69.83333333,63,6.361316333,6.74210575,130.272642
72 | 752,494,62.66666667,61.75,6.73596875,2.417403375,100.170852
73 | 555,538,55.5,53.8,12.5173646,2.2065608,147.239254
74 | 666,431,55.5,53.875,12.51754408,3.129164375,175.243844
75 | 695,467,57.91666667,58.375,8.974165167,2.183049625,125.154379
76 |
--------------------------------------------------------------------------------
/Dataset/Dataset 2/trojan/AndroidRAT_75.csv:
--------------------------------------------------------------------------------
1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration
2 | 2445,621,222.2727273,69,0.026909818,42.96524767,386.983237
3 | 11562,4157,1051.090909,461.8888889,0.012191091,13.37589489,120.517156
4 | 8841,513,803.7272727,57,0.000461727,10.606716,95.465523
5 | 15747,334,1124.785714,55.66666667,0.011220357,11.0531495,66.475982
6 | 15371,479,1280.916667,59.875,0.00067925,10.41668138,83.341602
7 | 18200,364,1400,52,0.000239385,0.000242714,0.004811
8 | 19600,312,1400,52,0.000203071,0.000347833,0.00493
9 | 18200,364,1400,52,0.000174462,0.000363571,0.004813
10 | 16800,364,1400,52,0.000247,0.000297,0.00509
11 | 17484,364,1344.923077,52,0.000331538,0.013282143,0.097285
12 | 16800,364,1400,52,0.000311583,0.000227571,0.005381
13 | 18200,364,1400,52,0.000334154,0.000243286,0.006047
14 | 16800,364,1400,52,0.0003445,0.000227714,0.006144
15 | 11589,544,1158.9,60.44444444,0.0007427,6.858406778,61.733088
16 | 1537,821,170.7777778,74.63636364,0.007143444,35.71313564,392.908783
17 | 961,697,96.1,58.08333333,6.6972939,10.61556383,130.030605
18 | 891,520,99,52,0.800173667,0.2063735,9.265298
19 | 990,520,99,52,0.8007184,0.2070737,10.077921
20 | 990,520,99,52,0.7997319,0.2080924,10.078243
21 | 990,520,99,52,0.8947421,0.2052349,10.99977
22 | 990,520,99,52,0.8059514,0.2112545,10.172059
23 | 990,520,99,52,0.7892205,0.2123557,10.015762
24 | 990,520,99,52,0.7935952,0.2110402,10.046354
25 | 6058,1038,550.7272727,115.3333333,0.240070545,13.79114133,126.970269
26 | 13724,364,1055.692308,52,0.049071385,0.000714286,0.642928
27 | 14378,312,1027,52,0.045750786,0.0007815,0.645798
28 | 12978,364,998.3076923,52,0.043492077,0.000599286,0.569592
29 | 13724,364,1055.692308,52,0.048558538,0.000698714,0.636152
30 | 14378,312,1027,52,0.046092143,0.000684167,0.649395
31 | 12978,364,998.3076923,52,0.043372385,0.000674857,0.568565
32 | 13724,364,1055.692308,52,0.049418538,0.000599143,0.646635
33 | 14378,312,1027,52,0.046040286,0.000637833,0.648391
34 | 12978,364,998.3076923,52,0.042415692,0.000593857,0.555561
35 | 14378,13724,1027,1055.692308,0.046843357,0.050381,0.659358
36 | 14378,312,1027,52,0.046099857,0.000636333,0.649216
37 | 12978,364,998.3076923,52,0.042718,0.000806429,0.560979
38 | 13724,364,1055.692308,52,0.049306769,0.000785143,0.646484
39 | 14378,312,1027,52,0.045566143,0.000666167,0.641923
40 | 12978,364,998.3076923,52,0.042864538,0.000580857,0.561305
41 | 13724,364,1055.692308,52,0.049315077,0.000611571,0.645377
42 | 14378,312,1027,52,0.046574286,0.000616333,0.655738
43 | 13724,364,1055.692308,52,0.048424231,0.000557714,0.633419
44 | 14378,312,1027,52,0.045951286,0.000695333,0.64749
45 | 12978,364,998.3076923,52,0.042386538,0.000599857,0.555224
46 | 13724,364,1055.692308,52,0.049694154,0.000769,0.651407
47 | 14378,312,1027,52,0.047501429,0.000589833,0.668559
48 | 12978,364,998.3076923,52,0.040882231,0.000599286,0.535664
49 | 13724,364,1055.692308,52,0.051130154,0.000607571,0.668945
50 | 14378,312,1027,52,0.046318286,0.000606333,0.652094
51 | 12978,364,998.3076923,52,0.042541769,0.00061,0.557313
52 | 13724,364,1055.692308,52,0.049003154,0.000707143,0.641126
53 | 14378,312,1027,52,0.0463765,0.000731667,0.653661
54 | 12978,364,998.3076923,52,0.041508769,0.00058,0.543674
55 | 13724,364,1055.692308,52,0.050220385,0.000607857,0.65712
56 | 14378,312,1027,52,0.046739071,0.000577167,0.65781
57 | 12978,364,998.3076923,52,0.041349846,0.000581,0.541615
58 | 13724,364,1055.692308,52,0.050166846,0.000685143,0.656965
59 | 14378,312,1027,52,0.046899929,0.0005995,0.660196
60 | 12978,364,998.3076923,52,0.041375231,0.000697571,0.542761
61 | 14378,364,1027,52,0.047188571,0.000697571,0.542761
62 | 14378,312,1027,52,0.044743786,0.000907,0.631855
63 | 12978,364,998.3076923,52,0.044147692,0.000621571,0.578271
64 | 13724,364,1055.692308,52,0.048606308,0.000622143,0.636237
65 | 14378,312,1027,52,0.046374429,0.000574667,0.65269
66 | 12978,364,998.3076923,52,0.042167692,0.000655,0.552765
67 | 12324,312,1027,52,0.0475185,0.000622,0.665411
68 | 14378,312,1027,52,0.045990143,0.000576667,0.647322
69 | 12978,364,998.3076923,52,0.042159846,0.000544,0.551886
70 | 13724,364,1055.692308,52,0.049094923,0.000575286,0.642261
71 | 14378,312,1027,52,0.047311357,0.000668833,0.666372
72 | 12978,364,998.3076923,52,0.042836154,0.00067,0.56156
73 | 13724,364,1055.692308,52,0.048844538,0.000763143,0.640321
74 | 14378,312,1027,52,0.046235929,0.000714833,0.651592
75 | 14378,364,1027,52,0.045908786,0.000657143,0.647323
76 |
--------------------------------------------------------------------------------
/Dataset/Dataset 2/trojan/AndroidTester_75.csv:
--------------------------------------------------------------------------------
1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration
2 | 9231,1515,769.25,189.375,0.360779583,0.267285625,6.46764
3 | 684,624,76,56.72727273,0.210657,3.929349818,45.118761
4 | 3643,496,331.1818182,55.11111111,1.481594273,3.530327889,48.070488
5 | 944,577,94.4,57.7,2.2006779,2.0266149,42.272928
6 | 924,594,92.4,59.4,2.1771128,2.0055671,41.826799
7 | 808,594,80.8,59.4,1.3637388,3.4389643,48.027031
8 | 1061,625,106.1,62.5,1.6759973,1.4729539,31.489512
9 | 7009,582,637.1818182,64.66666667,0.932833273,1.599469667,24.656393
10 | 8510,589,773.6363636,65.44444444,1.053315727,1.801155667,27.796874
11 | 11274,582,867.2307692,83.14285714,0.071165231,0.480554143,4.289027
12 | 13291,482,1022.384615,68.85714286,0.072213308,0.126375429,1.823401
13 | 13291,389,1022.384615,55.57142857,0.224917923,0.439618286,6.001261
14 | 3591,531,359.1,59,2.2110927,1.377628444,34.773108
15 | 818,645,90.88888889,58.63636364,1.803340222,1.901697727,37.148737
16 | 1229,539,111.7272727,59.88888889,3.160890364,1.113644333,44.792593
17 | 1300,620,130,68.88888889,0.4842616,2.707477444,29.422324
18 | 772,606,77.2,60.6,2.4744696,2.112631,45.871006
19 | 880,650,88,65,2.5332566,0.4858034,30.1906
20 | 894,589,89.4,58.9,0.8219715,3.3576526,41.796241
21 | 6165,1710,685,155.4545455,1.907112333,0.478253455,22.424799
22 | 3666,489,333.2727273,54.33333333,1.918789273,0.499852111,25.605351
23 | 828,581,75.27272727,64.55555556,1.772894818,1.895952889,36.565419
24 | 669,686,74.33333333,62.36363636,2.773910778,1.761794364,44.344935
25 | 3928,437,327.3333333,54.625,0.98899925,2.479107375,31.70085
26 | 3643,496,331.1818182,55.11111111,2.491597182,2.937299111,53.843261
27 | 4924,605,492.4,60.5,1.0244591,0.8693194,18.937785
28 | 1003,577,91.18181818,64.11111111,0.750321455,2.673619222,32.316109
29 | 710,561,71,56.1,1.3741465,4.387906,57.620525
30 | 700,555,70,55.5,2.7107517,2.2565212,49.672729
31 | 4983,496,453,55.11111111,2.180034091,3.128993667,52.141318
32 | 2436,502,221.4545455,55.77777778,1.598198545,2.421099,39.370075
33 | 779,561,77.9,56.1,1.3426758,2.66267,40.053458
34 | 700,597,70,59.7,1.8277076,4.069385,58.970926
35 | 814,561,81.4,56.1,2.7985368,1.2618496,40.603864
36 | 700,555,70,55.5,2.5712553,3.9395091,65.107644
37 | 793,614,79.3,61.4,1.0604076,2.868772,39.291796
38 | 3617,1013,361.7,112.5555556,2.8431206,1.441117,41.401299
39 | 3222,589,402.75,58.9,0.859088375,1.2810868,19.794431
40 | 700,555,70,55.5,1.4860371,5.0811005,65.671376
41 | 666,561,74,56.1,2.166566111,2.0342376,40.617301
42 | 700,555,70,55.5,4.0626804,1.9279775,59.906579
43 | 711,567,71.1,56.7,1.9718562,2.7194815,46.913377
44 | 612,562,68,56.2,3.375359889,3.0589176,61.069897
45 | 3784,489,344,54.33333333,2.609163273,1.708903111,44.080924
46 | 928,593,103.1111111,53.90909091,2.150967778,0.776135364,27.896199
47 | 1109,482,100.8181818,53.55555556,2.315458636,0.371138889,28.810295
48 | 1226,482,111.4545455,53.55555556,3.919056091,0.629411,48.774316
49 | 6915,334,493.9285714,55.66666667,9.394775857,2.011904167,143.598287
50 | 720,632,80,57.45454545,1.580340222,3.948873,57.660665
51 | 898,502,81.63636364,55.77777778,0.863104364,4.157749667,46.913895
52 | 812,563,73.81818182,62.55555556,1.908398455,1.516605222,34.64183
53 | 9978,4259,1108.666667,387.1818182,0.021670667,0.041039364,0.646469
54 | 14057,1936,1277.909091,276.5714286,0.000369182,0.000180714,0.005436
55 | 16908,296,1207.714286,59.2,0.000263643,0.0000646,0.004015
56 | 16800,416,1400,52,0.00030475,0.00023425,0.005531
57 | 21000,260,1400,52,9.26667E-05,0.0001938,0.002359
58 | 16112,416,1342.666667,52,0.006949333,0.000110625,0.084277
59 | 19600,312,1400,52,9.07857E-05,0.0001665,0.00227
60 | 18200,364,1400,52,0.000145538,0.000165429,0.00305
61 | 14270,440,1189.166667,55,0.007531583,0.00011625,0.091309
62 | 21000,260,1400,52,0.000220933,0.0001248,0.003938
63 | 18200,364,1400,52,7.10769E-05,0.000096,0.001596
64 | 17038,364,1310.615385,52,0.007216077,0.000156429,0.094904
65 | 18200,364,1400,52,0.000112923,0.000143,0.002469
66 | 19600,312,1400,52,9.40714E-05,0.0001125,0.001992
67 | 18681,312,1334.357143,52,0.006832214,0.000259,0.097205
68 | 19600,312,1400,52,0.000526071,0.000170167,0.008386
69 | 22400,208,1400,52,0.000123938,0.002386,0.002386
70 | 17350,364,1334.615385,52,0.008385538,0.000151857,0.110075
71 | 19600,312,1400,52,0.000210714,0.000222833,0.004287
72 | 19600,312,1400,52,9.24286E-05,0.000155167,0.002225
73 | 16104,416,1342,52,0.000160417,0.000143,0.003069
74 | 23800,156,1400,52,0.007830941,0.000171,0.133639
75 | 19600,312,1400,52,0.0001255,0.000169667,0.002775
76 | 19202,312,1371.571429,52,0.000118643,0.000068,0.002069
77 |
--------------------------------------------------------------------------------
/Dataset/Dataset 2/trojan/DroidJack_75.csv:
--------------------------------------------------------------------------------
1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration
2 | 604,423,60.4,42.3,0.6278833,0.8051764,14.330597
3 | 549,340,45.75,42.5,2.63028075,0.162460625,32.863054
4 | 521,412,47.36363636,45.77777778,1.568119091,0.415189222,20.986013
5 | 556,345,46.33333333,43.125,0.774183833,2.8205075,31.854266
6 | 447,432,44.7,43.2,0.9567922,2.3155305,32.723227
7 | 574,424,47.83333333,53,0.330387,1.794419125,18.319997
8 | 517,418,47,46.44444444,0.337598182,0.931010556,12.092675
9 | 592,435,53.81818182,48.33333333,0.845618909,0.917920667,17.563094
10 | 1923,916,160.25,114.5,2.152666333,0.320335875,28.394683
11 | 12674,4712,1267.4,471.2,0.0012323,0.0001614,0.013937
12 | 452,452,50.22222222,50.22222222,0.000397889,0.000397889,0.009505
13 | 12784,368,1065.333333,46,0.001068083,0.000366375,0.015748
14 | 464,420,46.4,42,0.4963755,2.6102024,31.065779
15 | 486,527,48.6,52.7,0.5307111,2.7305077,32.612188
16 | 549,340,45.75,42.5,1.37366525,2.851924125,39.299376
17 | 504,385,45.81818182,42.77777778,0.393327545,3.127368889,32.472923
18 | 560,307,46.66666667,43.85714286,1.347795917,1.161245143,29.955233
19 | 474,425,47.4,42.5,1.5565769,2.4944899,40.510668
20 | 589,317,45.30769231,45.28571429,1.861570308,2.313324429,40.393685
21 | 531,411,48.27272727,45.66666667,1.367365455,1.029569333,24.307144
22 | 469,385,46.9,42.77777778,0.6075137,2.957443667,32.692963
23 | 611,305,47,43.57142857,2.790732308,1.709250571,48.244274
24 | 447,425,44.7,42.5,0.4051304,2.8595129,32.646433
25 | 8672,370,788.3636364,46.25,0.795775273,0.17183025,10.128547
26 | 16904,280,1300.307692,40,0.002118846,0.000533571,0.03128
27 | 494,385,44.90909091,42.77777778,0.253200455,3.152892444,31.161237
28 | 509,385,46.27272727,42.77777778,2.030481818,2.012164778,40.444783
29 | 584,380,48.66666667,47.5,0.904711417,2.573834875,31.447216
30 | 18272,240,1305.142857,40,0.036395643,0.000689667,0.513677
31 | 7376,323,614.6666667,46.14285714,1.214627667,0.597650714,18.75963
32 | 12877,252,919.7857143,42,0.1103275,0.008587333,1.596109
33 | 12789,325,1065.75,40.625,0.012644917,0.815204125,6.673372
34 | 509,385,46.27272727,42.77777778,0.991480364,3.281982222,40.444124
35 | 514,392,46.72727273,43.55555556,2.268802273,1.730107,40.527788
36 | 373,418,46.625,46.44444444,1.023252625,1.597824333,32.499242
37 | 9973,4176,997.3,417.6,0.1210324,0.0072081,1.282405
38 | 11286,1568,1128.6,156.8,0.0108419,0.0004051,0.11247
39 | 12725,368,1060.416667,46,0.002174083,0.000471625,0.029862
40 | 4618,375,419.8181818,41.66666667,1.135488182,1.166562222,22.98943
41 | 474,425,47.4,42.5,0.6585679,3.4096893,40.682572
42 | 520,436,47.27272727,48.44444444,0.676106909,1.133404556,17.637817
43 | 469,425,46.9,42.5,1.8039247,2.2452726,40.491973
44 | 514,397,46.72727273,44.11111111,1.575110182,2.564276778,40.404703
45 | 4562,460,456.2,46,0.3547376,2.6750203,30.297579
46 | 16872,280,1297.846154,40,0.002590769,0.000374,0.036298
47 | 4638,375,421.6363636,41.66666667,0.471202545,1.977509889,22.980817
48 | 520,415,47.27272727,46.11111111,0.290143727,2.105494889,22.141035
49 | 407,566,45.22222222,51.45454545,0.883423667,2.125143636,31.327393
50 | 579,347,48.25,43.375,0.408606333,1.299145625,15.296441
51 | 561,347,46.75,43.375,0.705417,2.50517425,28.506398
52 | 529,424,48.09090909,47.11111111,0.349791909,2.01566,21.988651
53 | 442,415,44.2,41.5,0.3843969,1.953759,23.381559
54 | 10089,398,840.75,49.75,0.151534417,0.600073,6.618997
55 | 7344,330,612,41.25,0.305987917,1.43025625,15.113905
56 | 442,441,44.2,49,1.0502016,2.444317889,32.505294
57 | 582,342,48.5,42.75,0.896522917,1.227278375,20.576502
58 | 559,444,50.81818182,49.33333333,0.629939091,1.482922556,20.275633
59 | 16912,240,1208,40,0.010454286,0.000471167,0.149187
60 | 514,380,46.72727273,42.22222222,0.564592364,2.802134778,31.429729
61 | 1902,427,172.9090909,47.44444444,0.196882364,1.972188444,19.915402
62 | 15616,240,1115.428571,40,0.001663,0.000615167,0.026973
63 | 424,477,47.11111111,43.36363636,0.796911889,2.901721,39.091138
64 | 527,428,47.90909091,47.55555556,0.304932091,1.780674222,19.380321
65 | 16912,240,1208,40,0.029941214,0.000561333,0.422545
66 | 490,383,49,42.55555556,4.9714792,0.709038667,56.100564
67 | 1878,957,170.7272727,106.3333333,0.747293182,2.118064222,27.282803
68 | 12674,4712,1267.4,471.2,0.0010817,0.00014,0.012217
69 | 11326,444,1029.636364,49.33333333,0.001054455,0.000265,0.013984
70 | 4539,477,453.9,47.7,0.3307535,0.3713376,7.020911
71 | 492,392,44.72727273,43.55555556,1.325905091,2.026035778,32.819278
72 | 580,452,52.72727273,50.22222222,0.375975455,0.405636667,7.78646
73 | 527,433,47.90909091,48.11111111,0.127961273,1.465372556,14.595927
74 | 518,488,51.8,48.8,0.2271266,0.8433118,10.704384
75 | 513,421,46.63636364,46.77777778,0.687467818,0.529674,12.329212
76 | 549,340,45.75,42.5,1.547604417,2.61769375,39.512803
77 |
--------------------------------------------------------------------------------
/Dataset/Dataset 2/trojan/HawkShaw_75.csv:
--------------------------------------------------------------------------------
1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration
2 | 1338,3769,148.6666667,418.7777778,0.015766,0.037854,0.596236
3 | 2781,1671,309,151.9090909,0.007215,0.021438273,0.300753
4 | 1567,1125,195.875,93.75,2.593616,0.034014333,21.157102
5 | 2065,602,206.5,66.88888889,0.019814,0.004838333,0.293994
6 | 2986,520,298.6,52,0.003246,0.0006183,0.038639
7 | 2798,1986,559.6,132.4,0.015186,0.005378867,0.156615
8 | 2853,1342,407.5714286,111.8333333,0.004111,0.002159417,0.119335
9 | 1796,974,199.5555556,88.54545455,0.021278,0.015513364,0.362146
10 | 2036,1041,226.2222222,94.63636364,0.014209,0.009590273,0.233374
11 | 1184,1093,148,91.08333333,0.077522,0.014618083,0.795596
12 | 19600,312,1400,52,0.000438,0.000223833,0.007481
13 | 19600,312,1400,52,0.000113,0.000211333,0.002849
14 | 16800,416,1400,52,0.000271,0.000250625,0.00526
15 | 21000,260,1400,52,0.000122,0.00231,0.00231
16 | 18200,364,1400,52,0.000267,0.000229,0.005072
17 | 19600,312,1400,52,0.000158,0.0001345,0.003025
18 | 16800,416,1400,52,0.000124,0.000134625,0.00257
19 | 19600,312,1400,52,0.000100,0.00012,0.002126
20 | 18200,312,1400,52,0.000178,0.000162,0.003368
21 | 18200,364,1400,52,0.000125,0.000211714,0.003111
22 | 21000,260,1400,52,0.000121,0.0001416,0.002521
23 | 18200,364,1400,52,0.000091,0.000147571,0.002211
24 | 18200,364,1400,52,0.000129,0.00018,0.002932
25 | 19600,312,1400,52,0.000093,0.000139667,0.002144
26 | 18200,364,1400,52,0.000139,0.000101857,0.002517
27 | 16800,364,1400,52,0.000418,0.000220571,0.006589
28 | 18200,364,1400,52,0.000816,0.000856714,0.016609
29 | 21000,260,1400,52,0.000056,0.0001496,0.001587
30 | 5374,654,597.1111111,65.4,0.015354,0.023884,0.793215
31 | 1902,1617,237.75,134.75,0.161324,0.028680083,1.634756
32 | 2088,922,208.8,92.2,0.502505,0.0485692,5.510743
33 | 1747,855,174.7,85.5,1.811875,0.0450097,18.568849
34 | 1345,855,134.5,85.5,0.750307,0.0464091,7.967163
35 | 1025,653,102.5,65.3,25.919710,12.0291471,379.488571
36 | 2419,4186,268.7777778,380.5454545,0.041541,0.042222909,0.838322
37 | 6860,1318,762.2222222,119.8181818,0.019397,0.020134545,0.396052
38 | 989,2616,164.8333333,186.8571429,0.080633,0.010938429,0.636936
39 | 1215,860,121.5,86,10.065422,0.0469063,101.12328
40 | 1589,860,158.9,86,0.071415,0.0576619,1.290768
41 | 1614,860,161.4,86,0.048999,0.0576762,1.066747
42 | 1639,860,163.9,86,0.064626,0.0340103,0.986367
43 | 1895,792,189.5,79.2,0.032678,0.0463268,0.790049
44 | 1458,928,145.8,92.8,0.047416,0.055569,1.029849
45 | 1714,860,171.4,86,0.187834,0.0577943,2.456282
46 | 1252,822,139.1111111,74.72727273,6.841962,0.041854,62.038049
47 | 1217,3757,110.6363636,417.4444444,3.064854,0.039054667,34.064883
48 | 2298,1820,255.3333333,165.4545455,7.590469,0.043089182,68.788203
49 | 1028,3864,114.2222222,351.2727273,6.086610,0.035679909,55.171972
50 | 2995,1863,332.7777778,169.3636364,0.015072,0.042623455,0.604502
51 | 1875,792,187.5,79.2,3.570179,0.02908,35.992589
52 | 1795,928,179.5,92.8,0.285265,0.0378509,3.231163
53 | 2057,1963,228.5555556,196.3,4.314500,0.0471139,39.314757
54 | 1964,2271,196.4,227.1,0.181284,0.0235935,2.048776
55 | 2154,980,239.3333333,89.09090909,12.021741,0.085423909,109.135335
56 | 2270,975,227,97.5,0.131224,1.55829,1.55829
57 | 2040,860,204,86,3.346793,0.0350576,33.81851
58 | 2648,740,240.7272727,82.22222222,1.347818,0.012898333,14.942087
59 | 1452,996,145.2,99.6,0.013731,0.0122205,0.259513
60 | 2295,1392,459,92.8,0.012347,0.015879267,0.299922
61 | 2090,844,232.2222222,76.72727273,0.013231,0.011104909,0.241232
62 | 1423,1408,284.6,100.5714286,0.127665,0.025620286,0.998865
63 | 1641,860,164.1,86,4.060108,0.0475922,41.076997
64 | 1908,912,212,82.90909091,13.096996,0.032658091,118.232202
65 | 2175,1220,310.7142857,93.84615385,0.027279,0.018778077,0.435065
66 | 1977,1152,282.4285714,88.61538462,0.029776,0.018739846,0.452048
67 | 1963,1084,280.4285714,83.38461538,0.017568,0.017978615,0.356699
68 | 1567,1100,195.875,91.66666667,0.073538,0.030069667,0.949139
69 | 1625,860,162.5,86,5.902242,0.0590767,59.613187
70 | 1937,860,193.7,86,6.068196,0.0355086,61.037045
71 | 2227,860,222.7,86,0.206149,0.0237078,2.298564
72 | 2028,860,202.8,86,12.344468,0.0688068,124.132743
73 | 3011,724,301.1,72.4,0.026888,0.0038576,0.307453
74 | 932,1184,103.5555556,107.6363636,0.006000,0.004229091,0.100518
75 | 4098,1504,455.3333333,136.7272727,0.026160,0.011338636,0.360161
76 | 1047,1340,174.5,95.71428571,0.015656,0.008912643,0.218713
77 |
--------------------------------------------------------------------------------
/Dataset/Dataset 2/trojan/SpyMax_75.csv:
--------------------------------------------------------------------------------
1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration
2 | 826,10152,82.6,1015.2,0.12666,0.0217003,1.483603
3 | 468,14104,52,1282.181818,0.000463222,0.001868182,0.024719
4 | 520,13172,52,1317.2,0.0002183,0.0011614,0.013797
5 | 520,13092,52,1309.2,0.0003639,0.0013296,0.016935
6 | 624,9757,52,1219.625,0.003626333,0.00346975,0.071274
7 | 5787,1420,526.0909091,157.7777778,1.000515909,3.711587222,44.40996
8 | 1014,996,101.4,99.6,0.2021121,8.6185294,88.206415
9 | 1228,788,102.3333333,98.5,0.443867583,10.818956,91.878059
10 | 996,974,90.54545455,108.2222222,0.287269091,9.652376,90.031344
11 | 857,1075,85.7,107.5,0.1532421,7.9874633,81.407054
12 | 1263,772,105.25,96.5,0.234897667,6.35791525,53.682094
13 | 990,996,99,99.6,0.1322238,8.7539815,88.862053
14 | 1260,788,105,98.5,0.279665583,10.98897188,91.267762
15 | 988,976,98.8,97.6,0.1109985,7.0384344,71.494329
16 | 1008,902,91.63636364,100.2222222,0.169979364,8.465874667,78.062645
17 | 996,979,90.54545455,108.7777778,0.158102455,9.791487444,89.862514
18 | 1254,882,114,98,0.163565636,8.254633556,76.090924
19 | 1011,1142,101.1,114.2,0.0204721,8.8920615,89.125336
20 | 1275,897,115.9090909,99.66666667,0.130686909,8.191028222,75.15681
21 | 1064,6139,96.72727273,682.1111111,0.080817,3.018422667,28.054791
22 | 608,8715,55.27272727,968.3333333,0.000739455,0.000432222,0.012024
23 | 416,12915,52,1076.25,0.00079075,0.003094167,0.043456
24 | 312,15715,52,1122.5,0.001864167,0.003644071,0.062202
25 | 312,14420,52,1030,0.001584667,0.002784143,0.048486
26 | 364,14315,52,1101.153846,0.001120571,0.004044077,0.060417
27 | 312,15715,52,1122.5,0.001177667,0.002842357,0.046859
28 | 364,13020,52,1001.538462,0.001901286,0.003154385,0.054316
29 | 312,15715,52,1122.5,0.002144,0.003084786,0.056051
30 | 364,14315,52,1101.153846,0.002700429,0.002597538,0.052671
31 | 364,13020,52,1001.538462,0.002321143,0.002335846,0.046614
32 | 312,15715,52,1122.5,0.002924167,0.002739786,0.055902
33 | 364,14315,52,1101.153846,0.002100714,0.002587923,0.048348
34 | 364,13020,52,1001.538462,0.001599429,0.002908231,0.049003
35 | 312,15715,52,1122.5,0.001403833,0.003628214,0.059218
36 | 364,14315,52,1101.153846,0.001497,0.002933231,0.048611
37 | 364,13020,52,1001.538462,0.001539571,0.002925769,0.048812
38 | 15715,15715,1122.5,1122.5,0.003536429,0.003536429,0.058592
39 | 766,4237,85.11111111,385.1818182,0.888631556,2.110949182,31.218125
40 | 1228,788,102.3333333,98.5,0.120679583,11.14400463,90.600192
41 | 1062,1011,106.2,101.1,0.0467927,8.9275389,89.743316
42 | 1228,788,102.3333333,98.5,0.055113833,11.206878,90.31639
43 | 1294,897,117.6363636,99.66666667,0.824873636,8.995013222,90.028729
44 | 1080,912,90,114,2.490228167,105.248529,105.248529
45 | 1048,850,87.33333333,106.25,2.4790745,9.433771,105.219062
46 | 1294,897,117.6363636,99.66666667,2.630769636,5.135479556,75.157782
47 | 996,902,90.54545455,100.2222222,2.650070455,8.452001222,105.218786
48 | 11372,4308,1033.818182,478.6666667,0.008387182,0.360653111,3.338137
49 | 16859,1884,1296.846154,314,0.000267308,0.0000985,0.004166
50 | 6417,654,583.3636364,72.66666667,1.009899273,1.734581556,26.720126
51 | 1305,976,118.6363636,108.4444444,2.616980909,5.096328444,74.653746
52 | 732,1266,73.2,126.6,1.288615,6.6868396,79.754546
53 | 1305,840,118.6363636,93.33333333,2.181283455,6.901020778,86.103305
54 | 1228,788,102.3333333,98.5,3.49053675,7.771528125,104.058666
55 | 1028,840,93.45454545,93.33333333,2.512567818,6.959213111,90.271164
56 | 1040,973,104,108.1111111,2.7263012,6.958272222,89.888707
57 | 1028,840,93.45454545,93.33333333,2.485776455,7.005427556,90.392389
58 | 996,984,90.54545455,109.3333333,2.466362818,6.989017,90.031144
59 | 996,976,90.54545455,108.4444444,2.464541,6.989515778,90.015593
60 | 1277,959,116.0909091,106.5555556,2.417429818,7.027518,89.83939
61 | 1028,840,93.45454545,93.33333333,90.238448,7.115695778,90.238448
62 | 996,976,90.54545455,108.4444444,2.353422545,7.127103,90.031575
63 | 1040,1145,104,114.5,1.2887131,6.4206516,77.093647
64 | 1228,788,102.3333333,98.5,3.137792917,8.140542125,102.777852
65 | 1028,840,93.45454545,93.33333333,2.272796727,7.253093,90.278601
66 | 996,902,90.54545455,100.2222222,2.270013636,7.486191444,92.345873
67 | 950,6216,86.36363636,690.6666667,0.024406,3.273078778,29.726175
68 | 416,12915,52,1076.25,0.00074825,0.000337583,0.010037
69 | 312,15715,52,1122.5,0.001034333,0.000356571,0.011198
70 | 312,14420,52,1030,0.000980667,0.000331286,0.010522
71 | 416,12862,52,1071.833333,0.001092875,0.00054225,0.013465
72 | 312,15715,52,1122.5,0.001011333,0.000387714,0.011496
73 | 312,14420,52,1030,0.000966,0.000308,0.010108
74 | 416,12915,52,1076.25,0.001479875,0.000473,0.017515
75 | 312,15715,52,1122.5,0.001565667,0.000498786,0.016377
76 | 364,14315,52,1101.153846,0.001513,0.000451615,0.016462
77 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Android-RAT-Dataset
2 |
This repo contains all the datasets for my research/analysis on:
3 | "DETEKSI REMOTE ACCESS TROJAN PADA ANDROID BERBASIS PENGAMATAN LALU LINTAS JARINGAN MENGGUNAKAN MACHINE LEARNING."
4 |
5 | # Apps Name
6 | | Name | Label |
7 | | ---------- | ---------- |
8 | | AhMyth | Trojan |
9 | | AndroidRAT | Trojan |
10 | | AndroidTester | Trojan |
11 | | DroidJack | Trojan |
12 | | HawkShaw | Trojan |
13 | | SpyMax | Trojan |
14 | | Google | Benign |
15 | | Facebook | Benign |
16 | | Twitter | Benign |
17 |
18 | # DATA SET 1 FEATURES
19 | | Name | Desc |
20 | | ----------- | ------------ |
21 | | Source | The source port number of the packet |
22 | | Destination | The destination port number of the packet |
23 | | Protocol | The type of protocol used by the packet |
24 | | Length | The size of data generated by the packet (Byte) |
25 | | Type | Type of packet - In or Out |
26 | | Duration | The duration between the previous packet and the next packet (Second) |
27 |
28 | # DATA SET 2 FEATURES
29 | | Name | Desc |
30 | | ------------- | ------------ |
31 | | Total Length Out | The total data size generated by outgoing packets among 20 mixed packets (Byte) |
32 | | Total Length In | The total data size generated by incoming packets among 20 mixed packets (Byte) |
33 | | Avg Packet Length Out | The average data size generated by outgoing packets among 20 mixed packets (Byte) |
34 | | Avg Packet Length In | The average data size generated by incoming packets among 20 mixed packets (Byte) |
35 | | Avg Duration Out | The average duration required by outgoing packets among 20 mixed packets (Second) |
36 | | Avg Duration In | The average duration required by incoming packets among 20 mixed packets (Second) |
37 | | Total Duration | The total duration required by the 20 mixed packets (Second) |
38 |
39 | # Dataset
40 | | | Dataset 1 | Dataset 2 |
41 | | ----- | ---- | ---- |
42 | | Training | 14400 | 5400 |
43 | | Testing | 3600 | 600 |
44 | | Total | 18000 | 6000 |
45 |
46 | *note : the raw/original dataset contains more rows per RAT pcap
47 |
48 | # Accuracy and Machine Learning Algorithm Used
49 | | Dataset 1 | Decision Tree | Random Forest | Naive Bayes |
50 | | ------------ | ---------- | ---------- | ---------- |
51 | | N900 - RAT90 | 0.9989 | 0.9990 | 0.1181 |
52 | | N1800 - RAT90 | 0.9994 | 0.9989 | 0.0756 |
53 | | N1800 - RAT1800 | 0.9997 | 0.9997 | 0.4691 |
54 |
55 | | Dataset 2 | Decision Tree | Random Forest | Naive Bayes |
56 | | ------------ | ---------- | ---------- | ---------- |
57 | | N150 - RAT10 | 0.873 | 0.933 | 0.513 |
58 | | N300 - RAT10 | 0.883 | 0.929 | 0.545 |
59 | | N300 - RAT300 | 0.90 | 0.928 | 0.71 |
60 |
61 | *note : accuracy values are based on the Python (Jupyter) results
62 |
63 | # Source Original Dataset
64 | Name : Android Mischief Dataset v1
65 | Author : Kamila Babayeva, Stratosphere Laboratory
66 | Date : November 18th 2020
67 | URL : https://www.stratosphereips.org/android-mischief-dataset
68 |
69 | # Looking for Android Remote Access Trojans?
70 | URL : https://github.com/wishihab/Android-RATList
71 |
--------------------------------------------------------------------------------
/Remote Access Trojan Detection On Android Based On Network Traffic Observation Using Machine Learning.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Remote Access Trojan Detection On Android Based On Network Traffic Observation Using Machine Learning.pdf
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Decision Tree/Decision Tree Scratch.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from sklearn import tree"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "clf = tree.DecisionTreeClassifier();"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 15,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "# Gmail 3 Facebook 3 Twitter 3 \n",
28 | "# RAT 3 Tester 3 Droidjack 3\n",
29 | "# TLSv.1.3 = 3\n",
30 | "# TLSv.1.2 = 2\n",
31 | "# tcp = 1\n",
32 | "# udp = 0\n",
33 | "\n",
34 | "# out 0\n",
35 | "# in 1\n",
36 | "\n",
37 | "# Length, Data, Protocol, Duration, Type, Source, Destination\n",
38 | "X = [\n",
39 | " ['591','0','3','0.00153','0','54534','443'],\n",
40 | " ['52','0','1','0.001147','0','54534','443'],\n",
41 | " ['60','0','1','0','0','54534','443'],\n",
42 | " ['60','0','1','0','0','33952','443'],\n",
43 | " ['60','0','1','0.000755','1','443','33952'],\n",
44 | " ['452','0','3','0.00153','0','33952','443'],\n",
45 | " ['60','0','1','0.007413','1','443','56228'],\n",
46 | " ['52','0','1','0.001804','0','56228','443'],\n",
47 | " ['569','0','2','0.012655','0','56228','443'],\n",
48 | " ['60','0','1','0','0','37451','1337'],\n",
49 | " ['60','0','1','0.022053','1','1337','37451'],\n",
50 | " ['52','0','1','0.006701','0','37451','1337'],\n",
51 | " ['59','7','1','0.281862','1','1337','37451'],\n",
52 | " ['52','0','1','0.107486','0','37451','1337'],\n",
53 | " ['59','7','1','11.910185','1','1337','37451'],\n",
54 | " ['40','0','1','0.041368','0','41893','1337'],\n",
55 | " ['45','5','1','1.422961','0','41893','1337'],\n",
56 | " ['45','5','1','0.270677','1','1337','41893']\n",
57 | " \n",
58 | "]"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 16,
64 | "metadata": {},
65 | "outputs": [
66 | {
67 | "data": {
68 | "text/plain": [
69 | "[['591', '0', '3', '0.00153', '0', '54534', '443'],\n",
70 | " ['52', '0', '1', '0.001147', '0', '54534', '443'],\n",
71 | " ['60', '0', '1', '0', '0', '54534', '443'],\n",
72 | " ['60', '0', '1', '0', '0', '33952', '443'],\n",
73 | " ['60', '0', '1', '0.000755', '1', '443', '33952'],\n",
74 | " ['452', '0', '3', '0.00153', '0', '33952', '443'],\n",
75 | " ['60', '0', '1', '0.007413', '1', '443', '56228'],\n",
76 | " ['52', '0', '1', '0.001804', '0', '56228', '443'],\n",
77 | " ['569', '0', '2', '0.012655', '0', '56228', '443'],\n",
78 | " ['60', '0', '1', '0', '0', '37451', '1337'],\n",
79 | " ['60', '0', '1', '0.022053', '1', '1337', '37451'],\n",
80 | " ['52', '0', '1', '0.006701', '0', '37451', '1337'],\n",
81 | " ['59', '7', '1', '0.281862', '1', '1337', '37451'],\n",
82 | " ['52', '0', '1', '0.107486', '0', '37451', '1337'],\n",
83 | " ['59', '7', '1', '11.910185', '1', '1337', '37451'],\n",
84 | " ['40', '0', '1', '0.041368', '0', '41893', '1337'],\n",
85 | " ['45', '5', '1', '1.422961', '0', '41893', '1337'],\n",
86 | " ['45', '5', '1', '0.270677', '1', '1337', '41893']]"
87 | ]
88 | },
89 | "execution_count": 16,
90 | "metadata": {},
91 | "output_type": "execute_result"
92 | }
93 | ],
94 | "source": [
95 | "X"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 17,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": [
104 | "Y = [\n",
105 | " ['Benign'],\n",
106 | " ['Benign'],\n",
107 | " ['Benign'],\n",
108 | " ['Benign'],\n",
109 | " ['Benign'],\n",
110 | " ['Benign'],\n",
111 | " ['Benign'],\n",
112 | " ['Benign'],\n",
113 | " ['Benign'],\n",
114 | " ['Trojan'],\n",
115 | " ['Trojan'],\n",
116 | " ['Trojan'],\n",
117 | " ['Trojan'],\n",
118 | " ['Trojan'],\n",
119 | " ['Trojan'],\n",
120 | " ['Trojan'],\n",
121 | " ['Trojan'],\n",
122 | " ['Trojan']\n",
123 | "]"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 13,
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/plain": [
134 | "[['Benign'],\n",
135 | " ['Benign'],\n",
136 | " ['Benign'],\n",
137 | " ['Benign'],\n",
138 | " ['Benign'],\n",
139 | " ['Benign'],\n",
140 | " ['Benign'],\n",
141 | " ['Benign'],\n",
142 | " ['Benign'],\n",
143 | " ['Trojan'],\n",
144 | " ['Trojan'],\n",
145 | " ['Trojan'],\n",
146 | " ['Trojan'],\n",
147 | " ['Trojan'],\n",
148 | " ['Trojan'],\n",
149 | " ['Trojan'],\n",
150 | " ['Trojan'],\n",
151 | " ['Trojan']]"
152 | ]
153 | },
154 | "execution_count": 13,
155 | "metadata": {},
156 | "output_type": "execute_result"
157 | }
158 | ],
159 | "source": [
160 | "Y"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 18,
166 | "metadata": {},
167 | "outputs": [],
168 | "source": [
169 | "clf = clf.fit(X,Y)"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 19,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "name": "stdout",
179 | "output_type": "stream",
180 | "text": [
181 | "['Benign']\n"
182 | ]
183 | }
184 | ],
185 | "source": [
186 | "prediction = clf.predict([['52','0','1','0.007339','1','443','56228']])\n",
187 | "print(prediction)"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "metadata": {},
194 | "outputs": [],
195 | "source": []
196 | }
197 | ],
198 | "metadata": {
199 | "kernelspec": {
200 | "display_name": "Python 3",
201 | "language": "python",
202 | "name": "python3"
203 | },
204 | "language_info": {
205 | "codemirror_mode": {
206 | "name": "ipython",
207 | "version": 3
208 | },
209 | "file_extension": ".py",
210 | "mimetype": "text/x-python",
211 | "name": "python",
212 | "nbconvert_exporter": "python",
213 | "pygments_lexer": "ipython3",
214 | "version": "3.8.5"
215 | }
216 | },
217 | "nbformat": 4,
218 | "nbformat_minor": 4
219 | }
220 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Decision Tree/Decision Trees - Model 1 - 2021.rmp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Decision Tree/Images/Decision Tree - RapidMiner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/Decision Tree - RapidMiner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Decision Tree/Images/Decision Tree Result - Python (Jupyter) vs RapidMiner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/Decision Tree Result - Python (Jupyter) vs RapidMiner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Decision Tree/Images/imagename.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/imagename.png
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev02.png
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev03.png
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes - Rapid Miner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes - Rapid Miner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes Result - Python (Jupyter) vs RapidMiner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes Result - Python (Jupyter) vs RapidMiner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Naive Bayes/Naive Bayes - Model 1 - 2021.rmp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "\n",
12 | "from sklearn.naive_bayes import GaussianNB"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "def read_file(filename):\n",
22 | " df = pd.read_csv(filename)\n",
23 | " print(df.shape)\n",
24 | " df['Label'] = df['Label'].apply(lambda x: x.strip().lower())\n",
25 | " return df"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "(14400, 7)\n",
38 | "(990, 7)\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "FILE_HO_TRAIN = 'training.csv'\n",
44 | "FILE_HO_TEST = 'testingv01.csv'\n",
45 | "\n",
46 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
47 | "df_ho_test = read_file(FILE_HO_TEST)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 4,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "from sklearn.preprocessing import LabelEncoder\n",
57 | "\n",
58 | "target_encoder = LabelEncoder()\n",
59 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['Label'])\n",
60 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['Label'])\n",
61 | "\n",
62 | "x_ho_train = df_ho_train.drop(['Label', 'Target'], axis=1)\n",
63 | "y_ho_train = df_ho_train['Target']\n",
64 | "\n",
65 | "x_ho_test = df_ho_test.drop(['Label', 'Target'], axis=1)\n",
66 | "y_ho_test = df_ho_test['Target']"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 5,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "data": {
76 | "text/plain": [
77 | "0.11818181818181818"
78 | ]
79 | },
80 | "execution_count": 5,
81 | "metadata": {},
82 | "output_type": "execute_result"
83 | }
84 | ],
85 | "source": [
86 | "# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes\n",
87 | "modelnb = GaussianNB()\n",
88 | "\n",
89 | "# Memasukkan data training pada fungsi klasifikasi Naive Bayes\n",
90 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n",
91 | "modelnb.score(x_ho_test,y_ho_test)"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 6,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "name": "stdout",
101 | "output_type": "stream",
102 | "text": [
103 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 11.818181818181818\n"
104 | ]
105 | }
106 | ],
107 | "source": [
108 | "from sklearn.metrics import accuracy_score, f1_score\n",
109 | "\n",
110 | "predicted= modelnb.predict(x_ho_test)\n",
111 | "\n",
112 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
113 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 7,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "data": {
123 | "image/png": "\n",
124 | "text/plain": [
125 | ""
126 | ]
127 | },
128 | "metadata": {
129 | "needs_background": "light"
130 | },
131 | "output_type": "display_data"
132 | }
133 | ],
134 | "source": [
135 | "# Create Confusion Matrix\n",
136 | "\n",
137 | "import seaborn as sns\n",
138 | "import matplotlib.pyplot as plt\n",
139 | "\n",
140 | "from sklearn.metrics import confusion_matrix\n",
141 | "confusion_matrix(y_ho_test, predicted) \n",
142 | "\n",
143 | "f, ax = plt.subplots(figsize=(8,5))\n",
144 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
145 | "plt.xlabel(\"Predicted Class\")\n",
146 | "plt.ylabel(\"Actual Class\")\n",
147 | "plt.show()"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 8,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | " precision recall f1-score support\n",
160 | "\n",
161 | " 0 0.91 0.03 0.06 900\n",
162 | " 1 0.09 0.97 0.17 90\n",
163 | "\n",
164 | " accuracy 0.12 990\n",
165 | " macro avg 0.50 0.50 0.12 990\n",
166 | "weighted avg 0.83 0.12 0.07 990\n",
167 | "\n"
168 | ]
169 | }
170 | ],
171 | "source": [
172 | "# Performance Matrix Report - Precision Recall f1score\n",
173 | "\n",
174 | "from sklearn.metrics import classification_report\n",
175 | "print (classification_report(y_ho_test, predicted))"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.8.5"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 4
207 | }
208 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "\n",
12 | "from sklearn.naive_bayes import GaussianNB"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "def read_file(filename):\n",
22 | " df = pd.read_csv(filename)\n",
23 | " print(df.shape)\n",
24 | " df['Label'] = df['Label'].apply(lambda x: x.strip().lower())\n",
25 | " return df"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "(14400, 7)\n",
38 | "(1890, 7)\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "FILE_HO_TRAIN = 'training.csv'\n",
44 | "FILE_HO_TEST = 'testingv02.csv'\n",
45 | "\n",
46 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
47 | "df_ho_test = read_file(FILE_HO_TEST)"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 4,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "from sklearn.preprocessing import LabelEncoder\n",
57 | "\n",
58 | "target_encoder = LabelEncoder()\n",
59 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['Label'])\n",
60 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['Label'])\n",
61 | "\n",
62 | "x_ho_train = df_ho_train.drop(['Label', 'Target'], axis=1)\n",
63 | "y_ho_train = df_ho_train['Target']\n",
64 | "\n",
65 | "x_ho_test = df_ho_test.drop(['Label', 'Target'], axis=1)\n",
66 | "y_ho_test = df_ho_test['Target']"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 5,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "data": {
76 | "text/plain": [
77 | "0.07566137566137567"
78 | ]
79 | },
80 | "execution_count": 5,
81 | "metadata": {},
82 | "output_type": "execute_result"
83 | }
84 | ],
85 | "source": [
86 | "# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes\n",
87 | "modelnb = GaussianNB()\n",
88 | "\n",
89 | "# Memasukkan data training pada fungsi klasifikasi Naive Bayes\n",
90 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n",
91 | "modelnb.score(x_ho_test,y_ho_test)"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 6,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "name": "stdout",
101 | "output_type": "stream",
102 | "text": [
103 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 7.5661375661375665\n"
104 | ]
105 | }
106 | ],
107 | "source": [
108 | "from sklearn.metrics import accuracy_score, f1_score\n",
109 | "\n",
110 | "predicted= modelnb.predict(x_ho_test)\n",
111 | "\n",
112 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
113 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 7,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "data": {
123 | "image/png": "\n",
124 | "text/plain": [
125 | ""
126 | ]
127 | },
128 | "metadata": {
129 | "needs_background": "light"
130 | },
131 | "output_type": "display_data"
132 | }
133 | ],
134 | "source": [
135 | "# Create Confusion Matrix\n",
136 | "\n",
137 | "import seaborn as sns\n",
138 | "import matplotlib.pyplot as plt\n",
139 | "\n",
140 | "from sklearn.metrics import confusion_matrix\n",
141 | "confusion_matrix(y_ho_test, predicted) \n",
142 | "\n",
143 | "f, ax = plt.subplots(figsize=(8,5))\n",
144 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
145 | "plt.xlabel(\"Predicted Class\")\n",
146 | "plt.ylabel(\"Actual Class\")\n",
147 | "plt.show()"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 8,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "name": "stdout",
157 | "output_type": "stream",
158 | "text": [
159 | " precision recall f1-score support\n",
160 | "\n",
161 | " 0 0.95 0.03 0.06 1800\n",
162 | " 1 0.05 0.97 0.09 90\n",
163 | "\n",
164 | " accuracy 0.08 1890\n",
165 | " macro avg 0.50 0.50 0.08 1890\n",
166 | "weighted avg 0.91 0.08 0.06 1890\n",
167 | "\n"
168 | ]
169 | }
170 | ],
171 | "source": [
172 | "# Performance Matrix Report - Precision Recall f1score\n",
173 | "\n",
174 | "from sklearn.metrics import classification_report\n",
175 | "print (classification_report(y_ho_test, predicted))"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": []
184 | }
185 | ],
186 | "metadata": {
187 | "kernelspec": {
188 | "display_name": "Python 3",
189 | "language": "python",
190 | "name": "python3"
191 | },
192 | "language_info": {
193 | "codemirror_mode": {
194 | "name": "ipython",
195 | "version": 3
196 | },
197 | "file_extension": ".py",
198 | "mimetype": "text/x-python",
199 | "name": "python",
200 | "nbconvert_exporter": "python",
201 | "pygments_lexer": "ipython3",
202 | "version": "3.8.5"
203 | }
204 | },
205 | "nbformat": 4,
206 | "nbformat_minor": 4
207 | }
208 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/README.md:
--------------------------------------------------------------------------------
1 | # csv
2 |
3 | # Algorithms
4 |
5 | 1. Decision Tree
6 | 2. Random Forest
7 | 3. Naive Bayes
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Random Forest/Images/Random Forest - Rapid Miner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Random Forest/Images/Random Forest - Rapid Miner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Random Forest/Images/Random Forest Result - Python (Jupyter) vs RapidMiner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Random Forest/Images/Random Forest Result - Python (Jupyter) vs RapidMiner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Random Forest/Random Forest - Model 1 - 2021.rmp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 1/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "\n",
12 | "from sklearn.ensemble import RandomForestClassifier"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "def read_file(filename):\n",
22 | " df = pd.read_csv(filename)\n",
23 | " print(df.shape)\n",
24 | " df['Label'] = df['Label'].apply(lambda x: x.strip().lower())\n",
25 | " return df"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "(14400, 7)\n",
38 | "(990, 7)\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "# training csv is 80% (14400 rows) of 18000 dataset model 1 - transformed\n",
44 | "# testingv01 csv has 990 rows (900 benign + 90 trojan)\n",
45 | "\n",
46 | "FILE_HO_TRAIN = 'training.csv'\n",
47 | "FILE_HO_TEST = 'testingv01.csv'\n",
48 | "\n",
49 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
50 | "df_ho_test = read_file(FILE_HO_TEST)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 4,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "from sklearn.preprocessing import LabelEncoder\n",
60 | "\n",
61 | "target_encoder = LabelEncoder()\n",
62 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['Label'])\n",
63 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['Label'])\n",
64 | "\n",
65 | "x_ho_train = df_ho_train.drop(['Label', 'Target'], axis=1)\n",
66 | "y_ho_train = df_ho_train['Target']\n",
67 | "\n",
68 | "x_ho_test = df_ho_test.drop(['Label', 'Target'], axis=1)\n",
69 | "y_ho_test = df_ho_test['Target']"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 5,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "data": {
79 | "text/plain": [
80 | "0.998989898989899"
81 | ]
82 | },
83 | "execution_count": 5,
84 | "metadata": {},
85 | "output_type": "execute_result"
86 | }
87 | ],
88 | "source": [
89 | "# do like rapidminer setting\n",
90 | "# criterion entropy is information gain\n",
91 | "# estimator set 100\n",
92 | "# max depth 10\n",
93 | "\n",
94 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n",
95 | "modelRF.fit(x_ho_train,y_ho_train)\n",
96 | "modelRF.score(x_ho_test,y_ho_test)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 6,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "name": "stdout",
106 | "output_type": "stream",
107 | "text": [
108 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 99.8989898989899\n"
109 | ]
110 | }
111 | ],
112 | "source": [
113 | "from sklearn.metrics import accuracy_score, f1_score\n",
114 | "\n",
115 | "predicted= modelRF.predict(x_ho_test)\n",
116 | "\n",
117 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
118 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 7,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "data": {
128 | "image/png": "\n",
129 | "text/plain": [
130 | ""
131 | ]
132 | },
133 | "metadata": {
134 | "needs_background": "light"
135 | },
136 | "output_type": "display_data"
137 | }
138 | ],
139 | "source": [
140 | "# Create Confusion Matrix\n",
141 | "\n",
142 | "import seaborn as sns\n",
143 | "import matplotlib.pyplot as plt\n",
144 | "\n",
145 | "from sklearn.metrics import confusion_matrix\n",
146 | "confusion_matrix(y_ho_test, predicted) \n",
147 | "\n",
148 | "f, ax = plt.subplots(figsize=(8,5))\n",
149 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
150 | "plt.xlabel(\"Predicted Class\")\n",
151 | "plt.ylabel(\"Actual Class\")\n",
152 | "plt.show()"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 8,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | " precision recall f1-score support\n",
165 | "\n",
166 | " 0 1.00 1.00 1.00 900\n",
167 | " 1 0.99 1.00 0.99 90\n",
168 | "\n",
169 | " accuracy 1.00 990\n",
170 | " macro avg 0.99 1.00 1.00 990\n",
171 | "weighted avg 1.00 1.00 1.00 990\n",
172 | "\n"
173 | ]
174 | }
175 | ],
176 | "source": [
177 | "# Classification report: precision, recall and F1-score per class\n",
178 | "\n",
179 | "from sklearn.metrics import classification_report\n",
180 | "print (classification_report(y_ho_test, predicted))"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": null,
186 | "metadata": {},
187 | "outputs": [],
188 | "source": []
189 | }
190 | ],
191 | "metadata": {
192 | "kernelspec": {
193 | "display_name": "Python 3",
194 | "language": "python",
195 | "name": "python3"
196 | },
197 | "language_info": {
198 | "codemirror_mode": {
199 | "name": "ipython",
200 | "version": 3
201 | },
202 | "file_extension": ".py",
203 | "mimetype": "text/x-python",
204 | "name": "python",
205 | "nbconvert_exporter": "python",
206 | "pygments_lexer": "ipython3",
207 | "version": "3.8.5"
208 | }
209 | },
210 | "nbformat": 4,
211 | "nbformat_minor": 4
212 | }
213 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Decision Tree/Images/Decision Tree Result dataset 2- Python (Jupyter) vs RapidMiner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/Decision Tree Result dataset 2- Python (Jupyter) vs RapidMiner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Decision Tree/Images/RapidMiner Process Model.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/RapidMiner Process Model.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Decision Tree/Images/imagename01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/imagename01.png
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Decision Tree/Images/imagename02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/imagename02.png
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Decision Tree/Images/imagename03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/imagename03.png
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Naive Bayes/Images/Naive Bayes result dataset2 - Python (Jupyter) vs RapidMiner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Naive Bayes/Images/Naive Bayes result dataset2 - Python (Jupyter) vs RapidMiner.png
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Naive Bayes/Images/RapidMiner Naive Bayes Process Model.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Naive Bayes/Images/RapidMiner Naive Bayes Process Model.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "\n",
11 | "from sklearn.naive_bayes import GaussianNB"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "def read_file(filename):\n",
21 | " df = pd.read_csv(filename)\n",
22 | " print(df.shape)\n",
23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n",
24 | " return df"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 4,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "name": "stdout",
34 | "output_type": "stream",
35 | "text": [
36 | "(5400, 8)\n",
37 | "(150, 8)\n"
38 | ]
39 | }
40 | ],
41 | "source": [
42 | "FILE_HO_TRAIN = 'training.csv'\n",
43 | "FILE_HO_TEST = 'testingv1.csv'\n",
44 | "\n",
45 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
46 | "df_ho_test = read_file(FILE_HO_TEST)"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 5,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "from sklearn.preprocessing import LabelEncoder\n",
56 | "\n",
57 | "target_encoder = LabelEncoder()\n",
58 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n",
59 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n",
60 | "\n",
61 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n",
62 | "y_ho_train = df_ho_train['Target']\n",
63 | "\n",
64 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n",
65 | "y_ho_test = df_ho_test['Target']"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 7,
71 | "metadata": {},
72 | "outputs": [
73 | {
74 | "data": {
75 | "text/plain": [
76 | "0.5133333333333333"
77 | ]
78 | },
79 | "execution_count": 7,
80 | "metadata": {},
81 | "output_type": "execute_result"
82 | }
83 | ],
84 | "source": [
85 | "# Create the Gaussian Naive Bayes classifier\n",
86 | "modelnb = GaussianNB()\n",
87 | "\n",
88 | "# Fit the Naive Bayes classifier on the training data\n",
89 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n",
90 | "modelnb.score(x_ho_test,y_ho_test)"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 8,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = 51.33333333333333\n"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "from sklearn.metrics import accuracy_score, f1_score\n",
108 | "\n",
109 | "predicted= modelnb.predict(x_ho_test)\n",
110 | "\n",
111 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
112 | "print('Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 9,
118 | "metadata": {},
119 | "outputs": [
120 | {
121 | "data": {
122 | "image/png": "\n",
123 | "text/plain": [
124 | ""
125 | ]
126 | },
127 | "metadata": {
128 | "needs_background": "light"
129 | },
130 | "output_type": "display_data"
131 | }
132 | ],
133 | "source": [
134 | "# Create Confusion Matrix\n",
135 | "\n",
136 | "import seaborn as sns\n",
137 | "import matplotlib.pyplot as plt\n",
138 | "\n",
139 | "from sklearn.metrics import confusion_matrix\n",
140 | "confusion_matrix(y_ho_test, predicted) \n",
141 | "\n",
142 | "f, ax = plt.subplots(figsize=(8,5))\n",
143 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
144 | "plt.xlabel(\"Predicted Class\")\n",
145 | "plt.ylabel(\"Actual Class\")\n",
146 | "plt.show()"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 10,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | " precision recall f1-score support\n",
159 | "\n",
160 | " 0 1.00 0.48 0.65 140\n",
161 | " 1 0.12 1.00 0.22 10\n",
162 | "\n",
163 | " accuracy 0.51 150\n",
164 | " macro avg 0.56 0.74 0.43 150\n",
165 | "weighted avg 0.94 0.51 0.62 150\n",
166 | "\n"
167 | ]
168 | }
169 | ],
170 | "source": [
171 | "# Classification report: precision, recall and F1-score per class\n",
172 | "\n",
173 | "from sklearn.metrics import classification_report\n",
174 | "print (classification_report(y_ho_test, predicted))"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": []
183 | }
184 | ],
185 | "metadata": {
186 | "kernelspec": {
187 | "display_name": "Python 3",
188 | "language": "python",
189 | "name": "python3"
190 | },
191 | "language_info": {
192 | "codemirror_mode": {
193 | "name": "ipython",
194 | "version": 3
195 | },
196 | "file_extension": ".py",
197 | "mimetype": "text/x-python",
198 | "name": "python",
199 | "nbconvert_exporter": "python",
200 | "pygments_lexer": "ipython3",
201 | "version": "3.8.5"
202 | }
203 | },
204 | "nbformat": 4,
205 | "nbformat_minor": 4
206 | }
207 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "\n",
11 | "from sklearn.naive_bayes import GaussianNB"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "def read_file(filename):\n",
21 | " df = pd.read_csv(filename)\n",
22 | " print(df.shape)\n",
23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n",
24 | " return df"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 4,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "name": "stdout",
34 | "output_type": "stream",
35 | "text": [
36 | "(5400, 8)\n",
37 | "(310, 8)\n"
38 | ]
39 | }
40 | ],
41 | "source": [
42 | "FILE_HO_TRAIN = 'training.csv'\n",
43 | "FILE_HO_TEST = 'testingv2.csv'\n",
44 | "\n",
45 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
46 | "df_ho_test = read_file(FILE_HO_TEST)"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 5,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "from sklearn.preprocessing import LabelEncoder\n",
56 | "\n",
57 | "target_encoder = LabelEncoder()\n",
58 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n",
59 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n",
60 | "\n",
61 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n",
62 | "y_ho_train = df_ho_train['Target']\n",
63 | "\n",
64 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n",
65 | "y_ho_test = df_ho_test['Target']"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 8,
71 | "metadata": {},
72 | "outputs": [
73 | {
74 | "data": {
75 | "text/plain": [
76 | "0.5451612903225806"
77 | ]
78 | },
79 | "execution_count": 8,
80 | "metadata": {},
81 | "output_type": "execute_result"
82 | }
83 | ],
84 | "source": [
85 | "# Create the Gaussian Naive Bayes classifier\n",
86 | "modelnb = GaussianNB()\n",
87 | "\n",
88 | "# Fit the Naive Bayes classifier on the training data\n",
89 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n",
90 | "modelnb.score(x_ho_test,y_ho_test)"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 9,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = 54.516129032258064\n"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "from sklearn.metrics import accuracy_score, f1_score\n",
108 | "\n",
109 | "predicted= modelnb.predict(x_ho_test)\n",
110 | "\n",
111 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
112 | "print('Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 10,
118 | "metadata": {},
119 | "outputs": [
120 | {
121 | "data": {
122 | "image/png": "\n",
123 | "text/plain": [
124 | ""
125 | ]
126 | },
127 | "metadata": {
128 | "needs_background": "light"
129 | },
130 | "output_type": "display_data"
131 | }
132 | ],
133 | "source": [
134 | "# Create Confusion Matrix\n",
135 | "\n",
136 | "import seaborn as sns\n",
137 | "import matplotlib.pyplot as plt\n",
138 | "\n",
139 | "from sklearn.metrics import confusion_matrix\n",
140 | "confusion_matrix(y_ho_test, predicted) \n",
141 | "\n",
142 | "f, ax = plt.subplots(figsize=(8,5))\n",
143 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
144 | "plt.xlabel(\"Predicted Class\")\n",
145 | "plt.ylabel(\"Actual Class\")\n",
146 | "plt.show()"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 11,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | " precision recall f1-score support\n",
159 | "\n",
160 | " 0 1.00 0.53 0.69 300\n",
161 | " 1 0.07 1.00 0.12 10\n",
162 | "\n",
163 | " accuracy 0.55 310\n",
164 | " macro avg 0.53 0.77 0.41 310\n",
165 | "weighted avg 0.97 0.55 0.67 310\n",
166 | "\n"
167 | ]
168 | }
169 | ],
170 | "source": [
171 | "# Classification report: precision, recall and F1-score per class\n",
172 | "\n",
173 | "from sklearn.metrics import classification_report\n",
174 | "print (classification_report(y_ho_test, predicted))"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": []
183 | }
184 | ],
185 | "metadata": {
186 | "kernelspec": {
187 | "display_name": "Python 3",
188 | "language": "python",
189 | "name": "python3"
190 | },
191 | "language_info": {
192 | "codemirror_mode": {
193 | "name": "ipython",
194 | "version": 3
195 | },
196 | "file_extension": ".py",
197 | "mimetype": "text/x-python",
198 | "name": "python",
199 | "nbconvert_exporter": "python",
200 | "pygments_lexer": "ipython3",
201 | "version": "3.8.5"
202 | }
203 | },
204 | "nbformat": 4,
205 | "nbformat_minor": 4
206 | }
207 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv03.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "\n",
11 | "from sklearn.naive_bayes import GaussianNB"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "def read_file(filename):\n",
21 | " df = pd.read_csv(filename)\n",
22 | " print(df.shape)\n",
23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n",
24 | " return df"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 4,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "name": "stdout",
34 | "output_type": "stream",
35 | "text": [
36 | "(5400, 8)\n",
37 | "(600, 8)\n"
38 | ]
39 | }
40 | ],
41 | "source": [
42 | "FILE_HO_TRAIN = 'training.csv'\n",
43 | "FILE_HO_TEST = 'testingv3.csv'\n",
44 | "\n",
45 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
46 | "df_ho_test = read_file(FILE_HO_TEST)"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 5,
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "from sklearn.preprocessing import LabelEncoder\n",
56 | "\n",
57 | "target_encoder = LabelEncoder()\n",
58 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n",
59 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n",
60 | "\n",
61 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n",
62 | "y_ho_train = df_ho_train['Target']\n",
63 | "\n",
64 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n",
65 | "y_ho_test = df_ho_test['Target']"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 8,
71 | "metadata": {},
72 | "outputs": [
73 | {
74 | "data": {
75 | "text/plain": [
76 | "0.71"
77 | ]
78 | },
79 | "execution_count": 8,
80 | "metadata": {},
81 | "output_type": "execute_result"
82 | }
83 | ],
84 | "source": [
85 | "# Create the Gaussian Naive Bayes classifier\n",
86 | "modelnb = GaussianNB()\n",
87 | "\n",
88 | "# Fit the Naive Bayes classifier on the training data\n",
89 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n",
90 | "modelnb.score(x_ho_test,y_ho_test)"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 9,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = 71.0\n"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "from sklearn.metrics import accuracy_score, f1_score\n",
108 | "\n",
109 | "predicted= modelnb.predict(x_ho_test)\n",
110 | "\n",
111 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
112 | "print('Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
113 | ]
114 | },
115 | {
116 | "cell_type": "code",
117 | "execution_count": 11,
118 | "metadata": {},
119 | "outputs": [
120 | {
121 | "data": {
122 | "image/png": "\n",
123 | "text/plain": [
124 | ""
125 | ]
126 | },
127 | "metadata": {
128 | "needs_background": "light"
129 | },
130 | "output_type": "display_data"
131 | }
132 | ],
133 | "source": [
134 | "# Create Confusion Matrix\n",
135 | "\n",
136 | "import seaborn as sns\n",
137 | "import matplotlib.pyplot as plt\n",
138 | "\n",
139 | "from sklearn.metrics import confusion_matrix\n",
140 | "confusion_matrix(y_ho_test, predicted) \n",
141 | "\n",
142 | "f, ax = plt.subplots(figsize=(8,5))\n",
143 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
144 | "plt.xlabel(\"Predicted Class\")\n",
145 | "plt.ylabel(\"Actual Class\")\n",
146 | "plt.show()"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 12,
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "name": "stdout",
156 | "output_type": "stream",
157 | "text": [
158 | " precision recall f1-score support\n",
159 | "\n",
160 | " 0 0.83 0.53 0.65 300\n",
161 | " 1 0.65 0.89 0.75 300\n",
162 | "\n",
163 | " accuracy 0.71 600\n",
164 | " macro avg 0.74 0.71 0.70 600\n",
165 | "weighted avg 0.74 0.71 0.70 600\n",
166 | "\n"
167 | ]
168 | }
169 | ],
170 | "source": [
171 | "# Classification report: precision, recall and F1-score per class\n",
172 | "\n",
173 | "from sklearn.metrics import classification_report\n",
174 | "print (classification_report(y_ho_test, predicted))"
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": null,
180 | "metadata": {},
181 | "outputs": [],
182 | "source": []
183 | }
184 | ],
185 | "metadata": {
186 | "kernelspec": {
187 | "display_name": "Python 3",
188 | "language": "python",
189 | "name": "python3"
190 | },
191 | "language_info": {
192 | "codemirror_mode": {
193 | "name": "ipython",
194 | "version": 3
195 | },
196 | "file_extension": ".py",
197 | "mimetype": "text/x-python",
198 | "name": "python",
199 | "nbconvert_exporter": "python",
200 | "pygments_lexer": "ipython3",
201 | "version": "3.8.5"
202 | }
203 | },
204 | "nbformat": 4,
205 | "nbformat_minor": 4
206 | }
207 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/README.md:
--------------------------------------------------------------------------------
1 | # csv
2 |
3 | # Algorithms
4 |
5 | 1. Decision Tree
6 | 2. Random Forest
7 | 3. Naive Bayes
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Random Forest/Images/Random Forest result dataset 2 Python (Jupyter) vs RapidMiner.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Random Forest/Images/Random Forest result dataset 2 Python (Jupyter) vs RapidMiner.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Random Forest/Images/RapidMiner Random Forest Model.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Random Forest/Images/RapidMiner Random Forest Model.PNG
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "\n",
12 | "from sklearn.ensemble import RandomForestClassifier"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "def read_file(filename):\n",
22 | " df = pd.read_csv(filename)\n",
23 | " print(df.shape)\n",
24 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n",
25 | " return df"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 4,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "(5400, 8)\n",
38 | "(150, 8)\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "# training set: 5400 rows\n",
44 | "# test set: 150 rows (140 of class 0, 10 of class 1)\n",
45 | "\n",
46 | "FILE_HO_TRAIN = 'training.csv'\n",
47 | "FILE_HO_TEST = 'testingv1.csv'\n",
48 | "\n",
49 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
50 | "df_ho_test = read_file(FILE_HO_TEST)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 5,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "from sklearn.preprocessing import LabelEncoder\n",
60 | "\n",
61 | "target_encoder = LabelEncoder()\n",
62 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n",
63 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n",
64 | "\n",
65 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n",
66 | "y_ho_train = df_ho_train['Target']\n",
67 | "\n",
68 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n",
69 | "y_ho_test = df_ho_test['Target']"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 6,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "data": {
79 | "text/plain": [
80 | "0.9333333333333333"
81 | ]
82 | },
83 | "execution_count": 6,
84 | "metadata": {},
85 | "output_type": "execute_result"
86 | }
87 | ],
88 | "source": [
89 | "# Mirror the RapidMiner Random Forest configuration:\n",
90 | "# - criterion: entropy (information gain)\n",
91 | "# - n_estimators: 100 trees\n",
92 | "# - max_depth: 10\n",
93 | "\n",
94 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n",
95 | "modelRF.fit(x_ho_train,y_ho_train)\n",
96 | "modelRF.score(x_ho_test,y_ho_test)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 7,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "name": "stdout",
106 | "output_type": "stream",
107 | "text": [
108 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 93.33333333333333\n"
109 | ]
110 | }
111 | ],
112 | "source": [
113 | "from sklearn.metrics import accuracy_score, f1_score\n",
114 | "\n",
115 | "predicted= modelRF.predict(x_ho_test)\n",
116 | "\n",
117 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
118 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 8,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "data": {
128 | "image/png": "\n",
129 | "text/plain": [
130 | ""
131 | ]
132 | },
133 | "metadata": {
134 | "needs_background": "light"
135 | },
136 | "output_type": "display_data"
137 | }
138 | ],
139 | "source": [
140 | "# Create Confusion Matrix\n",
141 | "\n",
142 | "import seaborn as sns\n",
143 | "import matplotlib.pyplot as plt\n",
144 | "\n",
145 | "from sklearn.metrics import confusion_matrix\n",
146 | "confusion_matrix(y_ho_test, predicted) \n",
147 | "\n",
148 | "f, ax = plt.subplots(figsize=(8,5))\n",
149 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
150 | "plt.xlabel(\"Predicted Class\")\n",
151 | "plt.ylabel(\"Actual Class\")\n",
152 | "plt.show()"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 9,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | " precision recall f1-score support\n",
165 | "\n",
166 | " 0 1.00 0.93 0.96 140\n",
167 | " 1 0.50 1.00 0.67 10\n",
168 | "\n",
169 | " accuracy 0.93 150\n",
170 | " macro avg 0.75 0.96 0.81 150\n",
171 | "weighted avg 0.97 0.93 0.94 150\n",
172 | "\n"
173 | ]
174 | }
175 | ],
176 | "source": [
177 | "# Classification report: precision, recall and F1-score per class\n",
178 | "\n",
179 | "from sklearn.metrics import classification_report\n",
180 | "print (classification_report(y_ho_test, predicted))"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": null,
186 | "metadata": {},
187 | "outputs": [],
188 | "source": []
189 | }
190 | ],
191 | "metadata": {
192 | "kernelspec": {
193 | "display_name": "Python 3",
194 | "language": "python",
195 | "name": "python3"
196 | },
197 | "language_info": {
198 | "codemirror_mode": {
199 | "name": "ipython",
200 | "version": 3
201 | },
202 | "file_extension": ".py",
203 | "mimetype": "text/x-python",
204 | "name": "python",
205 | "nbconvert_exporter": "python",
206 | "pygments_lexer": "ipython3",
207 | "version": "3.8.5"
208 | }
209 | },
210 | "nbformat": 4,
211 | "nbformat_minor": 4
212 | }
213 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv02.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "\n",
12 | "from sklearn.ensemble import RandomForestClassifier"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "def read_file(filename):\n",
22 | " df = pd.read_csv(filename)\n",
23 | " print(df.shape)\n",
24 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n",
25 | " return df"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stdout",
35 | "output_type": "stream",
36 | "text": [
37 | "(5400, 8)\n",
38 | "(310, 8)\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "# training set: 5400 rows\n",
44 | "# test set: 310 rows (300 of class 0, 10 of class 1)\n",
45 | "\n",
46 | "FILE_HO_TRAIN = 'training.csv'\n",
47 | "FILE_HO_TEST = 'testingv2.csv'\n",
48 | "\n",
49 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
50 | "df_ho_test = read_file(FILE_HO_TEST)"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 4,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "from sklearn.preprocessing import LabelEncoder\n",
60 | "\n",
61 | "target_encoder = LabelEncoder()\n",
62 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n",
63 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n",
64 | "\n",
65 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n",
66 | "y_ho_train = df_ho_train['Target']\n",
67 | "\n",
68 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n",
69 | "y_ho_test = df_ho_test['Target']"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 5,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "data": {
79 | "text/plain": [
80 | "0.9290322580645162"
81 | ]
82 | },
83 | "execution_count": 5,
84 | "metadata": {},
85 | "output_type": "execute_result"
86 | }
87 | ],
88 | "source": [
89 | "# Mirror the RapidMiner Random Forest configuration:\n",
90 | "# - criterion: entropy (information gain)\n",
91 | "# - n_estimators: 100 trees\n",
92 | "# - max_depth: 10\n",
93 | "\n",
94 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n",
95 | "modelRF.fit(x_ho_train,y_ho_train)\n",
96 | "modelRF.score(x_ho_test,y_ho_test)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 6,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "name": "stdout",
106 | "output_type": "stream",
107 | "text": [
108 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 92.90322580645162\n"
109 | ]
110 | }
111 | ],
112 | "source": [
113 | "from sklearn.metrics import accuracy_score, f1_score\n",
114 | "\n",
115 | "predicted= modelRF.predict(x_ho_test)\n",
116 | "\n",
117 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
118 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 7,
124 | "metadata": {},
125 | "outputs": [
126 | {
127 | "data": {
128 | "image/png": "\n",
129 | "text/plain": [
130 | ""
131 | ]
132 | },
133 | "metadata": {
134 | "needs_background": "light"
135 | },
136 | "output_type": "display_data"
137 | }
138 | ],
139 | "source": [
140 | "# Create Confusion Matrix\n",
141 | "\n",
142 | "import seaborn as sns\n",
143 | "import matplotlib.pyplot as plt\n",
144 | "\n",
145 | "from sklearn.metrics import confusion_matrix\n",
146 | "confusion_matrix(y_ho_test, predicted) \n",
147 | "\n",
148 | "f, ax = plt.subplots(figsize=(8,5))\n",
149 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
150 | "plt.xlabel(\"Predicted Class\")\n",
151 | "plt.ylabel(\"Actual Class\")\n",
152 | "plt.show()"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 8,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "name": "stdout",
162 | "output_type": "stream",
163 | "text": [
164 | " precision recall f1-score support\n",
165 | "\n",
166 | " 0 1.00 0.93 0.96 300\n",
167 | " 1 0.31 1.00 0.48 10\n",
168 | "\n",
169 | " accuracy 0.93 310\n",
170 | " macro avg 0.66 0.96 0.72 310\n",
171 | "weighted avg 0.98 0.93 0.95 310\n",
172 | "\n"
173 | ]
174 | }
175 | ],
176 | "source": [
177 | "# Performance metrics report: precision, recall, F1-score\n",
178 | "\n",
179 | "from sklearn.metrics import classification_report\n",
180 | "print (classification_report(y_ho_test, predicted))"
181 | ]
182 | },
183 | {
184 | "cell_type": "code",
185 | "execution_count": null,
186 | "metadata": {},
187 | "outputs": [],
188 | "source": []
189 | }
190 | ],
191 | "metadata": {
192 | "kernelspec": {
193 | "display_name": "Python 3",
194 | "language": "python",
195 | "name": "python3"
196 | },
197 | "language_info": {
198 | "codemirror_mode": {
199 | "name": "ipython",
200 | "version": 3
201 | },
202 | "file_extension": ".py",
203 | "mimetype": "text/x-python",
204 | "name": "python",
205 | "nbconvert_exporter": "python",
206 | "pygments_lexer": "ipython3",
207 | "version": "3.8.5"
208 | }
209 | },
210 | "nbformat": 4,
211 | "nbformat_minor": 4
212 | }
213 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv03.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "\n",
11 | "from sklearn.ensemble import RandomForestClassifier"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "def read_file(filename):\n",
21 | " df = pd.read_csv(filename)\n",
22 | " print(df.shape)\n",
23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n",
24 | " return df"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 3,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "name": "stdout",
34 | "output_type": "stream",
35 | "text": [
36 | "(5400, 8)\n",
37 | "(600, 8)\n"
38 | ]
39 | }
40 | ],
41 | "source": [
42 | "# training set: 5400 rows\n",
43 | "# test set: 600 rows (300 per class)\n",
44 | "\n",
45 | "FILE_HO_TRAIN = 'training.csv'\n",
46 | "FILE_HO_TEST = 'testingv3.csv'\n",
47 | "\n",
48 | "df_ho_train = read_file(FILE_HO_TRAIN)\n",
49 | "df_ho_test = read_file(FILE_HO_TEST)"
50 | ]
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": 4,
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 | "from sklearn.preprocessing import LabelEncoder\n",
59 | "\n",
60 | "target_encoder = LabelEncoder()\n",
61 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n",
62 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n",
63 | "\n",
64 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n",
65 | "y_ho_train = df_ho_train['Target']\n",
66 | "\n",
67 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n",
68 | "y_ho_test = df_ho_test['Target']"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 10,
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "0.9283333333333333"
80 | ]
81 | },
82 | "execution_count": 10,
83 | "metadata": {},
84 | "output_type": "execute_result"
85 | }
86 | ],
87 | "source": [
88 | "# Mirror the RapidMiner configuration:\n",
89 | "# - criterion: entropy (i.e. information gain)\n",
90 | "# - number of estimators (trees): 100\n",
91 | "# - maximum depth: 10\n",
92 | "\n",
93 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n",
94 | "modelRF.fit(x_ho_train,y_ho_train)\n",
95 | "modelRF.score(x_ho_test,y_ho_test)"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": 11,
101 | "metadata": {},
102 | "outputs": [
103 | {
104 | "name": "stdout",
105 | "output_type": "stream",
106 | "text": [
107 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 92.83333333333333\n"
108 | ]
109 | }
110 | ],
111 | "source": [
112 | "from sklearn.metrics import accuracy_score, f1_score\n",
113 | "\n",
114 | "predicted= modelRF.predict(x_ho_test)\n",
115 | "\n",
116 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n",
117 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": 12,
123 | "metadata": {},
124 | "outputs": [
125 | {
126 | "data": {
127 | "image/png": "\n",
128 | "text/plain": [
129 | ""
130 | ]
131 | },
132 | "metadata": {
133 | "needs_background": "light"
134 | },
135 | "output_type": "display_data"
136 | }
137 | ],
138 | "source": [
139 | "# Create Confusion Matrix\n",
140 | "\n",
141 | "import seaborn as sns\n",
142 | "import matplotlib.pyplot as plt\n",
143 | "\n",
144 | "from sklearn.metrics import confusion_matrix\n",
145 | "confusion_matrix(y_ho_test, predicted) \n",
146 | "\n",
147 | "f, ax = plt.subplots(figsize=(8,5))\n",
148 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n",
149 | "plt.xlabel(\"Predicted Class\")\n",
150 | "plt.ylabel(\"Actual Class\")\n",
151 | "plt.show()"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 13,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | " precision recall f1-score support\n",
164 | "\n",
165 | " 0 0.94 0.92 0.93 300\n",
166 | " 1 0.92 0.94 0.93 300\n",
167 | "\n",
168 | " accuracy 0.93 600\n",
169 | " macro avg 0.93 0.93 0.93 600\n",
170 | "weighted avg 0.93 0.93 0.93 600\n",
171 | "\n"
172 | ]
173 | }
174 | ],
175 | "source": [
176 | "# Performance metrics report: precision, recall, F1-score\n",
177 | "\n",
178 | "from sklearn.metrics import classification_report\n",
179 | "print (classification_report(y_ho_test, predicted))"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": null,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": []
188 | }
189 | ],
190 | "metadata": {
191 | "kernelspec": {
192 | "display_name": "Python 3",
193 | "language": "python",
194 | "name": "python3"
195 | },
196 | "language_info": {
197 | "codemirror_mode": {
198 | "name": "ipython",
199 | "version": 3
200 | },
201 | "file_extension": ".py",
202 | "mimetype": "text/x-python",
203 | "name": "python",
204 | "nbconvert_exporter": "python",
205 | "pygments_lexer": "ipython3",
206 | "version": "3.8.5"
207 | }
208 | },
209 | "nbformat": 4,
210 | "nbformat_minor": 4
211 | }
212 |
--------------------------------------------------------------------------------
/Testing Scratch/Dataset 2/testingv1.csv:
--------------------------------------------------------------------------------
1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration,label
2 | 5856,532,585.6,53.2,1.611882,1.005457,26.173386,Trojan
3 | 19600,312,1400,52,0.000302,0.000200,0.005426,Trojan
4 | 13724,364,1055.692308,52,0.049003,0.000707,0.641126,Trojan
5 | 14378,312,1027,52,0.046377,0.000732,0.653661,Trojan
6 | 812,563,73.81818182,62.55555556,1.908398,1.516605,34.64183,Trojan
7 | 9978,4259,1108.666667,387.1818182,0.021671,0.041039,0.646469,Trojan
8 | 442,415,44.2,41.5,0.384397,1.953759,23.381559,Trojan
9 | 10089,398,840.75,49.75,0.151534,0.600073,6.618997,Trojan
10 | 1795,928,179.5,92.8,0.285265,0.037851,3.231163,Trojan
11 | 732,1266,73.2,126.6,1.288615,6.686840,79.754546,Trojan
12 | 416,15526,52,1293.833333,0.000220,0.000060,0.002477,Benign
13 | 520,13732,52,1373.2,0.000192,0.000135,0.003269,Benign
14 | 936,1526,52,763,0.000272,0.000166,0.005227,Benign
15 | 1040,0,52,0,0.000318,0.000000,0.006355,Benign
16 | 1040,0,52,0,0.000290,0.000000,0.005792,Benign
17 | 1040,0,52,0,0.000283,0.000000,0.005665,Benign
18 | 1040,0,52,0,0.000301,0.000000,0.006016,Benign
19 | 1040,0,52,0,0.000287,0.000000,0.005739,Benign
20 | 948,7000,63.2,1400,0.000202,0.000194,0.004001,Benign
21 | 1288,2800,71.55555556,1400,0.000296,0.000128,0.005577,Benign
22 | 1440,0,72,0,0.000300,0.000000,0.006005,Benign
23 | 1440,0,72,0,0.000291,0.000000,0.005827,Benign
24 | 1200,1400,63.15789474,1400,0.000262,0.000108,0.005089,Benign
25 | 1132,0,56.6,0,0.000263,0.000000,0.005254,Benign
26 | 168,16523,56,971.9411765,0.000270,0.006370,0.109101,Benign
27 | 0,24290,0,1214.5,0.000000,0.000022,0.00043,Benign
28 | 520,9107,52,910.7,0.000171,0.000012,0.001826,Benign
29 | 1040,0,52,0,0.000219,0.000000,0.004375,Benign
30 | 896,828,52.70588235,276,0.000286,0.008357,0.029939,Benign
31 | 683,23130,341.5,1285,0.167521,0.000108,0.336982,Benign
32 | 416,14210,52,1184.166667,0.000284,0.000017,0.002477,Benign
33 | 1040,0,52,0,0.000284,0.000000,0.005673,Benign
34 | 700,20376,233.3333333,1198.588235,0.041032,0.001865,0.154808,Benign
35 | 0,28000,0,1400,0.000000,0.000010,0.000191,Benign
36 | 52,26435,52,1391.315789,0.000173,0.000016,0.000472,Benign
37 | 52,26600,52,1400,0.000053,0.000011,0.000266,Benign
38 | 833,7087,59.5,1181.166667,0.000561,0.000112,0.00853,Benign
39 | 1635,52,86.05263158,52,0.000506,0.000715,0.01033,Benign
40 | 1040,0,52,0,0.000026,0.000000,0.000526,Benign
41 | 520,14000,52,1400,0.000033,0.000035,0.000673,Benign
42 | 0,28000,0,1400,0.000000,0.000026,0.000527,Benign
43 | 312,19600,52,1400,0.000024,0.000013,0.00033,Benign
44 | 104,25200,52,1400,0.000013,0.000031,25200,Benign
45 | 312,19185,52,1370.357143,0.001794,0.000032,0.011208,Benign
46 | 988,126,52,126,0.000020,0.000001,0.000377,Benign
47 | 1040,0,52,0,0.000479,0.000000,0.009583,Benign
48 | 1040,0,52,0,0.000049,0.000000,0.000977,Benign
49 | 1040,0,52,0,0.000041,0.000000,0.000819,Benign
50 | 936,364,52,182,0.000226,0.001219,0.006499,Benign
51 | 1350,17631,270,1175.4,0.009026,0.000152,0.047405,Benign
52 | 260,19618,52,1307.866667,0.001377,0.000021,0.007194,Benign
53 | 1040,0,52,0,0.000327,0.000000,0.006533,Benign
54 | 260,17868,52,1191.2,0.000028,0.000450,0.006885,Benign
55 | 104,20883,52,1160.166667,0.001297,0.000019,0.002939,Benign
56 | 1040,0,52,0,0.000385,0.000000,0.007709,Benign
57 | 468,9316,52,846.9090909,0.001572,0.003209,0.049455,Benign
58 | 927,3851,61.8,770.2,0.000741,0.000023,0.01123,Benign
59 | 208,12481,52,780.0625,0.000644,0.000191,0.005622,Benign
60 | 1006,2813,111.7777778,255.7272727,0.006983,0.008674,0.158257,Benign
61 | 699,3125,87.375,260.4166667,0.011236,0.029025,0.43818,Benign
62 | 1213,6172,121.3,617.2,0.250721,0.023988,2.747083,Benign
63 | 1792,1256,149.3333333,157,1.136611,0.153883,14.870386,Benign
64 | 468,15196,52,1381.454545,0.000602,0.000089,0.006397,Benign
65 | 367,18304,61.16666667,1307.428571,0.002962,0.000128,0.019569,Benign
66 | 777,8291,59.76923077,1036.375,0.001078,0.000154,0.015233,Benign
67 | 3869,4696,386.9,469.6,3.475922,0.008177,34.840995,Benign
68 | 5841,3387,486.75,423.375,0.082086,0.004039,1.017344,Benign
69 | 12600,572,1400,52,0.000237,0.000153,0.003819,Benign
70 | 14000,520,1400,52,0.000144,0.000122,0.002661,Benign
71 | 11049,1828,1227.666667,166.1818182,0.047967,0.028197,0.741869,Benign
72 | 13567,1138,1130.583333,142.25,0.017619,0.002865,0.234347,Benign
73 | 4443,4330,493.6666667,393.6363636,0.028444,0.018051,0.454552,Benign
74 | 6109,3281,555.3636364,364.5555556,0.021890,0.008087,0.313572,Benign
75 | 2254,2739,250.4444444,249,15.417934,0.005021,138.816634,Benign
76 | 2153,4869,215.3,486.9,0.342925,0.004785,3.477093,Benign
77 | 1498,1683,187.25,140.25,0.025416,5.005738,60.272179,Benign
78 | 644,476,58.54545455,52.88888889,5.485595,13.274716,179.813989,Benign
79 | 1144,3178,114.4,317.8,17.252015,0.003792,172.55807,Benign
80 | 2209,1777,200.8181818,177.7,1.448418,0.027127,16.203002,Benign
81 | 2001,4449,222.3333333,404.4545455,0.006135,0.005726,0.118203,Benign
82 | 1810,3857,201.1111111,350.6363636,13.578150,0.011711,122.33217,Benign
83 | 1621,1676,202.625,139.6666667,0.012625,8.470028,101.741331,Benign
84 | 6235,4772,566.8181818,530.2222222,1.324204,0.007430,14.633111,Benign
85 | 1606,3713,160.6,371.3,0.118602,0.022756,1.413574,Benign
86 | 1873,1015,208.1111111,92.27272727,0.021721,0.002725,0.22546,Benign
87 | 671,4417,83.875,368.0833333,0.677163,0.009401,5.530116,Benign
88 | 1945,1825,176.8181818,202.7777778,0.011313,0.016697,0.274721,Benign
89 | 536,536,67,53.6,0.749217,10.893955,116.274163,Benign
90 | 0,22667,0,1133.35,0.000000,0.000021,0.000416,Benign
91 | 1309,9029,187,694.5384615,0.000454,0.000217,0.005991,Benign
92 | 1469,178,81.61111111,89,0.000649,0.001243,0.014167,Benign
93 | 620,9612,56.36363636,1068,0.001414,0.000027,0.015794,Benign
94 | 1245,7338,103.75,917.25,0.007912,0.000309,0.097412,Benign
95 | 588,6543,53.45454545,727,0.001994,0.002583,0.045183,Benign
96 | 416,13312,52,1109.333333,0.001624,0.000021,0.013243,Benign
97 | 1995,2681,153.4615385,446.8333333,0.002476,0.000645,0.036259,Benign
98 | 1019,10573,169.8333333,755.2142857,0.000859,0.000174,0.007595,Benign
99 | 1255,1615,83.66666667,323,0.004289,0.001328,0.070978,Benign
100 | 665,665,95,95,0.000552,0.000552,0.0603,Benign
101 | 4562,972,380.1666667,121.5,0.003639,0.000355,0.046501,Benign
102 | 649,23116,324.5,1284.222222,0.017717,0.000099,0.03722,Benign
103 | 0,28000,0,1400,0.000000,0.000027,0.000549,Benign
104 | 0,28000,0,1400,0.000000,0.000010,0.0002,Benign
105 | 700,23032,233.3333333,1354.823529,0.000722,0.000017,0.002458,Benign
106 | 0,28000,0,1400,0.000000,0.000011,0.000225,Benign
107 | 0,28000,0,1400,0.000000,0.000009,0.000178,Benign
108 | 52,26600,52,1400,0.000061,0.000016,0.000372,Benign
109 | 312,18252,52,1303.714286,0.000275,0.000204,0.004511,Benign
110 | 7859,7243,714.4545455,804.7777778,0.000766,0.000378,0.01183,Benign
111 | 864,18853,144,1346.642857,0.000329,0.005011,0.005011,Benign
112 | 0,26675,0,1333.75,0.000000,0.000100,0.001998,Benign
113 | 0,26464,0,1323.2,0.000000,0.000035,0.000704,Benign
114 | 208,19301,52,1206.3125,0.000290,0.000090,0.002598,Benign
115 | 416,16800,52,1400,0.000694,0.000149,0.007337,Benign
116 | 12238,2427,1359.777778,220.6363636,0.000878,0.039199,0.439087,Benign
117 | 656,7125,72.88888889,647.7272727,3.865589,0.003982,34.834099,Benign
118 | 1452,1447,145.2,144.7,0.005393,0.011599,0.169923,Benign
119 | 1222,3767,135.7777778,342.4545455,0.035985,0.011201,0.447081,Benign
120 | 1449,1695,181.125,141.25,0.026967,19.968305,239.835392,Benign
121 | 708,504,70.8,50.4,2.454773,0.027523,24.822953,Benign
122 | 2272,2611,252.4444444,237.3636364,55.823881,0.007820,502.500952,Benign
123 | 2765,2876,276.5,287.6,0.240199,0.026164,2.663622,Benign
124 | 1715,1872,142.9166667,234,5.016834,29.997419,300.181352,Benign
125 | 3831,1499,383.1,149.9,0.019733,0.003069,0.228017,Benign
126 | 1901,2646,190.1,264.6,2.319603,0.028269,23.478724,Benign
127 | 3911,3027,391.1,302.7,0.080811,0.120381,2.01192,Benign
128 | 3749,2247,374.9,224.7,1.006559,0.083725,10.902833,Benign
129 | 2515,4145,251.5,414.5,0.343112,0.434465,7.775766,Benign
130 | 640,5266,64,526.6,0.001436,0.000747,0.021831,Benign
131 | 640,4705,64,470.5,0.001078,0.001503,0.025806,Benign
132 | 448,6771,64,520.8461538,0.000679,0.000667,0.013428,Benign
133 | 648,4110,54,513.75,0.000890,0.000510,0.014751,Benign
134 | 1475,2291,122.9166667,286.375,0.537064,0.023667,6.634098,Benign
135 | 1570,7112,157,711.2,3.330282,0.002362,33.326437,Benign
136 | 3758,1147,341.6363636,127.4444444,0.010364,0.001746,0.129727,Benign
137 | 7190,1922,653.6363636,213.5555556,0.031467,0.030463,0.620305,Benign
138 | 2772,829,252,92.11111111,6.089106,0.036370,67.307489,Benign
139 | 4578,893,457.8,89.3,1.174172,0.034073,12.082454,Benign
140 | 882,625,67.84615385,89.28571429,8.879899,0.006421,115.483628,Benign
141 | 4336,1834,394.1818182,203.7777778,0.000623,0.000420,0.010631,Benign
142 | 1257,598,89.78571429,99.66666667,4.346160,40.000051,300.846552,Benign
143 | 6060,1307,550.9090909,145.2222222,0.047773,0.000473,0.529757,Benign
144 | 0,20830,0,1041.5,0.000000,0.001919,0.038374,Benign
145 | 696,11618,77.33333333,1056.181818,0.000201,0.000008,0.001893,Benign
146 | 1560,0,78,0,0.000301,0.000000,0.00602,Benign
147 | 838,4705,76.18181818,522.7777778,0.031981,0.029532,0.617579,Benign
148 | 1065,9758,118.3333333,887.0909091,0.003838,0.000256,0.037358,Benign
149 | 1399,7592,174.875,632.6666667,0.001163,0.000183,0.011497,Benign
150 | 1723,6289,132.5384615,898.4285714,0.001663,0.000283,0.023606,Benign
151 | 1493,7375,186.625,614.5833333,0.001381,0.000301,0.014656,Benign
152 |
--------------------------------------------------------------------------------