├── Dataset ├── Dataset 1 │ ├── Benign │ │ └── benign_1500_6_mixing.csv │ ├── README.md │ ├── Trojan │ │ ├── AhMyth_wireshark_1500.csv │ │ ├── AndroidRAT_wireshark_1500.csv │ │ ├── AndroidTester_wireshark_1500.csv │ │ ├── Droidjack_wireshark_1500.csv │ │ ├── Hawkshaw_wireshark_1500.csv │ │ ├── Spymax_wireshark_1500.csv │ │ └── trojan_1500_6.csv │ └── benign_trojan_18000_model_1_transformed.csv └── Dataset 2 │ ├── README.md │ ├── benign │ └── benign_75_6_or_150_3_mixing.csv │ └── trojan │ ├── AhMyth_75.csv │ ├── AndroidRAT_75.csv │ ├── AndroidTester_75.csv │ ├── DroidJack_75.csv │ ├── HawkShaw_75.csv │ └── SpyMax_75.csv ├── README.md ├── Remote Access Trojan Detection On Android Based On Network Traffic Observation Using Machine Learning.pdf └── Testing Scratch ├── Dataset 1 ├── Decision Tree │ ├── Decision Tree Scratch.ipynb │ ├── Decision Trees - Model 1 - 2021.rmp │ ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv01.ipynb │ ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv02.ipynb │ ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv03.ipynb │ └── Images │ │ ├── Decision Tree - RapidMiner.PNG │ │ ├── Decision Tree Result - Python (Jupyter) vs RapidMiner.PNG │ │ ├── imagename.png │ │ ├── imagenamev02.png │ │ └── imagenamev03.png ├── Naive Bayes │ ├── Images │ │ ├── Naive Bayes - Rapid Miner.PNG │ │ └── Naive Bayes Result - Python (Jupyter) vs RapidMiner.PNG │ ├── Naive Bayes - Model 1 - 2021.rmp │ ├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb │ ├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb │ └── NaiveBayesClassifier with sklearn - Dataset 1 - testingv03.ipynb ├── README.md ├── Random Forest │ ├── Images │ │ ├── Random Forest - Rapid Miner.PNG │ │ └── Random Forest Result - Python (Jupyter) vs RapidMiner.PNG │ ├── Random Forest - Model 1 - 2021.rmp │ ├── RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb │ ├── RandomForestClassifier with sklearn - Dataset 1 - 
testingv02.ipynb │ └── RandomForestClassifier with sklearn - Dataset 1 - testingv03.ipynb ├── testingv01.csv ├── testingv02.csv ├── testingv03.csv └── training.csv └── Dataset 2 ├── Decision Tree ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv01.ipynb ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv02.ipynb ├── DecisionTreeClassifier with sklearn - Dataset 1 - testingv03.ipynb └── Images │ ├── Decision Tree Result dataset 2- Python (Jupyter) vs RapidMiner.PNG │ ├── RapidMiner Process Model.PNG │ ├── imagename01.png │ ├── imagename02.png │ └── imagename03.png ├── Naive Bayes ├── Images │ ├── Naive Bayes result dataset2 - Python (Jupyter) vs RapidMiner.png │ └── RapidMiner Naive Bayes Process Model.PNG ├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb ├── NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb └── NaiveBayesClassifier with sklearn - Dataset 1 - testingv03.ipynb ├── README.md ├── Random Forest ├── Images │ ├── Random Forest result dataset 2 Python (Jupyter) vs RapidMiner.PNG │ └── RapidMiner Random Forest Model.PNG ├── RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb ├── RandomForestClassifier with sklearn - Dataset 1 - testingv02.ipynb └── RandomForestClassifier with sklearn - Dataset 1 - testingv03.ipynb ├── testingv1.csv ├── testingv2.csv ├── testingv3.csv └── training.csv /Dataset/Dataset 1/README.md: -------------------------------------------------------------------------------- 1 | # csv 2 | all csv file contains network traffic packet 3 | 4 | benign_trojan_18000_model_1_transformed.csv 5 | 6 | # ==== Protocol ==== 7 | 8 | - SMPP = 9 9 | - GQUIC = 8 10 | - KNXnet/IP = 7 11 | - WebSocket = 6 12 | - ICMP = 5 13 | - HTTP = 4 14 | - TLSv1.3 = 3 15 | - TLSv1.2 = 2 16 | - tcp = 1 17 | - udp = 0 18 | 19 | # ==== Type ==== 20 | 21 | - out 0 22 | - in 1 23 | 24 | # ===== LABEL ===== 25 | 26 | - Benign 0 27 | - Trojan 1 28 | 29 | # 
Source,Destination,Protocol,Length,Type,Duration,Label 30 | 31 | - Source is the source port of the packet 32 | - Destination is the destination port of the packet 33 | - Protocol 34 | - Length 35 | - Type is based on IP: a packet from a local source to an external destination is "out" 36 | - Duration is the time between the previous packet and the current packet (from A to B) 37 | -------------------------------------------------------------------------------- /Dataset/Dataset 2/README.md: -------------------------------------------------------------------------------- 1 | # csv 2 | all csv files contain network traffic aggregated from 20 packets into 1 row 3 | 4 | # example 5 | 20 rows of AhMyth packet traffic are transformed into 1 aggregated row, so 75 aggregated rows = 1500 packet rows -------------------------------------------------------------------------------- /Dataset/Dataset 2/trojan/AhMyth_75.csv: -------------------------------------------------------------------------------- 1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration 2 | 2217,993,158.3571429,165.5,0.534560643,0.5021735,10.49689 3 | 704,757,64,84.11111111,4.531191909,5.064115667,95.420152 4 | 555,532,55.5,53.2,10.8981632,4.3171529,152.153161 5 | 742,378,53,63,6.5037385,3.231654833,110.442268 6 | 6001,1556,545.5454545,172.8888889,1.026273818,6.662169889,71.248541 7 | 2153,1045,165.6153846,149.2857143,0.118072231,0.058974,1.947757 8 | 555,812,55.5,81.2,7.4964943,4.11955,116.160443 9 | 633,758,63.3,68.90909091,6.0886212,3.545336,89.667757 10 | 4436,471,369.6666667,58.875,6.266031917,2.392116875,94.329318 11 | 853,217,53.3125,54.25,4.929131875,3.90058925,94.468467 12 | 4570,508,380.8333333,63.5,4.184390417,4.225012625,84.012786 13 | 1562,641,173.5555556,58.27272727,8.441892444,6.549571636,148.02232 14 | 6008,1622,546.1818182,180.2222222,2.611064818,0.210755222,30.61851 15 | 4792,540,368.6153846,77.14285714,4.398488,4.701446,90.090466 16 | 18200,364,1400,52,0.000222692,0.000210286,0.004367 17 | 
18200,364,1400,52,0.000252231,0.000224857,0.004853 18 | 19600,312,1400,52,0.000277786,0.000137833,0.004716 19 | 8792,471,799.2727273,52.33333333,0.513871727,0.149289667,6.996196 20 | 18200,364,1400,52,0.000197,0.000245571,0.00428 21 | 18200,364,1400,52,0.000232538,0.000224429,0.004594 22 | 18200,364,1400,52,0.000162846,0.000385714,0.004817 23 | 19600,312,1400,52,0.000189071,0.000329833,0.004626 24 | 7316,468,665.0909091,52,0.699906182,0.000281667,7.701503 25 | 18196,364,1399.692308,52,0.000237,0.436016571,3.055197 26 | 19600,312,1400,52,0.000253,0.0002155,0.004835 27 | 18200,364,1400,52,0.000176615,0.000352571,0.004764 28 | 18200,364,1400,52,0.000200692,0.000311,0.004786 29 | 7160,468,716,52,0.7576526,0.231896667,9.663731 30 | 16952,364,1304,52,0.000151385,0.000364286,0.004518 31 | 19600,312,1400,52,0.000179429,0.000379,0.004786 32 | 18200,364,1400,52,0.000222846,0.000268714,0.004778 33 | 16800,364,1400,52,0.000241083,0.000228286,0.004536 34 | 16852,364,1296.307692,52,0.016553769,0.055523857,0.603866 35 | 18196,364,1399.692308,52,0.000181615,0.000298571,0.004451 36 | 19600,312,1400,52,0.000215643,0.000323667,0.004961 37 | 18200,364,1400,52,0.000232462,0.000282,0.004996 38 | 15524,364,1194.153846,52,0.016619231,0.010003,0.286071 39 | 18180,364,1398.461538,52,0.000246846,0.000194143,0.004568 40 | 18200,364,1400,52,0.000156846,0.000371,0.004636 41 | 18200,364,1400,52,0.000162923,0.000437714,0.005182 42 | 12744,416,1062,52,0.137838167,0.025451875,1.857673 43 | 12756,364,1063,52,1.10413125,0.034432571,13.49066 44 | 18200,364,1400,52,0.000217385,0.000244,0.004534 45 | 18200,364,1400,52,0.000222923,0.000279429,0.004854 46 | 19600,312,1400,52,0.000182929,0.000357833,0.004708 47 | 8680,468,789.0909091,52,0.289482727,0.000323111,3.187218 48 | 16852,364,1296.307692,52,0.262063769,0.492147429,6.851861 49 | 18180,364,1398.461538,52,0.000208615,0.000332,0.005036 50 | 18200,364,1400,52,0.000236462,0.000286571,0.00508 51 | 19600,312,1400,52,0.000176143,0.0003105,0.00508 52 | 
5856,532,585.6,53.2,1.611882,1.0054566,26.173386 53 | 19600,312,1400,52,0.000301929,0.000199833,0.005426 54 | 16880,364,1298.461538,52,0.000218692,0.000189,0.004166 55 | 19600,312,1400,52,0.000199071,0.000360833,0.004952 56 | 18200,364,1400,52,0.000227769,0.000210714,0.004436 57 | 4824,492,438.5454545,54.66666667,0.670273636,0.606590111,12.832321 58 | 1660,496,150.9090909,55.11111111,3.670141182,0.769408111,47.296226 59 | 2333,735,179.4615385,105,3.873147769,1.051354,57.710399 60 | 1824,2396,182.4,239.6,2.4950565,4.5186617,70.137182 61 | 1182,641,118.2,64.1,5.0205738,3.4850297,85.056035 62 | 1080,0,54,0,0.18626495,0,3.725299 63 | 1120,0,56,0,1.34162985,0,26.832597 64 | 1308,112,72.66666667,56,2.736482667,0.1291545,49.514997 65 | 1630,1076,125.3846154,179.3333333,1.920484769,0.6198325,28.693083 66 | 605,565,60.5,56.5,5.0509084,5.5872811,106.381895 67 | 614,483,55.81818182,53.66666667,13.66482609,1.591593667,164.63743 68 | 826,299,55.06666667,59.8,4.0892242,3.7293468,79.985097 69 | 666,434,55.5,54.25,10.60122108,2.32946075,145.850339 70 | 631,597,57.36363636,66.33333333,10.59965909,3.117168556,144.650767 71 | 838,504,69.83333333,63,6.361316333,6.74210575,130.272642 72 | 752,494,62.66666667,61.75,6.73596875,2.417403375,100.170852 73 | 555,538,55.5,53.8,12.5173646,2.2065608,147.239254 74 | 666,431,55.5,53.875,12.51754408,3.129164375,175.243844 75 | 695,467,57.91666667,58.375,8.974165167,2.183049625,125.154379 76 | -------------------------------------------------------------------------------- /Dataset/Dataset 2/trojan/AndroidRAT_75.csv: -------------------------------------------------------------------------------- 1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration 2 | 2445,621,222.2727273,69,0.026909818,42.96524767,386.983237 3 | 11562,4157,1051.090909,461.8888889,0.012191091,13.37589489,120.517156 4 | 8841,513,803.7272727,57,0.000461727,10.606716,95.465523 5 | 
15747,334,1124.785714,55.66666667,0.011220357,11.0531495,66.475982 6 | 15371,479,1280.916667,59.875,0.00067925,10.41668138,83.341602 7 | 18200,364,1400,52,0.000239385,0.000242714,0.004811 8 | 19600,312,1400,52,0.000203071,0.000347833,0.00493 9 | 18200,364,1400,52,0.000174462,0.000363571,0.004813 10 | 16800,364,1400,52,0.000247,0.000297,0.00509 11 | 17484,364,1344.923077,52,0.000331538,0.013282143,0.097285 12 | 16800,364,1400,52,0.000311583,0.000227571,0.005381 13 | 18200,364,1400,52,0.000334154,0.000243286,0.006047 14 | 16800,364,1400,52,0.0003445,0.000227714,0.006144 15 | 11589,544,1158.9,60.44444444,0.0007427,6.858406778,61.733088 16 | 1537,821,170.7777778,74.63636364,0.007143444,35.71313564,392.908783 17 | 961,697,96.1,58.08333333,6.6972939,10.61556383,130.030605 18 | 891,520,99,52,0.800173667,0.2063735,9.265298 19 | 990,520,99,52,0.8007184,0.2070737,10.077921 20 | 990,520,99,52,0.7997319,0.2080924,10.078243 21 | 990,520,99,52,0.8947421,0.2052349,10.99977 22 | 990,520,99,52,0.8059514,0.2112545,10.172059 23 | 990,520,99,52,0.7892205,0.2123557,10.015762 24 | 990,520,99,52,0.7935952,0.2110402,10.046354 25 | 6058,1038,550.7272727,115.3333333,0.240070545,13.79114133,126.970269 26 | 13724,364,1055.692308,52,0.049071385,0.000714286,0.642928 27 | 14378,312,1027,52,0.045750786,0.0007815,0.645798 28 | 12978,364,998.3076923,52,0.043492077,0.000599286,0.569592 29 | 13724,364,1055.692308,52,0.048558538,0.000698714,0.636152 30 | 14378,312,1027,52,0.046092143,0.000684167,0.649395 31 | 12978,364,998.3076923,52,0.043372385,0.000674857,0.568565 32 | 13724,364,1055.692308,52,0.049418538,0.000599143,0.646635 33 | 14378,312,1027,52,0.046040286,0.000637833,0.648391 34 | 12978,364,998.3076923,52,0.042415692,0.000593857,0.555561 35 | 14378,13724,1027,1055.692308,0.046843357,0.050381,0.659358 36 | 14378,312,1027,52,0.046099857,0.000636333,0.649216 37 | 12978,364,998.3076923,52,0.042718,0.000806429,0.560979 38 | 13724,364,1055.692308,52,0.049306769,0.000785143,0.646484 39 | 
14378,312,1027,52,0.045566143,0.000666167,0.641923 40 | 12978,364,998.3076923,52,0.042864538,0.000580857,0.561305 41 | 13724,364,1055.692308,52,0.049315077,0.000611571,0.645377 42 | 14378,312,1027,52,0.046574286,0.000616333,0.655738 43 | 13724,364,1055.692308,52,0.048424231,0.000557714,0.633419 44 | 14378,312,1027,52,0.045951286,0.000695333,0.64749 45 | 12978,364,998.3076923,52,0.042386538,0.000599857,0.555224 46 | 13724,364,1055.692308,52,0.049694154,0.000769,0.651407 47 | 14378,312,1027,52,0.047501429,0.000589833,0.668559 48 | 12978,364,998.3076923,52,0.040882231,0.000599286,0.535664 49 | 13724,364,1055.692308,52,0.051130154,0.000607571,0.668945 50 | 14378,312,1027,52,0.046318286,0.000606333,0.652094 51 | 12978,364,998.3076923,52,0.042541769,0.00061,0.557313 52 | 13724,364,1055.692308,52,0.049003154,0.000707143,0.641126 53 | 14378,312,1027,52,0.0463765,0.000731667,0.653661 54 | 12978,364,998.3076923,52,0.041508769,0.00058,0.543674 55 | 13724,364,1055.692308,52,0.050220385,0.000607857,0.65712 56 | 14378,312,1027,52,0.046739071,0.000577167,0.65781 57 | 12978,364,998.3076923,52,0.041349846,0.000581,0.541615 58 | 13724,364,1055.692308,52,0.050166846,0.000685143,0.656965 59 | 14378,312,1027,52,0.046899929,0.0005995,0.660196 60 | 12978,364,998.3076923,52,0.041375231,0.000697571,0.542761 61 | 14378,364,1027,52,0.047188571,0.000697571,0.542761 62 | 14378,312,1027,52,0.044743786,0.000907,0.631855 63 | 12978,364,998.3076923,52,0.044147692,0.000621571,0.578271 64 | 13724,364,1055.692308,52,0.048606308,0.000622143,0.636237 65 | 14378,312,1027,52,0.046374429,0.000574667,0.65269 66 | 12978,364,998.3076923,52,0.042167692,0.000655,0.552765 67 | 12324,312,1027,52,0.0475185,0.000622,0.665411 68 | 14378,312,1027,52,0.045990143,0.000576667,0.647322 69 | 12978,364,998.3076923,52,0.042159846,0.000544,0.551886 70 | 13724,364,1055.692308,52,0.049094923,0.000575286,0.642261 71 | 14378,312,1027,52,0.047311357,0.000668833,0.666372 72 | 12978,364,998.3076923,52,0.042836154,0.00067,0.56156 
73 | 13724,364,1055.692308,52,0.048844538,0.000763143,0.640321 74 | 14378,312,1027,52,0.046235929,0.000714833,0.651592 75 | 14378,364,1027,52,0.045908786,0.000657143,0.647323 76 | -------------------------------------------------------------------------------- /Dataset/Dataset 2/trojan/AndroidTester_75.csv: -------------------------------------------------------------------------------- 1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration 2 | 9231,1515,769.25,189.375,0.360779583,0.267285625,6.46764 3 | 684,624,76,56.72727273,0.210657,3.929349818,45.118761 4 | 3643,496,331.1818182,55.11111111,1.481594273,3.530327889,48.070488 5 | 944,577,94.4,57.7,2.2006779,2.0266149,42.272928 6 | 924,594,92.4,59.4,2.1771128,2.0055671,41.826799 7 | 808,594,80.8,59.4,1.3637388,3.4389643,48.027031 8 | 1061,625,106.1,62.5,1.6759973,1.4729539,31.489512 9 | 7009,582,637.1818182,64.66666667,0.932833273,1.599469667,24.656393 10 | 8510,589,773.6363636,65.44444444,1.053315727,1.801155667,27.796874 11 | 11274,582,867.2307692,83.14285714,0.071165231,0.480554143,4.289027 12 | 13291,482,1022.384615,68.85714286,0.072213308,0.126375429,1.823401 13 | 13291,389,1022.384615,55.57142857,0.224917923,0.439618286,6.001261 14 | 3591,531,359.1,59,2.2110927,1.377628444,34.773108 15 | 818,645,90.88888889,58.63636364,1.803340222,1.901697727,37.148737 16 | 1229,539,111.7272727,59.88888889,3.160890364,1.113644333,44.792593 17 | 1300,620,130,68.88888889,0.4842616,2.707477444,29.422324 18 | 772,606,77.2,60.6,2.4744696,2.112631,45.871006 19 | 880,650,88,65,2.5332566,0.4858034,30.1906 20 | 894,589,89.4,58.9,0.8219715,3.3576526,41.796241 21 | 6165,1710,685,155.4545455,1.907112333,0.478253455,22.424799 22 | 3666,489,333.2727273,54.33333333,1.918789273,0.499852111,25.605351 23 | 828,581,75.27272727,64.55555556,1.772894818,1.895952889,36.565419 24 | 669,686,74.33333333,62.36363636,2.773910778,1.761794364,44.344935 25 | 
3928,437,327.3333333,54.625,0.98899925,2.479107375,31.70085 26 | 3643,496,331.1818182,55.11111111,2.491597182,2.937299111,53.843261 27 | 4924,605,492.4,60.5,1.0244591,0.8693194,18.937785 28 | 1003,577,91.18181818,64.11111111,0.750321455,2.673619222,32.316109 29 | 710,561,71,56.1,1.3741465,4.387906,57.620525 30 | 700,555,70,55.5,2.7107517,2.2565212,49.672729 31 | 4983,496,453,55.11111111,2.180034091,3.128993667,52.141318 32 | 2436,502,221.4545455,55.77777778,1.598198545,2.421099,39.370075 33 | 779,561,77.9,56.1,1.3426758,2.66267,40.053458 34 | 700,597,70,59.7,1.8277076,4.069385,58.970926 35 | 814,561,81.4,56.1,2.7985368,1.2618496,40.603864 36 | 700,555,70,55.5,2.5712553,3.9395091,65.107644 37 | 793,614,79.3,61.4,1.0604076,2.868772,39.291796 38 | 3617,1013,361.7,112.5555556,2.8431206,1.441117,41.401299 39 | 3222,589,402.75,58.9,0.859088375,1.2810868,19.794431 40 | 700,555,70,55.5,1.4860371,5.0811005,65.671376 41 | 666,561,74,56.1,2.166566111,2.0342376,40.617301 42 | 700,555,70,55.5,4.0626804,1.9279775,59.906579 43 | 711,567,71.1,56.7,1.9718562,2.7194815,46.913377 44 | 612,562,68,56.2,3.375359889,3.0589176,61.069897 45 | 3784,489,344,54.33333333,2.609163273,1.708903111,44.080924 46 | 928,593,103.1111111,53.90909091,2.150967778,0.776135364,27.896199 47 | 1109,482,100.8181818,53.55555556,2.315458636,0.371138889,28.810295 48 | 1226,482,111.4545455,53.55555556,3.919056091,0.629411,48.774316 49 | 6915,334,493.9285714,55.66666667,9.394775857,2.011904167,143.598287 50 | 720,632,80,57.45454545,1.580340222,3.948873,57.660665 51 | 898,502,81.63636364,55.77777778,0.863104364,4.157749667,46.913895 52 | 812,563,73.81818182,62.55555556,1.908398455,1.516605222,34.64183 53 | 9978,4259,1108.666667,387.1818182,0.021670667,0.041039364,0.646469 54 | 14057,1936,1277.909091,276.5714286,0.000369182,0.000180714,0.005436 55 | 16908,296,1207.714286,59.2,0.000263643,0.0000646,0.004015 56 | 16800,416,1400,52,0.00030475,0.00023425,0.005531 57 | 21000,260,1400,52,9.26667E-05,0.0001938,0.002359 58 
| 16112,416,1342.666667,52,0.006949333,0.000110625,0.084277 59 | 19600,312,1400,52,9.07857E-05,0.0001665,0.00227 60 | 18200,364,1400,52,0.000145538,0.000165429,0.00305 61 | 14270,440,1189.166667,55,0.007531583,0.00011625,0.091309 62 | 21000,260,1400,52,0.000220933,0.0001248,0.003938 63 | 18200,364,1400,52,7.10769E-05,0.000096,0.001596 64 | 17038,364,1310.615385,52,0.007216077,0.000156429,0.094904 65 | 18200,364,1400,52,0.000112923,0.000143,0.002469 66 | 19600,312,1400,52,9.40714E-05,0.0001125,0.001992 67 | 18681,312,1334.357143,52,0.006832214,0.000259,0.097205 68 | 19600,312,1400,52,0.000526071,0.000170167,0.008386 69 | 22400,208,1400,52,0.000123938,0.002386,0.002386 70 | 17350,364,1334.615385,52,0.008385538,0.000151857,0.110075 71 | 19600,312,1400,52,0.000210714,0.000222833,0.004287 72 | 19600,312,1400,52,9.24286E-05,0.000155167,0.002225 73 | 16104,416,1342,52,0.000160417,0.000143,0.003069 74 | 23800,156,1400,52,0.007830941,0.000171,0.133639 75 | 19600,312,1400,52,0.0001255,0.000169667,0.002775 76 | 19202,312,1371.571429,52,0.000118643,0.000068,0.002069 77 | -------------------------------------------------------------------------------- /Dataset/Dataset 2/trojan/DroidJack_75.csv: -------------------------------------------------------------------------------- 1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration 2 | 604,423,60.4,42.3,0.6278833,0.8051764,14.330597 3 | 549,340,45.75,42.5,2.63028075,0.162460625,32.863054 4 | 521,412,47.36363636,45.77777778,1.568119091,0.415189222,20.986013 5 | 556,345,46.33333333,43.125,0.774183833,2.8205075,31.854266 6 | 447,432,44.7,43.2,0.9567922,2.3155305,32.723227 7 | 574,424,47.83333333,53,0.330387,1.794419125,18.319997 8 | 517,418,47,46.44444444,0.337598182,0.931010556,12.092675 9 | 592,435,53.81818182,48.33333333,0.845618909,0.917920667,17.563094 10 | 1923,916,160.25,114.5,2.152666333,0.320335875,28.394683 11 | 
12674,4712,1267.4,471.2,0.0012323,0.0001614,0.013937 12 | 452,452,50.22222222,50.22222222,0.000397889,0.000397889,0.009505 13 | 12784,368,1065.333333,46,0.001068083,0.000366375,0.015748 14 | 464,420,46.4,42,0.4963755,2.6102024,31.065779 15 | 486,527,48.6,52.7,0.5307111,2.7305077,32.612188 16 | 549,340,45.75,42.5,1.37366525,2.851924125,39.299376 17 | 504,385,45.81818182,42.77777778,0.393327545,3.127368889,32.472923 18 | 560,307,46.66666667,43.85714286,1.347795917,1.161245143,29.955233 19 | 474,425,47.4,42.5,1.5565769,2.4944899,40.510668 20 | 589,317,45.30769231,45.28571429,1.861570308,2.313324429,40.393685 21 | 531,411,48.27272727,45.66666667,1.367365455,1.029569333,24.307144 22 | 469,385,46.9,42.77777778,0.6075137,2.957443667,32.692963 23 | 611,305,47,43.57142857,2.790732308,1.709250571,48.244274 24 | 447,425,44.7,42.5,0.4051304,2.8595129,32.646433 25 | 8672,370,788.3636364,46.25,0.795775273,0.17183025,10.128547 26 | 16904,280,1300.307692,40,0.002118846,0.000533571,0.03128 27 | 494,385,44.90909091,42.77777778,0.253200455,3.152892444,31.161237 28 | 509,385,46.27272727,42.77777778,2.030481818,2.012164778,40.444783 29 | 584,380,48.66666667,47.5,0.904711417,2.573834875,31.447216 30 | 18272,240,1305.142857,40,0.036395643,0.000689667,0.513677 31 | 7376,323,614.6666667,46.14285714,1.214627667,0.597650714,18.75963 32 | 12877,252,919.7857143,42,0.1103275,0.008587333,1.596109 33 | 12789,325,1065.75,40.625,0.012644917,0.815204125,6.673372 34 | 509,385,46.27272727,42.77777778,0.991480364,3.281982222,40.444124 35 | 514,392,46.72727273,43.55555556,2.268802273,1.730107,40.527788 36 | 373,418,46.625,46.44444444,1.023252625,1.597824333,32.499242 37 | 9973,4176,997.3,417.6,0.1210324,0.0072081,1.282405 38 | 11286,1568,1128.6,156.8,0.0108419,0.0004051,0.11247 39 | 12725,368,1060.416667,46,0.002174083,0.000471625,0.029862 40 | 4618,375,419.8181818,41.66666667,1.135488182,1.166562222,22.98943 41 | 474,425,47.4,42.5,0.6585679,3.4096893,40.682572 42 | 
520,436,47.27272727,48.44444444,0.676106909,1.133404556,17.637817 43 | 469,425,46.9,42.5,1.8039247,2.2452726,40.491973 44 | 514,397,46.72727273,44.11111111,1.575110182,2.564276778,40.404703 45 | 4562,460,456.2,46,0.3547376,2.6750203,30.297579 46 | 16872,280,1297.846154,40,0.002590769,0.000374,0.036298 47 | 4638,375,421.6363636,41.66666667,0.471202545,1.977509889,22.980817 48 | 520,415,47.27272727,46.11111111,0.290143727,2.105494889,22.141035 49 | 407,566,45.22222222,51.45454545,0.883423667,2.125143636,31.327393 50 | 579,347,48.25,43.375,0.408606333,1.299145625,15.296441 51 | 561,347,46.75,43.375,0.705417,2.50517425,28.506398 52 | 529,424,48.09090909,47.11111111,0.349791909,2.01566,21.988651 53 | 442,415,44.2,41.5,0.3843969,1.953759,23.381559 54 | 10089,398,840.75,49.75,0.151534417,0.600073,6.618997 55 | 7344,330,612,41.25,0.305987917,1.43025625,15.113905 56 | 442,441,44.2,49,1.0502016,2.444317889,32.505294 57 | 582,342,48.5,42.75,0.896522917,1.227278375,20.576502 58 | 559,444,50.81818182,49.33333333,0.629939091,1.482922556,20.275633 59 | 16912,240,1208,40,0.010454286,0.000471167,0.149187 60 | 514,380,46.72727273,42.22222222,0.564592364,2.802134778,31.429729 61 | 1902,427,172.9090909,47.44444444,0.196882364,1.972188444,19.915402 62 | 15616,240,1115.428571,40,0.001663,0.000615167,0.026973 63 | 424,477,47.11111111,43.36363636,0.796911889,2.901721,39.091138 64 | 527,428,47.90909091,47.55555556,0.304932091,1.780674222,19.380321 65 | 16912,240,1208,40,0.029941214,0.000561333,0.422545 66 | 490,383,49,42.55555556,4.9714792,0.709038667,56.100564 67 | 1878,957,170.7272727,106.3333333,0.747293182,2.118064222,27.282803 68 | 12674,4712,1267.4,471.2,0.0010817,0.00014,0.012217 69 | 11326,444,1029.636364,49.33333333,0.001054455,0.000265,0.013984 70 | 4539,477,453.9,47.7,0.3307535,0.3713376,7.020911 71 | 492,392,44.72727273,43.55555556,1.325905091,2.026035778,32.819278 72 | 580,452,52.72727273,50.22222222,0.375975455,0.405636667,7.78646 73 | 
527,433,47.90909091,48.11111111,0.127961273,1.465372556,14.595927 74 | 518,488,51.8,48.8,0.2271266,0.8433118,10.704384 75 | 513,421,46.63636364,46.77777778,0.687467818,0.529674,12.329212 76 | 549,340,45.75,42.5,1.547604417,2.61769375,39.512803 77 | -------------------------------------------------------------------------------- /Dataset/Dataset 2/trojan/HawkShaw_75.csv: -------------------------------------------------------------------------------- 1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration 2 | 1338,3769,148.6666667,418.7777778,0.015766,0.037854,0.596236 3 | 2781,1671,309,151.9090909,0.007215,0.021438273,0.300753 4 | 1567,1125,195.875,93.75,2.593616,0.034014333,21.157102 5 | 2065,602,206.5,66.88888889,0.019814,0.004838333,0.293994 6 | 2986,520,298.6,52,0.003246,0.0006183,0.038639 7 | 2798,1986,559.6,132.4,0.015186,0.005378867,0.156615 8 | 2853,1342,407.5714286,111.8333333,0.004111,0.002159417,0.119335 9 | 1796,974,199.5555556,88.54545455,0.021278,0.015513364,0.362146 10 | 2036,1041,226.2222222,94.63636364,0.014209,0.009590273,0.233374 11 | 1184,1093,148,91.08333333,0.077522,0.014618083,0.795596 12 | 19600,312,1400,52,0.000438,0.000223833,0.007481 13 | 19600,312,1400,52,0.000113,0.000211333,0.002849 14 | 16800,416,1400,52,0.000271,0.000250625,0.00526 15 | 21000,260,1400,52,0.000122,0.00231,0.00231 16 | 18200,364,1400,52,0.000267,0.000229,0.005072 17 | 19600,312,1400,52,0.000158,0.0001345,0.003025 18 | 16800,416,1400,52,0.000124,0.000134625,0.00257 19 | 19600,312,1400,52,0.000100,0.00012,0.002126 20 | 18200,312,1400,52,0.000178,0.000162,0.003368 21 | 18200,364,1400,52,0.000125,0.000211714,0.003111 22 | 21000,260,1400,52,0.000121,0.0001416,0.002521 23 | 18200,364,1400,52,0.000091,0.000147571,0.002211 24 | 18200,364,1400,52,0.000129,0.00018,0.002932 25 | 19600,312,1400,52,0.000093,0.000139667,0.002144 26 | 18200,364,1400,52,0.000139,0.000101857,0.002517 27 | 
16800,364,1400,52,0.000418,0.000220571,0.006589 28 | 18200,364,1400,52,0.000816,0.000856714,0.016609 29 | 21000,260,1400,52,0.000056,0.0001496,0.001587 30 | 5374,654,597.1111111,65.4,0.015354,0.023884,0.793215 31 | 1902,1617,237.75,134.75,0.161324,0.028680083,1.634756 32 | 2088,922,208.8,92.2,0.502505,0.0485692,5.510743 33 | 1747,855,174.7,85.5,1.811875,0.0450097,18.568849 34 | 1345,855,134.5,85.5,0.750307,0.0464091,7.967163 35 | 1025,653,102.5,65.3,25.919710,12.0291471,379.488571 36 | 2419,4186,268.7777778,380.5454545,0.041541,0.042222909,0.838322 37 | 6860,1318,762.2222222,119.8181818,0.019397,0.020134545,0.396052 38 | 989,2616,164.8333333,186.8571429,0.080633,0.010938429,0.636936 39 | 1215,860,121.5,86,10.065422,0.0469063,101.12328 40 | 1589,860,158.9,86,0.071415,0.0576619,1.290768 41 | 1614,860,161.4,86,0.048999,0.0576762,1.066747 42 | 1639,860,163.9,86,0.064626,0.0340103,0.986367 43 | 1895,792,189.5,79.2,0.032678,0.0463268,0.790049 44 | 1458,928,145.8,92.8,0.047416,0.055569,1.029849 45 | 1714,860,171.4,86,0.187834,0.0577943,2.456282 46 | 1252,822,139.1111111,74.72727273,6.841962,0.041854,62.038049 47 | 1217,3757,110.6363636,417.4444444,3.064854,0.039054667,34.064883 48 | 2298,1820,255.3333333,165.4545455,7.590469,0.043089182,68.788203 49 | 1028,3864,114.2222222,351.2727273,6.086610,0.035679909,55.171972 50 | 2995,1863,332.7777778,169.3636364,0.015072,0.042623455,0.604502 51 | 1875,792,187.5,79.2,3.570179,0.02908,35.992589 52 | 1795,928,179.5,92.8,0.285265,0.0378509,3.231163 53 | 2057,1963,228.5555556,196.3,4.314500,0.0471139,39.314757 54 | 1964,2271,196.4,227.1,0.181284,0.0235935,2.048776 55 | 2154,980,239.3333333,89.09090909,12.021741,0.085423909,109.135335 56 | 2270,975,227,97.5,0.131224,1.55829,1.55829 57 | 2040,860,204,86,3.346793,0.0350576,33.81851 58 | 2648,740,240.7272727,82.22222222,1.347818,0.012898333,14.942087 59 | 1452,996,145.2,99.6,0.013731,0.0122205,0.259513 60 | 2295,1392,459,92.8,0.012347,0.015879267,0.299922 61 | 
2090,844,232.2222222,76.72727273,0.013231,0.011104909,0.241232 62 | 1423,1408,284.6,100.5714286,0.127665,0.025620286,0.998865 63 | 1641,860,164.1,86,4.060108,0.0475922,41.076997 64 | 1908,912,212,82.90909091,13.096996,0.032658091,118.232202 65 | 2175,1220,310.7142857,93.84615385,0.027279,0.018778077,0.435065 66 | 1977,1152,282.4285714,88.61538462,0.029776,0.018739846,0.452048 67 | 1963,1084,280.4285714,83.38461538,0.017568,0.017978615,0.356699 68 | 1567,1100,195.875,91.66666667,0.073538,0.030069667,0.949139 69 | 1625,860,162.5,86,5.902242,0.0590767,59.613187 70 | 1937,860,193.7,86,6.068196,0.0355086,61.037045 71 | 2227,860,222.7,86,0.206149,0.0237078,2.298564 72 | 2028,860,202.8,86,12.344468,0.0688068,124.132743 73 | 3011,724,301.1,72.4,0.026888,0.0038576,0.307453 74 | 932,1184,103.5555556,107.6363636,0.006000,0.004229091,0.100518 75 | 4098,1504,455.3333333,136.7272727,0.026160,0.011338636,0.360161 76 | 1047,1340,174.5,95.71428571,0.015656,0.008912643,0.218713 77 | -------------------------------------------------------------------------------- /Dataset/Dataset 2/trojan/SpyMax_75.csv: -------------------------------------------------------------------------------- 1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration 2 | 826,10152,82.6,1015.2,0.12666,0.0217003,1.483603 3 | 468,14104,52,1282.181818,0.000463222,0.001868182,0.024719 4 | 520,13172,52,1317.2,0.0002183,0.0011614,0.013797 5 | 520,13092,52,1309.2,0.0003639,0.0013296,0.016935 6 | 624,9757,52,1219.625,0.003626333,0.00346975,0.071274 7 | 5787,1420,526.0909091,157.7777778,1.000515909,3.711587222,44.40996 8 | 1014,996,101.4,99.6,0.2021121,8.6185294,88.206415 9 | 1228,788,102.3333333,98.5,0.443867583,10.818956,91.878059 10 | 996,974,90.54545455,108.2222222,0.287269091,9.652376,90.031344 11 | 857,1075,85.7,107.5,0.1532421,7.9874633,81.407054 12 | 1263,772,105.25,96.5,0.234897667,6.35791525,53.682094 13 | 
990,996,99,99.6,0.1322238,8.7539815,88.862053 14 | 1260,788,105,98.5,0.279665583,10.98897188,91.267762 15 | 988,976,98.8,97.6,0.1109985,7.0384344,71.494329 16 | 1008,902,91.63636364,100.2222222,0.169979364,8.465874667,78.062645 17 | 996,979,90.54545455,108.7777778,0.158102455,9.791487444,89.862514 18 | 1254,882,114,98,0.163565636,8.254633556,76.090924 19 | 1011,1142,101.1,114.2,0.0204721,8.8920615,89.125336 20 | 1275,897,115.9090909,99.66666667,0.130686909,8.191028222,75.15681 21 | 1064,6139,96.72727273,682.1111111,0.080817,3.018422667,28.054791 22 | 608,8715,55.27272727,968.3333333,0.000739455,0.000432222,0.012024 23 | 416,12915,52,1076.25,0.00079075,0.003094167,0.043456 24 | 312,15715,52,1122.5,0.001864167,0.003644071,0.062202 25 | 312,14420,52,1030,0.001584667,0.002784143,0.048486 26 | 364,14315,52,1101.153846,0.001120571,0.004044077,0.060417 27 | 312,15715,52,1122.5,0.001177667,0.002842357,0.046859 28 | 364,13020,52,1001.538462,0.001901286,0.003154385,0.054316 29 | 312,15715,52,1122.5,0.002144,0.003084786,0.056051 30 | 364,14315,52,1101.153846,0.002700429,0.002597538,0.052671 31 | 364,13020,52,1001.538462,0.002321143,0.002335846,0.046614 32 | 312,15715,52,1122.5,0.002924167,0.002739786,0.055902 33 | 364,14315,52,1101.153846,0.002100714,0.002587923,0.048348 34 | 364,13020,52,1001.538462,0.001599429,0.002908231,0.049003 35 | 312,15715,52,1122.5,0.001403833,0.003628214,0.059218 36 | 364,14315,52,1101.153846,0.001497,0.002933231,0.048611 37 | 364,13020,52,1001.538462,0.001539571,0.002925769,0.048812 38 | 15715,15715,1122.5,1122.5,0.003536429,0.003536429,0.058592 39 | 766,4237,85.11111111,385.1818182,0.888631556,2.110949182,31.218125 40 | 1228,788,102.3333333,98.5,0.120679583,11.14400463,90.600192 41 | 1062,1011,106.2,101.1,0.0467927,8.9275389,89.743316 42 | 1228,788,102.3333333,98.5,0.055113833,11.206878,90.31639 43 | 1294,897,117.6363636,99.66666667,0.824873636,8.995013222,90.028729 44 | 1080,912,90,114,2.490228167,105.248529,105.248529 45 | 
1048,850,87.33333333,106.25,2.4790745,9.433771,105.219062 46 | 1294,897,117.6363636,99.66666667,2.630769636,5.135479556,75.157782 47 | 996,902,90.54545455,100.2222222,2.650070455,8.452001222,105.218786 48 | 11372,4308,1033.818182,478.6666667,0.008387182,0.360653111,3.338137 49 | 16859,1884,1296.846154,314,0.000267308,0.0000985,0.004166 50 | 6417,654,583.3636364,72.66666667,1.009899273,1.734581556,26.720126 51 | 1305,976,118.6363636,108.4444444,2.616980909,5.096328444,74.653746 52 | 732,1266,73.2,126.6,1.288615,6.6868396,79.754546 53 | 1305,840,118.6363636,93.33333333,2.181283455,6.901020778,86.103305 54 | 1228,788,102.3333333,98.5,3.49053675,7.771528125,104.058666 55 | 1028,840,93.45454545,93.33333333,2.512567818,6.959213111,90.271164 56 | 1040,973,104,108.1111111,2.7263012,6.958272222,89.888707 57 | 1028,840,93.45454545,93.33333333,2.485776455,7.005427556,90.392389 58 | 996,984,90.54545455,109.3333333,2.466362818,6.989017,90.031144 59 | 996,976,90.54545455,108.4444444,2.464541,6.989515778,90.015593 60 | 1277,959,116.0909091,106.5555556,2.417429818,7.027518,89.83939 61 | 1028,840,93.45454545,93.33333333,90.238448,7.115695778,90.238448 62 | 996,976,90.54545455,108.4444444,2.353422545,7.127103,90.031575 63 | 1040,1145,104,114.5,1.2887131,6.4206516,77.093647 64 | 1228,788,102.3333333,98.5,3.137792917,8.140542125,102.777852 65 | 1028,840,93.45454545,93.33333333,2.272796727,7.253093,90.278601 66 | 996,902,90.54545455,100.2222222,2.270013636,7.486191444,92.345873 67 | 950,6216,86.36363636,690.6666667,0.024406,3.273078778,29.726175 68 | 416,12915,52,1076.25,0.00074825,0.000337583,0.010037 69 | 312,15715,52,1122.5,0.001034333,0.000356571,0.011198 70 | 312,14420,52,1030,0.000980667,0.000331286,0.010522 71 | 416,12862,52,1071.833333,0.001092875,0.00054225,0.013465 72 | 312,15715,52,1122.5,0.001011333,0.000387714,0.011496 73 | 312,14420,52,1030,0.000966,0.000308,0.010108 74 | 416,12915,52,1076.25,0.001479875,0.000473,0.017515 75 | 
312,15715,52,1122.5,0.001565667,0.000498786,0.016377 76 | 364,14315,52,1101.153846,0.001513,0.000451615,0.016462 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Android-RAT-Dataset 2 |

This repo contains all the datasets for my research/analysis on:
3 | "DETEKSI REMOTE ACCESS TROJAN PADA ANDROID BERBASIS PENGAMATAN LALU LINTAS JARINGAN
MENGGUNAKAN MACHINE LEARNING."

4 | 5 | # Apps Name 6 | | Name | Label | 7 | | ---------- | ---------- | 8 | | AhMyth | Trojan | 9 | | AndroidRAT | Trojan | 10 | | AndroidTester | Trojan | 11 | | DroidJack | Trojan | 12 | | HawkShaw | Trojan | 13 | | SpyMax | Trojan | 14 | | Google | Benign | 15 | | Facebook | Benign | 16 | | Twitter | Benign | 17 | 18 | # DATA SET 1 FEATURES 19 | | Name | Desc | 20 | | ----------- | ------------ | 21 | | Source | The port number of source packet | 22 | | Destination | The port number of destination packet | 23 | | Protocol | The protocol type used in the packet | 24 | | Length | The size of data generated by the packet (Byte) | 25 | | Type | Type of packet - In or Out | 26 | | Duration | The duration between the previous packet and the next packet (Second) | 27 | 28 | # DATA SET 2 FEATURES 29 | | Name | Desc | 30 | | ------------- | ------------ | 31 | | Total Length Out | The amount of data size generated by packets out of 20 mixed packets (Byte) | 32 | | Total Length In | The amount of data size generated by packets in of 20 mixed packets (Byte) | 33 | | Avg Packet Length Out | The average data size generated by packets out of 20 mixed packets (Byte) | 34 | | Avg Packet Length In | The average data size generated by incoming packets of 20 mixed packets (Byte) | 35 | | Avg Duration Out | The average duration required by packets out of 20 mixed packets (Second) | 36 | | Avg Duration In | The average duration required by incoming packets from 20 mixed packets (Second) | 37 | | Total Duration | The total duration required by 20 mixed packets (Second) | 38 | 39 | # Dataset 40 | | | Dataset 1 | Dataset 2 | 41 | | ----- | ---- | ---- | 42 | | Training | 14400 | 5400 | 43 | | Testing | 3600 | 600 | 44 | | Total | 18000 | 6000 | 45 | 46 | *note : the raw/original dataset contains more rows per RAT pcap 47 | 48 | # Accuracy and Machine Learning Algorithm Used 49 | | Dataset 1 | Decision Tree | Random Forest | Naive Bayes | 50 | | ------------ | ---------- | ---------- | ---------- | 51 | 
| N900 - RAT90 | 0.9989 | 0.9990 | 0.1181 | 52 | | N1800 - RAT90 | 0.9994 | 0.9989 | 0.0756 | 53 | | N1800 - RAT1800 | 0.9997 | 0.9997 | 0.4691 | 54 | 55 | | Dataset 2 | Decision Tree | Random Forest | Naive Bayes | 56 | | ------------ | ---------- | ---------- | ---------- | 57 | | N150 - RAT10 | 0.873 | 0.933 | 0.513 | 58 | | N300 - RAT10 | 0.883 | 0.929 | 0.545 | 59 | | N300 - RAT300 | 0.90 | 0.928 | 0.71 | 60 | 61 | *note : accuracy based on jupyter python result 62 | 63 | # Source Original Dataset 64 | Name : Android Mischief Dataset v1
65 | Author : Kamila Babayeva, Stratosphere Laboratory
66 | Date : November 18th 2020
67 | URL : https://www.stratosphereips.org/android-mischief-dataset 68 | 69 | # Looking for Android Remote Access Trojan ? 70 | URL : https://github.com/wishihab/Android-RATList 71 | -------------------------------------------------------------------------------- /Remote Access Trojan Detection On Android Based On Network Traffic Observation Using Machine Learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Remote Access Trojan Detection On Android Based On Network Traffic Observation Using Machine Learning.pdf -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Decision Tree/Decision Tree Scratch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn import tree" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "clf = tree.DecisionTreeClassifier();" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 15, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# Gmail 3 Facebook 3 Twitter 3 \n", 28 | "# RAT 3 Tester 3 Droidjack 3\n", 29 | "# TLSv.1.3 = 3\n", 30 | "# TLSv.1.2 = 2\n", 31 | "# tcp = 1\n", 32 | "# udp = 0\n", 33 | "\n", 34 | "# out 0\n", 35 | "# in 1\n", 36 | "\n", 37 | "# Length, Data, Protocol, Duration, Type, Source, Destination\n", 38 | "X = [\n", 39 | " ['591','0','3','0.00153','0','54534','443'],\n", 40 | " ['52','0','1','0.001147','0','54534','443'],\n", 41 | " ['60','0','1','0','0','54534','443'],\n", 42 | " ['60','0','1','0','0','33952','443'],\n", 43 | " ['60','0','1','0.000755','1','443','33952'],\n", 44 | " 
['452','0','3','0.00153','0','33952','443'],\n", 45 | " ['60','0','1','0.007413','1','443','56228'],\n", 46 | " ['52','0','1','0.001804','0','56228','443'],\n", 47 | " ['569','0','2','0.012655','0','56228','443'],\n", 48 | " ['60','0','1','0','0','37451','1337'],\n", 49 | " ['60','0','1','0.022053','1','1337','37451'],\n", 50 | " ['52','0','1','0.006701','0','37451','1337'],\n", 51 | " ['59','7','1','0.281862','1','1337','37451'],\n", 52 | " ['52','0','1','0.107486','0','37451','1337'],\n", 53 | " ['59','7','1','11.910185','1','1337','37451'],\n", 54 | " ['40','0','1','0.041368','0','41893','1337'],\n", 55 | " ['45','5','1','1.422961','0','41893','1337'],\n", 56 | " ['45','5','1','0.270677','1','1337','41893']\n", 57 | " \n", 58 | "]" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 16, 64 | "metadata": {}, 65 | "outputs": [ 66 | { 67 | "data": { 68 | "text/plain": [ 69 | "[['591', '0', '3', '0.00153', '0', '54534', '443'],\n", 70 | " ['52', '0', '1', '0.001147', '0', '54534', '443'],\n", 71 | " ['60', '0', '1', '0', '0', '54534', '443'],\n", 72 | " ['60', '0', '1', '0', '0', '33952', '443'],\n", 73 | " ['60', '0', '1', '0.000755', '1', '443', '33952'],\n", 74 | " ['452', '0', '3', '0.00153', '0', '33952', '443'],\n", 75 | " ['60', '0', '1', '0.007413', '1', '443', '56228'],\n", 76 | " ['52', '0', '1', '0.001804', '0', '56228', '443'],\n", 77 | " ['569', '0', '2', '0.012655', '0', '56228', '443'],\n", 78 | " ['60', '0', '1', '0', '0', '37451', '1337'],\n", 79 | " ['60', '0', '1', '0.022053', '1', '1337', '37451'],\n", 80 | " ['52', '0', '1', '0.006701', '0', '37451', '1337'],\n", 81 | " ['59', '7', '1', '0.281862', '1', '1337', '37451'],\n", 82 | " ['52', '0', '1', '0.107486', '0', '37451', '1337'],\n", 83 | " ['59', '7', '1', '11.910185', '1', '1337', '37451'],\n", 84 | " ['40', '0', '1', '0.041368', '0', '41893', '1337'],\n", 85 | " ['45', '5', '1', '1.422961', '0', '41893', '1337'],\n", 86 | " ['45', '5', '1', '0.270677', '1', '1337', 
'41893']]" 87 | ] 88 | }, 89 | "execution_count": 16, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "X" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 17, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "Y = [\n", 105 | " ['Benign'],\n", 106 | " ['Benign'],\n", 107 | " ['Benign'],\n", 108 | " ['Benign'],\n", 109 | " ['Benign'],\n", 110 | " ['Benign'],\n", 111 | " ['Benign'],\n", 112 | " ['Benign'],\n", 113 | " ['Benign'],\n", 114 | " ['Trojan'],\n", 115 | " ['Trojan'],\n", 116 | " ['Trojan'],\n", 117 | " ['Trojan'],\n", 118 | " ['Trojan'],\n", 119 | " ['Trojan'],\n", 120 | " ['Trojan'],\n", 121 | " ['Trojan'],\n", 122 | " ['Trojan']\n", 123 | "]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 13, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "[['Benign'],\n", 135 | " ['Benign'],\n", 136 | " ['Benign'],\n", 137 | " ['Benign'],\n", 138 | " ['Benign'],\n", 139 | " ['Benign'],\n", 140 | " ['Benign'],\n", 141 | " ['Benign'],\n", 142 | " ['Benign'],\n", 143 | " ['Trojan'],\n", 144 | " ['Trojan'],\n", 145 | " ['Trojan'],\n", 146 | " ['Trojan'],\n", 147 | " ['Trojan'],\n", 148 | " ['Trojan'],\n", 149 | " ['Trojan'],\n", 150 | " ['Trojan'],\n", 151 | " ['Trojan']]" 152 | ] 153 | }, 154 | "execution_count": 13, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "Y" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 18, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "clf = clf.fit(X,Y)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 19, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "['Benign']\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "prediction = 
clf.predict([['52','0','1','0.007339','1','443','56228']])\n", 187 | "print(prediction)" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.8.5" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 4 219 | } 220 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Decision Tree/Decision Trees - Model 1 - 2021.rmp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Decision Tree/Images/Decision Tree - RapidMiner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/Decision Tree - RapidMiner.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Decision 
Tree/Images/Decision Tree Result - Python (Jupyter) vs RapidMiner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/Decision Tree Result - Python (Jupyter) vs RapidMiner.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Decision Tree/Images/imagename.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/imagename.png -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev02.png -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Decision Tree/Images/imagenamev03.png -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes - Rapid Miner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes - Rapid Miner.PNG 
-------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes Result - Python (Jupyter) vs RapidMiner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Naive Bayes/Images/Naive Bayes Result - Python (Jupyter) vs RapidMiner.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Naive Bayes/Naive Bayes - Model 1 - 2021.rmp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "from sklearn.naive_bayes import GaussianNB" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def read_file(filename):\n", 22 | " df = pd.read_csv(filename)\n", 23 | " print(df.shape)\n", 24 | " df['Label'] = df['Label'].apply(lambda x: x.strip().lower())\n", 25 | " return df" 26 | ] 27 | }, 28 | { 29 | "cell_type": 
"code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "(14400, 7)\n", 38 | "(990, 7)\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "FILE_HO_TRAIN = 'training.csv'\n", 44 | "FILE_HO_TEST = 'testingv01.csv'\n", 45 | "\n", 46 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 47 | "df_ho_test = read_file(FILE_HO_TEST)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from sklearn.preprocessing import LabelEncoder\n", 57 | "\n", 58 | "target_encoder = LabelEncoder()\n", 59 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['Label'])\n", 60 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['Label'])\n", 61 | "\n", 62 | "x_ho_train = df_ho_train.drop(['Label', 'Target'], axis=1)\n", 63 | "y_ho_train = df_ho_train['Target']\n", 64 | "\n", 65 | "x_ho_test = df_ho_test.drop(['Label', 'Target'], axis=1)\n", 66 | "y_ho_test = df_ho_test['Target']" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "0.11818181818181818" 78 | ] 79 | }, 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes\n", 87 | "modelnb = GaussianNB()\n", 88 | "\n", 89 | "# Memasukkan data training pada fungsi klasifikasi Naive Bayes\n", 90 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n", 91 | "modelnb.score(x_ho_test,y_ho_test)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 11.818181818181818\n" 104 | ] 105 | } 106 | ], 107 
| "source": [ 108 | "from sklearn.metrics import accuracy_score, f1_score\n", 109 | "\n", 110 | "predicted= modelnb.predict(x_ho_test)\n", 111 | "\n", 112 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 113 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "image/png": "\n", 124 | "text/plain": [ 125 | "
" 126 | ] 127 | }, 128 | "metadata": { 129 | "needs_background": "light" 130 | }, 131 | "output_type": "display_data" 132 | } 133 | ], 134 | "source": [ 135 | "# Create Confusion Matrix\n", 136 | "\n", 137 | "import seaborn as sns\n", 138 | "import matplotlib.pyplot as plt\n", 139 | "\n", 140 | "from sklearn.metrics import confusion_matrix\n", 141 | "confusion_matrix(y_ho_test, predicted) \n", 142 | "\n", 143 | "f, ax = plt.subplots(figsize=(8,5))\n", 144 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 145 | "plt.xlabel(\"Predicted Class\")\n", 146 | "plt.ylabel(\"Actual Class\")\n", 147 | "plt.show()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 8, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " precision recall f1-score support\n", 160 | "\n", 161 | " 0 0.91 0.03 0.06 900\n", 162 | " 1 0.09 0.97 0.17 90\n", 163 | "\n", 164 | " accuracy 0.12 990\n", 165 | " macro avg 0.50 0.50 0.12 990\n", 166 | "weighted avg 0.83 0.12 0.07 990\n", 167 | "\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "# Performance Matrix Report - Precision Recall f1score\n", 173 | "\n", 174 | "from sklearn.metrics import classification_report\n", 175 | "print (classification_report(y_ho_test, predicted))" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.8.5" 203 | } 204 | }, 205 | "nbformat": 4, 206 | 
"nbformat_minor": 4 207 | } 208 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "from sklearn.naive_bayes import GaussianNB" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def read_file(filename):\n", 22 | " df = pd.read_csv(filename)\n", 23 | " print(df.shape)\n", 24 | " df['Label'] = df['Label'].apply(lambda x: x.strip().lower())\n", 25 | " return df" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "(14400, 7)\n", 38 | "(1890, 7)\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "FILE_HO_TRAIN = 'training.csv'\n", 44 | "FILE_HO_TEST = 'testingv02.csv'\n", 45 | "\n", 46 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 47 | "df_ho_test = read_file(FILE_HO_TEST)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from sklearn.preprocessing import LabelEncoder\n", 57 | "\n", 58 | "target_encoder = LabelEncoder()\n", 59 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['Label'])\n", 60 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['Label'])\n", 61 | "\n", 62 | "x_ho_train = df_ho_train.drop(['Label', 'Target'], axis=1)\n", 63 | "y_ho_train = df_ho_train['Target']\n", 64 | "\n", 65 | "x_ho_test = df_ho_test.drop(['Label', 'Target'], axis=1)\n", 66 | "y_ho_test = df_ho_test['Target']" 67 | ] 68 | }, 69 
| { 70 | "cell_type": "code", 71 | "execution_count": 5, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "0.07566137566137567" 78 | ] 79 | }, 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes\n", 87 | "modelnb = GaussianNB()\n", 88 | "\n", 89 | "# Memasukkan data training pada fungsi klasifikasi Naive Bayes\n", 90 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n", 91 | "modelnb.score(x_ho_test,y_ho_test)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 7.5661375661375665\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "from sklearn.metrics import accuracy_score, f1_score\n", 109 | "\n", 110 | "predicted= modelnb.predict(x_ho_test)\n", 111 | "\n", 112 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 113 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "data": { 123 | "image/png": "\n", 124 | "text/plain": [ 125 | "
" 126 | ] 127 | }, 128 | "metadata": { 129 | "needs_background": "light" 130 | }, 131 | "output_type": "display_data" 132 | } 133 | ], 134 | "source": [ 135 | "# Create Confusion Matrix\n", 136 | "\n", 137 | "import seaborn as sns\n", 138 | "import matplotlib.pyplot as plt\n", 139 | "\n", 140 | "from sklearn.metrics import confusion_matrix\n", 141 | "confusion_matrix(y_ho_test, predicted) \n", 142 | "\n", 143 | "f, ax = plt.subplots(figsize=(8,5))\n", 144 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 145 | "plt.xlabel(\"Predicted Class\")\n", 146 | "plt.ylabel(\"Actual Class\")\n", 147 | "plt.show()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 8, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " precision recall f1-score support\n", 160 | "\n", 161 | " 0 0.95 0.03 0.06 1800\n", 162 | " 1 0.05 0.97 0.09 90\n", 163 | "\n", 164 | " accuracy 0.08 1890\n", 165 | " macro avg 0.50 0.50 0.08 1890\n", 166 | "weighted avg 0.91 0.08 0.06 1890\n", 167 | "\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "# Performance Matrix Report - Precision Recall f1score\n", 173 | "\n", 174 | "from sklearn.metrics import classification_report\n", 175 | "print (classification_report(y_ho_test, predicted))" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.8.5" 203 | } 204 | }, 205 | "nbformat": 4, 206 | 
"nbformat_minor": 4 207 | } 208 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/README.md: -------------------------------------------------------------------------------- 1 | # csv 2 | 3 | # Algoritm 4 | 5 | 1. Decision Tree 6 | 2. Random Forest 7 | 3. Naive Bayes -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Random Forest/Images/Random Forest - Rapid Miner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Random Forest/Images/Random Forest - Rapid Miner.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Random Forest/Images/Random Forest Result - Python (Jupyter) vs RapidMiner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 1/Random Forest/Images/Random Forest Result - Python (Jupyter) vs RapidMiner.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Random Forest/Random Forest - Model 1 - 2021.rmp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 
-------------------------------------------------------------------------------- /Testing Scratch/Dataset 1/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "from sklearn.ensemble import RandomForestClassifier" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def read_file(filename):\n", 22 | " df = pd.read_csv(filename)\n", 23 | " print(df.shape)\n", 24 | " df['Label'] = df['Label'].apply(lambda x: x.strip().lower())\n", 25 | " return df" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "(14400, 7)\n", 38 | "(990, 7)\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "# training csv is 90% of 18000 dataset model 1 - transformed\n", 44 | "# testing csv is 10% of 18000 dataset model 1 - transformed\n", 45 | "\n", 46 | "FILE_HO_TRAIN = 'training.csv'\n", 47 | "FILE_HO_TEST = 'testingv01.csv'\n", 48 | "\n", 49 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 50 | "df_ho_test = read_file(FILE_HO_TEST)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "from sklearn.preprocessing import LabelEncoder\n", 60 | "\n", 61 | "target_encoder = LabelEncoder()\n", 62 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['Label'])\n", 63 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['Label'])\n", 64 | "\n", 65 | "x_ho_train = df_ho_train.drop(['Label', 'Target'], axis=1)\n", 66 | "y_ho_train = df_ho_train['Target']\n", 67 | 
"\n", 68 | "x_ho_test = df_ho_test.drop(['Label', 'Target'], axis=1)\n", 69 | "y_ho_test = df_ho_test['Target']" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "0.998989898989899" 81 | ] 82 | }, 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "# do like rapidminer setting\n", 90 | "# criterion entropy is information gain\n", 91 | "# estimator set 100\n", 92 | "# max depth 10\n", 93 | "\n", 94 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n", 95 | "modelRF.fit(x_ho_train,y_ho_train)\n", 96 | "modelRF.score(x_ho_test,y_ho_test)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 6, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 99.8989898989899\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "from sklearn.metrics import accuracy_score, f1_score\n", 114 | "\n", 115 | "predicted= modelRF.predict(x_ho_test)\n", 116 | "\n", 117 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 118 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "image/png": "\n", 129 | "text/plain": [ 130 | "
" 131 | ] 132 | }, 133 | "metadata": { 134 | "needs_background": "light" 135 | }, 136 | "output_type": "display_data" 137 | } 138 | ], 139 | "source": [ 140 | "# Create Confusion Matrix\n", 141 | "\n", 142 | "import seaborn as sns\n", 143 | "import matplotlib.pyplot as plt\n", 144 | "\n", 145 | "from sklearn.metrics import confusion_matrix\n", 146 | "confusion_matrix(y_ho_test, predicted) \n", 147 | "\n", 148 | "f, ax = plt.subplots(figsize=(8,5))\n", 149 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 150 | "plt.xlabel(\"Predicted Class\")\n", 151 | "plt.ylabel(\"Actual Class\")\n", 152 | "plt.show()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 8, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | " precision recall f1-score support\n", 165 | "\n", 166 | " 0 1.00 1.00 1.00 900\n", 167 | " 1 0.99 1.00 0.99 90\n", 168 | "\n", 169 | " accuracy 1.00 990\n", 170 | " macro avg 0.99 1.00 1.00 990\n", 171 | "weighted avg 1.00 1.00 1.00 990\n", 172 | "\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "# Performance Matrix Report - Precision Recall f1score\n", 178 | "\n", 179 | "from sklearn.metrics import classification_report\n", 180 | "print (classification_report(y_ho_test, predicted))" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.8.5" 208 | } 209 | }, 210 | "nbformat": 4, 211 | 
"nbformat_minor": 4 212 | } 213 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Decision Tree/Images/Decision Tree Result dataset 2- Python (Jupyter) vs RapidMiner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/Decision Tree Result dataset 2- Python (Jupyter) vs RapidMiner.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Decision Tree/Images/RapidMiner Process Model.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/RapidMiner Process Model.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Decision Tree/Images/imagename01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/imagename01.png -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Decision Tree/Images/imagename02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/imagename02.png -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Decision Tree/Images/imagename03.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Decision Tree/Images/imagename03.png -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Naive Bayes/Images/Naive Bayes result dataset2 - Python (Jupyter) vs RapidMiner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Naive Bayes/Images/Naive Bayes result dataset2 - Python (Jupyter) vs RapidMiner.png -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Naive Bayes/Images/RapidMiner Naive Bayes Process Model.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Naive Bayes/Images/RapidMiner Naive Bayes Process Model.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "\n", 11 | "from sklearn.naive_bayes import GaussianNB" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def read_file(filename):\n", 21 | " df = pd.read_csv(filename)\n", 22 | " print(df.shape)\n", 23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n", 24 | " return df" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": 
{}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "(5400, 8)\n", 37 | "(150, 8)\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "FILE_HO_TRAIN = 'training.csv'\n", 43 | "FILE_HO_TEST = 'testingv1.csv'\n", 44 | "\n", 45 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 46 | "df_ho_test = read_file(FILE_HO_TEST)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 5, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sklearn.preprocessing import LabelEncoder\n", 56 | "\n", 57 | "target_encoder = LabelEncoder()\n", 58 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n", 59 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n", 60 | "\n", 61 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n", 62 | "y_ho_train = df_ho_train['Target']\n", 63 | "\n", 64 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n", 65 | "y_ho_test = df_ho_test['Target']" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 7, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "0.5133333333333333" 77 | ] 78 | }, 79 | "execution_count": 7, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes\n", 86 | "modelnb = GaussianNB()\n", 87 | "\n", 88 | "# Memasukkan data training pada fungsi klasifikasi Naive Bayes\n", 89 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n", 90 | "modelnb.score(x_ho_test,y_ho_test)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 8, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = 51.33333333333333\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "from sklearn.metrics import 
accuracy_score, f1_score\n", 108 | "\n", 109 | "predicted= modelnb.predict(x_ho_test)\n", 110 | "\n", 111 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 112 | "print('Akurasi pada Algoritma Naive Bayes menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 9, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "image/png": "\n", 123 | "text/plain": [ 124 | "
" 125 | ] 126 | }, 127 | "metadata": { 128 | "needs_background": "light" 129 | }, 130 | "output_type": "display_data" 131 | } 132 | ], 133 | "source": [ 134 | "# Create Confusion Matrix\n", 135 | "\n", 136 | "import seaborn as sns\n", 137 | "import matplotlib.pyplot as plt\n", 138 | "\n", 139 | "from sklearn.metrics import confusion_matrix\n", 140 | "confusion_matrix(y_ho_test, predicted) \n", 141 | "\n", 142 | "f, ax = plt.subplots(figsize=(8,5))\n", 143 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 144 | "plt.xlabel(\"Predicted Class\")\n", 145 | "plt.ylabel(\"Actual Class\")\n", 146 | "plt.show()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 10, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | " precision recall f1-score support\n", 159 | "\n", 160 | " 0 1.00 0.48 0.65 140\n", 161 | " 1 0.12 1.00 0.22 10\n", 162 | "\n", 163 | " accuracy 0.51 150\n", 164 | " macro avg 0.56 0.74 0.43 150\n", 165 | "weighted avg 0.94 0.51 0.62 150\n", 166 | "\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "# Performance Matrix Report - Precision Recall f1score\n", 172 | "\n", 173 | "from sklearn.metrics import classification_report\n", 174 | "print (classification_report(y_ho_test, predicted))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.8.5" 202 | } 203 | }, 204 | "nbformat": 4, 205 | 
"nbformat_minor": 4 206 | } 207 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "\n", 11 | "from sklearn.naive_bayes import GaussianNB" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def read_file(filename):\n", 21 | " df = pd.read_csv(filename)\n", 22 | " print(df.shape)\n", 23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n", 24 | " return df" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "(5400, 8)\n", 37 | "(310, 8)\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "FILE_HO_TRAIN = 'training.csv'\n", 43 | "FILE_HO_TEST = 'testingv2.csv'\n", 44 | "\n", 45 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 46 | "df_ho_test = read_file(FILE_HO_TEST)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 5, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sklearn.preprocessing import LabelEncoder\n", 56 | "\n", 57 | "target_encoder = LabelEncoder()\n", 58 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n", 59 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n", 60 | "\n", 61 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n", 62 | "y_ho_train = df_ho_train['Target']\n", 63 | "\n", 64 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n", 65 | "y_ho_test = df_ho_test['Target']" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 
| "execution_count": 8, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "0.5451612903225806" 77 | ] 78 | }, 79 | "execution_count": 8, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes\n", 86 | "modelnb = GaussianNB()\n", 87 | "\n", 88 | "# Memasukkan data training pada fungsi klasifikasi Naive Bayes\n", 89 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n", 90 | "modelnb.score(x_ho_test,y_ho_test)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 9, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 54.516129032258064\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "from sklearn.metrics import accuracy_score, f1_score\n", 108 | "\n", 109 | "predicted= modelnb.predict(x_ho_test)\n", 110 | "\n", 111 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 112 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 10, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAdAAAAE9CAYAAAC7hzNcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcoklEQVR4nO3debRdZX3/8fcngCBTAIHIpKCkKkLFSqnVJWWogqIMTsCvWLTYOKAotipWW5ZdorRSLC2gBkRYVaBRtCAOQJHBCQGRMgQVFIqRSEBAHFAD9/v7427oNSS55+zck33PPe9X1l7nnGfvs/f33hXy5fs8z352qgpJktSfWV0HIEnSMDKBSpLUgglUkqQWTKCSJLVgApUkqQUTqCRJLazZdQArsvSeH3l/jYbeQwuv6DoEaUo8frfXZlDnbvPv/VqbPmVg8fTKClSSpBambQUqSRoRYw93HUErJlBJUrdqrOsIWjGBSpK6NWYClSSpb2UFKklSC1agkiS1YAUqSVILzsKVJKkFK1BJklpwDFSSpP45C1eSpDasQCVJasEKVJKkFpyFK0lSC1agkiS14BioJEktDGkF6gO1JUlqwQpUktQtu3AlSepflbNwJUnq35COgZpAJUndsgtXkqQWrEAlSWrBlYgkSWrBClSSpBYcA5UkqQUrUEmSWhjSCtSl/CRJ3Rob63+bRJLTkyxJcuNy9v1tkkqy6YS29yS5Ncn3k+zdS9gmUElSp6oe7nvrwRnAPss2JtkGeCFwx4S2HYCDgWc23zklyRqTXcAEKknq1gAq0Kq6Arh3Obs+ArwLqAlt+wPnVNVvq+o24FZg18muYQKVJHWrxvreksxLcs2Ebd5kl0myH/CTqvqfZXZtBfx4wudFTdtKOYlIktStFpOIqmo+ML/X45OsC7wXeNHydi/vEpOd0wQqSerW6rmN5anAdsD/JAHYGrg2ya6MV5zbTDh2a+DOyU5oF64kacarqhuqavOq2raqtmU8af5RVf0UOB84OMnaSbYD5gJXTXZOE6gkqVuDuY3lbOBbwNOSLEpy+IqOraqbgAXAQuArwBHVw1Rfu3AlSd0aQBduVR0yyf5tl/l8LHBsP9cwgUqSujWkKxGZQCVJ3TKBSpLUgovJS5LUghWoJEktWIFKktSCFagkSS1YgUqS1IIVqCRJLZhAJUlqoSZ98Mm0ZAKVJHXLClSSpBZMoJIkteAsXEmSWhjSCtTngUqS1IIVqCSpW87ClSSphSHtwjWBSpK6ZQKVJKkFZ+FKktS/GnMMVJKk/tmFK0lSC3bhSpLUgl24kiS1YBeuJEktDGkCdSm/Gex9HzyB3fY9mAMOfeOjbSd/4lPsuf+hvOKwI3jFYUdwxTevAmDp0qW879gTOPA1b+Llh72Zq669vquwpcc45owvssc7TuQVx5z6mH1nXvhtdv7rD3HfL34NwP2//DWvP/7T/OlbjudDZ124ukNVG1X9b5NIcnqSJUlunND24STfS3J9ks8n2WjCvvckuTXJ95Ps3UvYJtAZ7ICXvJCPnfCBx7S/5qADOPfMkzn3zJPZ7Xm7AvDZ878CwOf/46Oc+q8f5PiTTmVsSP+vUDPPfs/biVPedtBj2n967wNcufA2tthkw0fb1l5rTY7Yfzfe8co9V2eIWhVjY/1vkzsD2GeZtouBHavqD4EfAO8BSLIDcDDwzOY7pyRZY7ILmEBnsF123onZG27Q07E/vP0O/mSXnQF4wsYbscH663HT924ZYHRS757zB09iw/XWeUz78f/537z9lXtA8mjb49d+HM+euw2PW8sRqqExVv1vk6iqK4B7l2m7qKoeaj5eCWzdvN8fOKeqfltVtwG3ArtOdo2BJdAkT0/y7iT/luTE5v0zBnU99e7sc7/AgX/5Jt73wRP4+QO/AOBp22/HpV/7Fg899DCL7vwpC79/Kz+96+6OI5VW7LLrbmGzjTfgadvM6ToUraoa639bdX8FfLl5vxXw4wn7FjVtKzW
QBJrk3cA5QICrgKub92cnOXoQ11RvDjpwX7684HTOPeNkNnvCJnz4pPExpQP33Zs5m23KQYcfyT+d+HF23vEZrLHmpD0YUice/O1STvvSN3jzfi/oOhRNhRYVaJJ5Sa6ZsM3r9XJJ3gs8BHz6kablHDZpmTuoPo7DgWdW1dKJjUlOAG4Cjlvel5pfwDyAU/7lA7z+Lw8ZUHija9NNNn70/Sv3ezFHvPMYANZccw3e/bY3PLrvL97wDp689ZarPT6pF4vuvo+f3PNzXv2PpwOw5L4HOOQDn+RTf3cYm85ev+Po1K9qMd+iquYD8/v9XpLDgJcCe1U9OhtpEbDNhMO2Bu6c7FyDSqBjwJbA/y7TvkWzb7km/kKW3vOj4byzdpq7+5572WzTTQC45PJvsv1TngzAg7/5DVWw7uPX4ZtXXcuaa6zBU7d7cpehSis0d+vNufSEtz36+cVHn8JZ730tG2+wbodRabpLsg/wbuDPqurXE3adD5zVFHlbAnMZ7z1dqUEl0LcDlyS5hf/rV34SsD3wlgFdU8t45zHHcfV3r+f++x9grwMO5c2Hv4arv3s937/lRxDY6olzOOZdRwJw730/5w1HvZfMmsWczZ7Ah/7hbzuOXvo/R8//L675wR3c/8sHedE7T+JN+72AA1/wrBUe/+KjT+FXD/6WpQ8/zKXfvYWPHnUwT91y09UYsfoygJWIkpwN7A5smmQRcAzjs27XBi7O+MSzK6vqjVV1U5IFwELGu3aPqKqHJ71GDehJ4ElmMT6LaSvG+5cXAVf3EhRYgWpmeGjhFV2HIE2Jx+/22uWNE06JX33g0L7/vV/vfZ8aWDy9Gtg876oaY3yasCRJK+ZauJIktTCki7aYQCVJ3bIClSSpBZ8HKklSC1agkiT1r81CCtOBCVSS1C0rUEmSWjCBSpLUgpOIJElqwQpUkqT+lQlUkqQWTKCSJLXgbSySJLVgBSpJUgtDmkBndR2AJEnDyApUktSpquGsQE2gkqRuDWkXrglUktQtE6gkSf1zIQVJktowgUqS1MJwrqNgApUkdcsuXEmS2jCBSpLUwpB24boSkSSpUzVWfW+TSXJ6kiVJbpzQtkmSi5Pc0rxuPGHfe5LcmuT7SfbuJW4TqCSpW2MttsmdAeyzTNvRwCVVNRe4pPlMkh2Ag4FnNt85Jckak13ABCpJ6tQgKtCqugK4d5nm/YEzm/dnAgdMaD+nqn5bVbcBtwK7TnYNE6gkqVuDqUCXZ05VLQZoXjdv2rcCfjzhuEVN20qZQCVJnaqx/rck85JcM2GbtwohZHlhTfYlZ+FKkrrVoqKsqvnA/D6/dleSLapqcZItgCVN+yJgmwnHbQ3cOdnJrEAlSZ1qU4G2dD5wWPP+MOC8Ce0HJ1k7yXbAXOCqyU5mBSpJmnGSnA3sDmyaZBFwDHAcsCDJ4cAdwKsAquqmJAuAhcBDwBFV9fBk1zCBSpK6NYCFFKrqkBXs2msFxx8LHNvPNUygkqROrUKXbKcmHQNN8tQkazfvd09yZJKNBh6ZJGkkrMYx0CnVyySic4GHk2wPfALYDjhroFFJkkbGTE6gY1X1EHAg8K9VdRSwxWDDkiSNjEr/2zTQyxjo0iSHMD7l92VN21qDC0mSNEqmS0XZr14S6OuANwLHVtVtzT0ynxpsWJKkUVFj06Oi7NekCbSqFgJHAjSPftmgqo4bdGCSpNEwYyvQJJcB+zXHXgfcneTyqnrHYEOTJI2CmiZjmv3qZRLR7Kp6AHg58Mmqeg7w54MNS5I0KoZ1Fm4vY6BrNovuvhp474DjkSSNmGEdA+2lAv1H4ELg1qq6OslTgFsGG5YkaVRU9b9NB71MIvoM8JkJn38EvGKQQUmSRsewVqC9TCJaBzgceCawziPtVfVXA4xLkjQihjWB9tKF+x/AE4G9gcsZf9DoLwYZlCRpdAxrF24vCXT7qvp74FdVdSawL7DTYMOSJI2KGkvf23TQSwJd2rzen2RHYDaw7cA
ikiRpCPRyG8v8ZgWivwfOB9YH/mGgUUmSRsawLqTQyyzc05q3lwNPGWw4kqRRM10WRujXChNokpUu1VdVJ0x9OJKkUTM2AyvQDVZbFJKkkTXjunCr6v2rMxBJ0miaLrNq+7XCWbhJ/jnJG5fTflSSfxpsWJKkUTGs94GurAv3pcCOy2k/EbgeePdAIpIkjZRhrUBXlkCr6rFzo6pqLMlw/rSSpGlnWCcRrWwhhV8nmbtsY9P24OBCkiSNkqr0vU0HK0ug/wB8Oclrk+zUbK8DvogLKUiSpsigxkCbOTs3JbkxydlJ1kmySZKLk9zSvG7cNu4VJtCq+jJwALAHcEaz7Q68oqq+1PaCkiRNNFbpe5tMkq2AI4FdqmpHYA3gYOBo4JKqmgtc0nxuZaUrEVXVjcBhbU8uSdJkBtgluybw+CRLgXWBO4H3MF4MApwJXEbLSbG9LCYvSdLADKILt6p+AhwP3AEsBn5eVRcBc6pqcXPMYmDztnH3sph8Jx6/5Qu6DkFaZZutO7vrEKQpsfj+1w7s3G1m4SaZB8yb0DS/quZP2L8xsD+wHXA/8Jkkh65apL9v2iZQSdJoaNOF2yTL+Ss55M+B26rqboAknwOeB9yVZIuqWpxkC2BJi5CBlS8m/+/ACgvlqjqy7UUlSXrEgO4DvQN4bpJ1Gb/1ci/gGuBXjM/tOa55Pa/tBVZWgV7T9qSSJHWpqr6d5LPAtcBDwHcZr1jXBxYkOZzxJPuqttdY2WLyZ7Y9qSRJvRrU0rZVdQxwzDLNv2W8Gl1lk46BJtmM8Sm+OwDrTAhsz6kIQJI02mbiUn6P+DRwM+Mzmd4P3A5cPcCYJEkjZCYu5feIJ1TVJ4ClVXV5Vf0V8NwBxyVJGhFjLbbpoJfbWJY2r4uT7Mv4Sg5bDy4kSdIoKaZHRdmvXhLoB5LMBv4G+HdgQ+CogUYlSRoZY9PkAdn9mjSBVtUFzdufM76wvCRJU2ZsplagST7JcmYZN2OhkiStkpnchXvBhPfrAAcyPg4qSdIqmy6TgvrVSxfuuRM/Jzkb+O+BRSRJGikzuQJd1lzgSVMdiCRpNM3YCjTJL/j9MdCf0vLho5IkLWvGJtCq2mB1BCJJGk3D2oU76UpESS7ppU2SpDbG0v82HazseaDrAOsCmzZP9n4k5A2BLVdDbJKkETAT7wN9A/B2xpPld/i/BPoAcPJgw5IkjYohXYhopc8DPRE4Mclbq+rfV2NMkiRNe708jWUsyUaPfEiycZI3Dy4kSdIoGdansfSSQP+6qu5/5ENV3Qf89cAikiSNlLGk72066GUhhVlJUlUFkGQN4HGDDUuSNCpm3BjoBBcCC5J8jPGf843AVwYalSRpZEyXLtl+9ZJA3w3MA97E+Ezci4BTBxmUJGl0TJf7Ovs16RhoVY1V1ceq6pVV9QrgJsYfrC1J0iobI31v00FPi8kn2Rk4BDgIuA343ABjkiSNkBk3BprkD4CDGU+cPwP+E0hV7bGaYpMkjYBh7cJdWQX6PeBrwMuq6laAJEetlqgkSSNjWCcRrWwM9BWMP7rs0iSnJtkLpknHsyRpxqgWWy+SbJTks0m+l+TmJH+aZJMkFye5pXnduG3cK0ygVfX5qjoIeDpwGXAUMCfJR5O8qO0FJUmaaIBPYzkR+EpVPR14FnAzcDRwSVXNBS5pPrfSyyzcX1XVp6vqpcDWwHWrckFJkiYaxFJ+STYEdgM+AVBVv2tW1dsfOLM57EzggLZx97KU36Oq6t6q+nhV7dn2gpIkTTSgtXCfAtwNfDLJd5OclmQ9YE5VLQZoXjdvG3dfCVSSpKlW6X9LMi/JNRO2ecucdk3gj4CPVtWzgV8xxb2nPd0HKknSoLSZhVtV84H5KzlkEbCoqr7dfP4s4wn0riRbVNXiJFsAS1pcHrAClSR1bBBduFX1U+DHSZ7WNO0FLATOBw5r2g4DzmsbtxWoJKlTA1yJ6K3Ap5M8DvgR8DrGC8cFSQ4H7gBe1fbkJlB
J0oxUVdcBuyxn115TcX4TqCSpUzNxKT9JkgZuWJfyM4FKkjplApUkqYUZ9zgzSZJWB8dAJUlqwS5cSZJasAtXkqQWxoY0hZpAJUmdsgtXkqQWhrP+NIFKkjpmBSpJUgvexiJJUgtOIpIkqYXhTJ8mUElSxxwDlSSphWHtwp3VdQCSJA0jK1BJUqeGs/40gUqSOuYYqCRJLQzrGKgJVJLUqeFMnyZQSVLH7MKVJKmFGtIa1AQqSeqUFagkSS0M6yQiF1IYUXu/aHduuvEKvrfw67zrnUd0HY7UsxNO+gA33PI1Lv3meY+2bbTRbM75/Gl84ztf5pzPn8bs2Rt2GKH6VS226cAEOoJmzZrFv514LC992aHs9Kw9OOigA3jGM+Z2HZbUkwVnfZ7/98p5v9f2lqNez9cvv5LnP+fFfP3yK3nLUa/vKDq1MUb1vfUqyRpJvpvkgubzJkkuTnJL87px27hNoCNo1z9+Nj/84e3cdtsdLF26lAULzmO/l+3ddVhST6785ne4776f/17b3i/ZkwVn/xcAC87+L/bZd68OIlNbYy22PrwNuHnC56OBS6pqLnBJ87mV1Z5Ak7xudV9Tv2/LrZ7Ijxfd+ejnRT9ZzJZbPrHDiKRVs9nmT2DJXfcAsOSue9h0s006jkj9qBZ/epFka2Bf4LQJzfsDZzbvzwQOaBt3FxXo+zu4piZIHvv496rpMqogadS0qUCTzEtyzYRt3nJO/a/Au/j9onVOVS0GaF43bxv3QGbhJrl+RbuAOSv53jxgHkDWmM2sWesNIDr9ZNFittl6y0c/b73VFixefFeHEUmr5u4lP2PzOZuy5K572HzOptxz971dh6Q+tLkPtKrmA/NXtD/JS4ElVfWdJLu3Dm4lBlWBzgH+EnjZcrafrehLVTW/qnapql1MnoNz9TXXsf3227Htttuw1lpr8epX788XLrio67Ck1i768qW8+pADAHj1IQdw4Ze+2m1A6suAxkCfD+yX5HbgHGDPJJ8C7kqyBUDzuqRt3INKoBcA61fV/y6z3Q5cNqBrqkcPP/wwb3v7+/jSF8/ixusv47Of/QILF/6g67Cknpxy2oe54KKzeercbfnOTV/lkNe8nJM+ciq77fE8vvGdL7PbHs/jpI+cNvmJNG2MVfW9Taaq3lNVW1fVtsDBwFer6lDgfOCw5rDDgPNWcIpJZbqOfa35uK2mZ2BSHzZbd3bXIUhTYvH9Cx87eWKKvObJL+/73/v/+N/P9RxP04X7t1X10iRPABYATwLuAF5VVa36/F2JSJLUqUFXS1V1GU3vZ1X9DJiS+5xMoJKkTg3rUn4mUElSp3waiyRJLfg0FkmSWrALV5KkFuzClSSpBbtwJUlqYbquRzAZE6gkqVOOgUqS1IJduJIkteAkIkmSWrALV5KkFpxEJElSC46BSpLUgmOgkiS1MKxjoLO6DkCSpGFkBSpJ6pSTiCRJamFYu3BNoJKkTjmJSJKkFsbswpUkqX/DmT5NoJKkjjkGKklSCyZQSZJa8DYWSZJaGNYK1JWIJEmdqhZ/JpNkmySXJrk5yU1J3ta0b5Lk4iS3NK8bt43bBCpJ6lRV9b314CHgb6rqGcBzgSOS7AAcDVxSVXOBS5rPrZhAJUmdGqP63iZTVYur6trm/S+Am4GtgP2BM5vDzgQOaBu3Y6CSpE4NehJRkm2BZwPfBuZU1eLmuouTbN72vFagkqROtalAk8xLcs2Ebd7yzp1kfeBc4O1V9cBUxm0FKknqVJu1cKtqPjB/ZcckWYvx5Pnpqvpc03xXki2a6nMLYEnfF29YgUqSOjVW1fc2mSQBPgHcXFUnTNh1PnBY8/4w4Ly2cVuBSpJmoucDrwFuSHJd0/Z3wHHAgiSHA3cAr2p7AROoJKlTg3icWVV9HcgKdu81FdcwgUqSOuXjzCRJasEHakuS1IIVqCRJLViBSpLUghWoJEktWIFKktRC1VjXIbRiApUkdWpYH6htApUkdWrQT2MZFBO
oJKlTVqCSJLVgBSpJUgvexiJJUgvexiJJUgt24UqS1IKTiCRJamFYK9BZXQcgSdIwsgKVJHXKWbiSJLUwrF24JlBJUqecRCRJUgtWoJIkteAYqCRJLbgSkSRJLViBSpLUwrCOgbqQgiSpU9XiTy+S7JPk+0luTXL0VMdtBSpJ6tQgKtAkawAnAy8EFgFXJzm/qhZO1TWsQCVJnaqqvrce7ArcWlU/qqrfAecA+09l3CZQSVKnqsXWg62AH0/4vKhpmzLTtgv3od/9JF3HMNMlmVdV87uOQ1pV/l0ebm3+vU8yD5g3oWn+Mn8HlnfOKe0rtgIdbfMmP0QaCv5dHjFVNb+qdpmwLfs/UIuAbSZ83hq4cypjMIFKkmaiq4G5SbZL8jjgYOD8qbzAtO3ClSSprap6KMlbgAuBNYDTq+qmqbyGCXS0OWakmcK/y3qMqvoS8KVBnT/DugKEJEldcgxUkqQWTKAjatBLXEmrQ5LTkyxJcmPXsWj0mEBH0IQlrl4M7AAckmSHbqOSWjkD2KfrIDSaTKCjaeBLXEmrQ1VdAdzbdRwaTSbQ0TTwJa4kaaYzgY6mgS9xJUkznQl0NA18iStJmulMoKNp4EtcSdJMZwIdQVX1EPDIElc3AwumeokraXVIcjbwLeBpSRYlObzrmDQ6XIlIkqQWrEAlSWrBBCpJUgsmUEmSWjCBSpLUgglUkqQWTKCaUZI8nOS6JDcm+UySdVfhXGckeWXz/rSVLbifZPckz2txjduTbLqc9vWTfDzJD5PclOSKJH/S7Ptlv9eRNPVMoJppHqyqnatqR+B3wBsn7myeRNO3qnp9VS1cySG7A30n0JU4jfFF0udW1TOB1wKPSbSSumMC1Uz2NWD7pjq8NMlZwA1J1kjy4SRXJ7k+yRsAMu6kJAuTfBHY/JETJbksyS7N+32SXJvkf5JckmRbxhP1UU31+4IkmyU5t7nG1Ume33z3CUkuSvLdJB9nOesSJ3kq8CfA+6pqDKB5cs4Xlzlu/eb61ya5Icn+Tft6Sb7YxHdjkoOa9uOan+36JMdP8e9aGjlrdh2ANAhJ1mT8eadfaZp2BXasqtuSzAN+XlV/nGRt4BtJLgKeDTwN2AmYAywETl/mvJsBpwK7NefapKruTfIx4JdVdXxz3FnAR6rq60mexPiqT88AjgG+XlX/mGRfYN5ywn8mcF1VPTzJj/kb4MCqeqDpBr4yyfmMPx/zzqrat4lldpJNgAOBp1dVJdmop1+kpBUygWqmeXyS65r3XwM+wXjX6lVVdVvT/iLgDx8Z3wRmA3OB3YCzm8R1Z5KvLuf8zwWueORcVbWiZ1H+ObBD8miBuWGSDZprvLz57heT3NfuxwTGq9cPJtkNGGP8kXRzgBuA45P8E3BBVX2t+R+K3wCnNdX1BatwXUmYQDXzPFhVO09saJLYryY2AW+tqguXOe4lTP5Yt/RwDIwPj/xpVT24nFgm+/5NwLOSzHqkC3cF/gLYDHhOVS1NcjuwTlX9IMlzgJcAH0pyUVPx7grsxfjDA94C7NnDzyFpBRwD1Si6EHhTkrUAkvxBkvWAK4CDmzHSLYA9lvPdbwF/lmS75rubNO2/ADaYcNxFjCcpmuN2bt5ewXjiI8mLgY2XvUBV/RC4Bnh/moybZO4jY5wTzAaWNMlzD+DJzbFbAr+uqk8BxwN/lGR9YHZVfQl4O7AzklaJFahG0WnAtsC1TYK6GzgA+DzjVdkNwA+Ay5f9YlXd3Yyhfi7JLGAJ8ELgC8BnmyT3VuBI4OQk1zP+39kVjE80ej9wdpJrm/PfsYIYXw/8C3Brkl8DPwPeucwxnwa+kOQa4Drge037TsCHk4wBS4E3MZ7cz0uyDuNV9FG9/KIkrZhPY5EkqQW7cCVJasEEKklSCyZQSZJaMIFKktSCCVSSpBZMoJIktWAClSSpBROoJEkt/H97fimDLew+6wAAAABJRU5ErkJggg==\n", 123 | "text/plain": [ 
124 | "
" 125 | ] 126 | }, 127 | "metadata": { 128 | "needs_background": "light" 129 | }, 130 | "output_type": "display_data" 131 | } 132 | ], 133 | "source": [ 134 | "# Create Confusion Matrix\n", 135 | "\n", 136 | "import seaborn as sns\n", 137 | "import matplotlib.pyplot as plt\n", 138 | "\n", 139 | "from sklearn.metrics import confusion_matrix\n", 140 | "confusion_matrix(y_ho_test, predicted) \n", 141 | "\n", 142 | "f, ax = plt.subplots(figsize=(8,5))\n", 143 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 144 | "plt.xlabel(\"Predicted Class\")\n", 145 | "plt.ylabel(\"Actual Class\")\n", 146 | "plt.show()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 11, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | " precision recall f1-score support\n", 159 | "\n", 160 | " 0 1.00 0.53 0.69 300\n", 161 | " 1 0.07 1.00 0.12 10\n", 162 | "\n", 163 | " accuracy 0.55 310\n", 164 | " macro avg 0.53 0.77 0.41 310\n", 165 | "weighted avg 0.97 0.55 0.67 310\n", 166 | "\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "# Performance Matrix Report - Precision Recall f1score\n", 172 | "\n", 173 | "from sklearn.metrics import classification_report\n", 174 | "print (classification_report(y_ho_test, predicted))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.8.5" 202 | } 203 | }, 204 | "nbformat": 4, 205 | 
"nbformat_minor": 4 206 | } 207 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Naive Bayes/NaiveBayesClassifier with sklearn - Dataset 1 - testingv03.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "\n", 11 | "from sklearn.naive_bayes import GaussianNB" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def read_file(filename):\n", 21 | " df = pd.read_csv(filename)\n", 22 | " print(df.shape)\n", 23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n", 24 | " return df" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "(5400, 8)\n", 37 | "(600, 8)\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "FILE_HO_TRAIN = 'training.csv'\n", 43 | "FILE_HO_TEST = 'testingv3.csv'\n", 44 | "\n", 45 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 46 | "df_ho_test = read_file(FILE_HO_TEST)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 5, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from sklearn.preprocessing import LabelEncoder\n", 56 | "\n", 57 | "target_encoder = LabelEncoder()\n", 58 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n", 59 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n", 60 | "\n", 61 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n", 62 | "y_ho_train = df_ho_train['Target']\n", 63 | "\n", 64 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n", 65 | "y_ho_test = df_ho_test['Target']" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 
| "execution_count": 8, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "0.71" 77 | ] 78 | }, 79 | "execution_count": 8, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "# Mengaktifkan/memanggil/membuat fungsi klasifikasi Naive Bayes\n", 86 | "modelnb = GaussianNB()\n", 87 | "\n", 88 | "# Memasukkan data training pada fungsi klasifikasi Naive Bayes\n", 89 | "nbtrain = modelnb.fit(x_ho_train, y_ho_train)\n", 90 | "modelnb.score(x_ho_test,y_ho_test)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 9, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 71.0\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "from sklearn.metrics import accuracy_score, f1_score\n", 108 | "\n", 109 | "predicted= modelnb.predict(x_ho_test)\n", 110 | "\n", 111 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 112 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 11, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "image/png": "\n", 123 | "text/plain": [ 124 | "
" 125 | ] 126 | }, 127 | "metadata": { 128 | "needs_background": "light" 129 | }, 130 | "output_type": "display_data" 131 | } 132 | ], 133 | "source": [ 134 | "# Create Confusion Matrix\n", 135 | "\n", 136 | "import seaborn as sns\n", 137 | "import matplotlib.pyplot as plt\n", 138 | "\n", 139 | "from sklearn.metrics import confusion_matrix\n", 140 | "confusion_matrix(y_ho_test, predicted) \n", 141 | "\n", 142 | "f, ax = plt.subplots(figsize=(8,5))\n", 143 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 144 | "plt.xlabel(\"Predicted Class\")\n", 145 | "plt.ylabel(\"Actual Class\")\n", 146 | "plt.show()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 12, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | " precision recall f1-score support\n", 159 | "\n", 160 | " 0 0.83 0.53 0.65 300\n", 161 | " 1 0.65 0.89 0.75 300\n", 162 | "\n", 163 | " accuracy 0.71 600\n", 164 | " macro avg 0.74 0.71 0.70 600\n", 165 | "weighted avg 0.74 0.71 0.70 600\n", 166 | "\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "# Performance Matrix Report - Precision Recall f1score\n", 172 | "\n", 173 | "from sklearn.metrics import classification_report\n", 174 | "print (classification_report(y_ho_test, predicted))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.8.5" 202 | } 203 | }, 204 | "nbformat": 4, 205 | 
"nbformat_minor": 4 206 | } 207 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/README.md: -------------------------------------------------------------------------------- 1 | # csv 2 | 3 | # Algoritm 4 | 5 | 1. Decision Tree 6 | 2. Random Forest 7 | 3. Naive Bayes -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Random Forest/Images/Random Forest result dataset 2 Python (Jupyter) vs RapidMiner.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Random Forest/Images/Random Forest result dataset 2 Python (Jupyter) vs RapidMiner.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Random Forest/Images/RapidMiner Random Forest Model.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wishihab/Android-RAT-Dataset/fa4b9f7c94e19cd2c47e7985e47aa08b1530bed7/Testing Scratch/Dataset 2/Random Forest/Images/RapidMiner Random Forest Model.PNG -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv01.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "from sklearn.ensemble import RandomForestClassifier" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def read_file(filename):\n", 22 | " df = pd.read_csv(filename)\n", 23 | " 
print(df.shape)\n", 24 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n", 25 | " return df" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 4, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "(5400, 8)\n", 38 | "(150, 8)\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "# training 5400\n", 44 | "# testing 150-10\n", 45 | "\n", 46 | "FILE_HO_TRAIN = 'training.csv'\n", 47 | "FILE_HO_TEST = 'testingv1.csv'\n", 48 | "\n", 49 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 50 | "df_ho_test = read_file(FILE_HO_TEST)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "from sklearn.preprocessing import LabelEncoder\n", 60 | "\n", 61 | "target_encoder = LabelEncoder()\n", 62 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n", 63 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n", 64 | "\n", 65 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n", 66 | "y_ho_train = df_ho_train['Target']\n", 67 | "\n", 68 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n", 69 | "y_ho_test = df_ho_test['Target']" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 6, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "0.9333333333333333" 81 | ] 82 | }, 83 | "execution_count": 6, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "# do like rapidminer setting\n", 90 | "# criterion entropy is information gain\n", 91 | "# estimator set 100\n", 92 | "# max depth 10\n", 93 | "\n", 94 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n", 95 | "modelRF.fit(x_ho_train,y_ho_train)\n", 96 | "modelRF.score(x_ho_test,y_ho_test)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | 
"execution_count": 7, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 93.33333333333333\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "from sklearn.metrics import accuracy_score, f1_score\n", 114 | "\n", 115 | "predicted= modelRF.predict(x_ho_test)\n", 116 | "\n", 117 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 118 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 8, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "image/png": "\n", 129 | "text/plain": [ 130 | "
" 131 | ] 132 | }, 133 | "metadata": { 134 | "needs_background": "light" 135 | }, 136 | "output_type": "display_data" 137 | } 138 | ], 139 | "source": [ 140 | "# Create Confusion Matrix\n", 141 | "\n", 142 | "import seaborn as sns\n", 143 | "import matplotlib.pyplot as plt\n", 144 | "\n", 145 | "from sklearn.metrics import confusion_matrix\n", 146 | "confusion_matrix(y_ho_test, predicted) \n", 147 | "\n", 148 | "f, ax = plt.subplots(figsize=(8,5))\n", 149 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 150 | "plt.xlabel(\"Predicted Class\")\n", 151 | "plt.ylabel(\"Actual Class\")\n", 152 | "plt.show()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 9, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | " precision recall f1-score support\n", 165 | "\n", 166 | " 0 1.00 0.93 0.96 140\n", 167 | " 1 0.50 1.00 0.67 10\n", 168 | "\n", 169 | " accuracy 0.93 150\n", 170 | " macro avg 0.75 0.96 0.81 150\n", 171 | "weighted avg 0.97 0.93 0.94 150\n", 172 | "\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "# Performance Matrix Report - Precision Recall f1score\n", 178 | "\n", 179 | "from sklearn.metrics import classification_report\n", 180 | "print (classification_report(y_ho_test, predicted))" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.8.5" 208 | } 209 | }, 210 | "nbformat": 4, 211 | 
"nbformat_minor": 4 212 | } 213 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv02.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "from sklearn.ensemble import RandomForestClassifier" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "def read_file(filename):\n", 22 | " df = pd.read_csv(filename)\n", 23 | " print(df.shape)\n", 24 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n", 25 | " return df" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "(5400, 8)\n", 38 | "(310, 8)\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "# training 5400\n", 44 | "# testing 300-10\n", 45 | "\n", 46 | "FILE_HO_TRAIN = 'training.csv'\n", 47 | "FILE_HO_TEST = 'testingv2.csv'\n", 48 | "\n", 49 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 50 | "df_ho_test = read_file(FILE_HO_TEST)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "from sklearn.preprocessing import LabelEncoder\n", 60 | "\n", 61 | "target_encoder = LabelEncoder()\n", 62 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n", 63 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n", 64 | "\n", 65 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n", 66 | "y_ho_train = df_ho_train['Target']\n", 67 | "\n", 68 | "x_ho_test = df_ho_test.drop(['label', 
'Target'], axis=1)\n", 69 | "y_ho_test = df_ho_test['Target']" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "0.9290322580645162" 81 | ] 82 | }, 83 | "execution_count": 5, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "# do like rapidminer setting\n", 90 | "# criterion entropy is information gain\n", 91 | "# estimator set 100\n", 92 | "# max depth 10\n", 93 | "\n", 94 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n", 95 | "modelRF.fit(x_ho_train,y_ho_train)\n", 96 | "modelRF.score(x_ho_test,y_ho_test)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 6, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 92.90322580645162\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "from sklearn.metrics import accuracy_score, f1_score\n", 114 | "\n", 115 | "predicted= modelRF.predict(x_ho_test)\n", 116 | "\n", 117 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 118 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 7, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "image/png": "\n", 129 | "text/plain": [ 130 | "
" 131 | ] 132 | }, 133 | "metadata": { 134 | "needs_background": "light" 135 | }, 136 | "output_type": "display_data" 137 | } 138 | ], 139 | "source": [ 140 | "# Create Confusion Matrix\n", 141 | "\n", 142 | "import seaborn as sns\n", 143 | "import matplotlib.pyplot as plt\n", 144 | "\n", 145 | "from sklearn.metrics import confusion_matrix\n", 146 | "confusion_matrix(y_ho_test, predicted) \n", 147 | "\n", 148 | "f, ax = plt.subplots(figsize=(8,5))\n", 149 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 150 | "plt.xlabel(\"Predicted Class\")\n", 151 | "plt.ylabel(\"Actual Class\")\n", 152 | "plt.show()" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 8, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "name": "stdout", 162 | "output_type": "stream", 163 | "text": [ 164 | " precision recall f1-score support\n", 165 | "\n", 166 | " 0 1.00 0.93 0.96 300\n", 167 | " 1 0.31 1.00 0.48 10\n", 168 | "\n", 169 | " accuracy 0.93 310\n", 170 | " macro avg 0.66 0.96 0.72 310\n", 171 | "weighted avg 0.98 0.93 0.95 310\n", 172 | "\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "# Performance Matrix Report - Precision Recall f1score\n", 178 | "\n", 179 | "from sklearn.metrics import classification_report\n", 180 | "print (classification_report(y_ho_test, predicted))" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python 3", 194 | "language": "python", 195 | "name": "python3" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.8.5" 208 | } 209 | }, 210 | "nbformat": 4, 211 | 
"nbformat_minor": 4 212 | } 213 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/Random Forest/RandomForestClassifier with sklearn - Dataset 1 - testingv03.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "\n", 11 | "from sklearn.ensemble import RandomForestClassifier" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def read_file(filename):\n", 21 | " df = pd.read_csv(filename)\n", 22 | " print(df.shape)\n", 23 | " df['label'] = df['label'].apply(lambda x: x.strip().lower())\n", 24 | " return df" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "(5400, 8)\n", 37 | "(600, 8)\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "# training 5400\n", 43 | "# testing 300-300\n", 44 | "\n", 45 | "FILE_HO_TRAIN = 'training.csv'\n", 46 | "FILE_HO_TEST = 'testingv3.csv'\n", 47 | "\n", 48 | "df_ho_train = read_file(FILE_HO_TRAIN)\n", 49 | "df_ho_test = read_file(FILE_HO_TEST)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 4, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "from sklearn.preprocessing import LabelEncoder\n", 59 | "\n", 60 | "target_encoder = LabelEncoder()\n", 61 | "df_ho_train['Target'] = target_encoder.fit_transform(df_ho_train['label'])\n", 62 | "df_ho_test['Target'] = target_encoder.transform(df_ho_test['label'])\n", 63 | "\n", 64 | "x_ho_train = df_ho_train.drop(['label', 'Target'], axis=1)\n", 65 | "y_ho_train = df_ho_train['Target']\n", 66 | "\n", 67 | "x_ho_test = df_ho_test.drop(['label', 'Target'], axis=1)\n", 68 | 
"y_ho_test = df_ho_test['Target']" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 10, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "0.9283333333333333" 80 | ] 81 | }, 82 | "execution_count": 10, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "# do like rapidminer setting\n", 89 | "# criterion entropy is information gain\n", 90 | "# estimator set 100\n", 91 | "# max depth 10\n", 92 | "\n", 93 | "modelRF = RandomForestClassifier(criterion=\"entropy\", n_estimators=100, max_depth=10)\n", 94 | "modelRF.fit(x_ho_train,y_ho_train)\n", 95 | "modelRF.score(x_ho_test,y_ho_test)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 11, 101 | "metadata": {}, 102 | "outputs": [ 103 | { 104 | "name": "stdout", 105 | "output_type": "stream", 106 | "text": [ 107 | "Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = 92.83333333333333\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "from sklearn.metrics import accuracy_score, f1_score\n", 113 | "\n", 114 | "predicted= modelRF.predict(x_ho_test)\n", 115 | "\n", 116 | "accuracy_ho = accuracy_score(predicted, y_ho_test) * 100\n", 117 | "print('Akurasi pada Algoritma RandomForest menggunakan split dengan persentase 90:10 = %s' % accuracy_ho)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 12, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "image/png": "\n", 128 | "text/plain": [ 129 | "
" 130 | ] 131 | }, 132 | "metadata": { 133 | "needs_background": "light" 134 | }, 135 | "output_type": "display_data" 136 | } 137 | ], 138 | "source": [ 139 | "# Create Confusion Matrix\n", 140 | "\n", 141 | "import seaborn as sns\n", 142 | "import matplotlib.pyplot as plt\n", 143 | "\n", 144 | "from sklearn.metrics import confusion_matrix\n", 145 | "confusion_matrix(y_ho_test, predicted) \n", 146 | "\n", 147 | "f, ax = plt.subplots(figsize=(8,5))\n", 148 | "sns.heatmap(confusion_matrix(y_ho_test, predicted), annot=True, fmt=\".0f\", ax=ax)\n", 149 | "plt.xlabel(\"Predicted Class\")\n", 150 | "plt.ylabel(\"Actual Class\")\n", 151 | "plt.show()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 13, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | " precision recall f1-score support\n", 164 | "\n", 165 | " 0 0.94 0.92 0.93 300\n", 166 | " 1 0.92 0.94 0.93 300\n", 167 | "\n", 168 | " accuracy 0.93 600\n", 169 | " macro avg 0.93 0.93 0.93 600\n", 170 | "weighted avg 0.93 0.93 0.93 600\n", 171 | "\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "# Performance Matrix Report - Precision Recall f1score\n", 177 | "\n", 178 | "from sklearn.metrics import classification_report\n", 179 | "print (classification_report(y_ho_test, predicted))" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.8.5" 207 | } 208 | }, 209 | "nbformat": 4, 210 | 
"nbformat_minor": 4 211 | } 212 | -------------------------------------------------------------------------------- /Testing Scratch/Dataset 2/testingv1.csv: -------------------------------------------------------------------------------- 1 | total_length_out,total_length_in,avg_packet_length_out,avg_packet_length_in,avg_duration_out,avg_duration_in,total_duration,label 2 | 5856,532,585.6,53.2,1.611882,1.005457,26.173386,Trojan 3 | 19600,312,1400,52,0.000302,0.000200,0.005426,Trojan 4 | 13724,364,1055.692308,52,0.049003,0.000707,0.641126,Trojan 5 | 14378,312,1027,52,0.046377,0.000732,0.653661,Trojan 6 | 812,563,73.81818182,62.55555556,1.908398,1.516605,34.64183,Trojan 7 | 9978,4259,1108.666667,387.1818182,0.021671,0.041039,0.646469,Trojan 8 | 442,415,44.2,41.5,0.384397,1.953759,23.381559,Trojan 9 | 10089,398,840.75,49.75,0.151534,0.600073,6.618997,Trojan 10 | 1795,928,179.5,92.8,0.285265,0.037851,3.231163,Trojan 11 | 732,1266,73.2,126.6,1.288615,6.686840,79.754546,Trojan 12 | 416,15526,52,1293.833333,0.000220,0.000060,0.002477,Benign 13 | 520,13732,52,1373.2,0.000192,0.000135,0.003269,Benign 14 | 936,1526,52,763,0.000272,0.000166,0.005227,Benign 15 | 1040,0,52,0,0.000318,0.000000,0.006355,Benign 16 | 1040,0,52,0,0.000290,0.000000,0.005792,Benign 17 | 1040,0,52,0,0.000283,0.000000,0.005665,Benign 18 | 1040,0,52,0,0.000301,0.000000,0.006016,Benign 19 | 1040,0,52,0,0.000287,0.000000,0.005739,Benign 20 | 948,7000,63.2,1400,0.000202,0.000194,0.004001,Benign 21 | 1288,2800,71.55555556,1400,0.000296,0.000128,0.005577,Benign 22 | 1440,0,72,0,0.000300,0.000000,0.006005,Benign 23 | 1440,0,72,0,0.000291,0.000000,0.005827,Benign 24 | 1200,1400,63.15789474,1400,0.000262,0.000108,0.005089,Benign 25 | 1132,0,56.6,0,0.000263,0.000000,0.005254,Benign 26 | 168,16523,56,971.9411765,0.000270,0.006370,0.109101,Benign 27 | 0,24290,0,1214.5,0.000000,0.000022,0.00043,Benign 28 | 520,9107,52,910.7,0.000171,0.000012,0.001826,Benign 29 | 1040,0,52,0,0.000219,0.000000,0.004375,Benign 30 | 
896,828,52.70588235,276,0.000286,0.008357,0.029939,Benign 31 | 683,23130,341.5,1285,0.167521,0.000108,0.336982,Benign 32 | 416,14210,52,1184.166667,0.000284,0.000017,0.002477,Benign 33 | 1040,0,52,0,0.000284,0.000000,0.005673,Benign 34 | 700,20376,233.3333333,1198.588235,0.041032,0.001865,0.154808,Benign 35 | 0,28000,0,1400,0.000000,0.000010,0.000191,Benign 36 | 52,26435,52,1391.315789,0.000173,0.000016,0.000472,Benign 37 | 52,26600,52,1400,0.000053,0.000011,0.000266,Benign 38 | 833,7087,59.5,1181.166667,0.000561,0.000112,0.00853,Benign 39 | 1635,52,86.05263158,52,0.000506,0.000715,0.01033,Benign 40 | 1040,0,52,0,0.000026,0.000000,0.000526,Benign 41 | 520,14000,52,1400,0.000033,0.000035,0.000673,Benign 42 | 0,28000,0,1400,0.000000,0.000026,0.000527,Benign 43 | 312,19600,52,1400,0.000024,0.000013,0.00033,Benign 44 | 104,25200,52,1400,0.000013,0.000031,25200,Benign 45 | 312,19185,52,1370.357143,0.001794,0.000032,0.011208,Benign 46 | 988,126,52,126,0.000020,0.000001,0.000377,Benign 47 | 1040,0,52,0,0.000479,0.000000,0.009583,Benign 48 | 1040,0,52,0,0.000049,0.000000,0.000977,Benign 49 | 1040,0,52,0,0.000041,0.000000,0.000819,Benign 50 | 936,364,52,182,0.000226,0.001219,0.006499,Benign 51 | 1350,17631,270,1175.4,0.009026,0.000152,0.047405,Benign 52 | 260,19618,52,1307.866667,0.001377,0.000021,0.007194,Benign 53 | 1040,0,52,0,0.000327,0.000000,0.006533,Benign 54 | 260,17868,52,1191.2,0.000028,0.000450,0.006885,Benign 55 | 104,20883,52,1160.166667,0.001297,0.000019,0.002939,Benign 56 | 1040,0,52,0,0.000385,0.000000,0.007709,Benign 57 | 468,9316,52,846.9090909,0.001572,0.003209,0.049455,Benign 58 | 927,3851,61.8,770.2,0.000741,0.000023,0.01123,Benign 59 | 208,12481,52,780.0625,0.000644,0.000191,0.005622,Benign 60 | 1006,2813,111.7777778,255.7272727,0.006983,0.008674,0.158257,Benign 61 | 699,3125,87.375,260.4166667,0.011236,0.029025,0.43818,Benign 62 | 1213,6172,121.3,617.2,0.250721,0.023988,2.747083,Benign 63 | 1792,1256,149.3333333,157,1.136611,0.153883,14.870386,Benign 
64 | 468,15196,52,1381.454545,0.000602,0.000089,0.006397,Benign 65 | 367,18304,61.16666667,1307.428571,0.002962,0.000128,0.019569,Benign 66 | 777,8291,59.76923077,1036.375,0.001078,0.000154,0.015233,Benign 67 | 3869,4696,386.9,469.6,3.475922,0.008177,34.840995,Benign 68 | 5841,3387,486.75,423.375,0.082086,0.004039,1.017344,Benign 69 | 12600,572,1400,52,0.000237,0.000153,0.003819,Benign 70 | 14000,520,1400,52,0.000144,0.000122,0.002661,Benign 71 | 11049,1828,1227.666667,166.1818182,0.047967,0.028197,0.741869,Benign 72 | 13567,1138,1130.583333,142.25,0.017619,0.002865,0.234347,Benign 73 | 4443,4330,493.6666667,393.6363636,0.028444,0.018051,0.454552,Benign 74 | 6109,3281,555.3636364,364.5555556,0.021890,0.008087,0.313572,Benign 75 | 2254,2739,250.4444444,249,15.417934,0.005021,138.816634,Benign 76 | 2153,4869,215.3,486.9,0.342925,0.004785,3.477093,Benign 77 | 1498,1683,187.25,140.25,0.025416,5.005738,60.272179,Benign 78 | 644,476,58.54545455,52.88888889,5.485595,13.274716,179.813989,Benign 79 | 1144,3178,114.4,317.8,17.252015,0.003792,172.55807,Benign 80 | 2209,1777,200.8181818,177.7,1.448418,0.027127,16.203002,Benign 81 | 2001,4449,222.3333333,404.4545455,0.006135,0.005726,0.118203,Benign 82 | 1810,3857,201.1111111,350.6363636,13.578150,0.011711,122.33217,Benign 83 | 1621,1676,202.625,139.6666667,0.012625,8.470028,101.741331,Benign 84 | 6235,4772,566.8181818,530.2222222,1.324204,0.007430,14.633111,Benign 85 | 1606,3713,160.6,371.3,0.118602,0.022756,1.413574,Benign 86 | 1873,1015,208.1111111,92.27272727,0.021721,0.002725,0.22546,Benign 87 | 671,4417,83.875,368.0833333,0.677163,0.009401,5.530116,Benign 88 | 1945,1825,176.8181818,202.7777778,0.011313,0.016697,0.274721,Benign 89 | 536,536,67,53.6,0.749217,10.893955,116.274163,Benign 90 | 0,22667,0,1133.35,0.000000,0.000021,0.000416,Benign 91 | 1309,9029,187,694.5384615,0.000454,0.000217,0.005991,Benign 92 | 1469,178,81.61111111,89,0.000649,0.001243,0.014167,Benign 93 | 
620,9612,56.36363636,1068,0.001414,0.000027,0.015794,Benign 94 | 1245,7338,103.75,917.25,0.007912,0.000309,0.097412,Benign 95 | 588,6543,53.45454545,727,0.001994,0.002583,0.045183,Benign 96 | 416,13312,52,1109.333333,0.001624,0.000021,0.013243,Benign 97 | 1995,2681,153.4615385,446.8333333,0.002476,0.000645,0.036259,Benign 98 | 1019,10573,169.8333333,755.2142857,0.000859,0.000174,0.007595,Benign 99 | 1255,1615,83.66666667,323,0.004289,0.001328,0.070978,Benign 100 | 665,665,95,95,0.000552,0.000552,0.0603,Benign 101 | 4562,972,380.1666667,121.5,0.003639,0.000355,0.046501,Benign 102 | 649,23116,324.5,1284.222222,0.017717,0.000099,0.03722,Benign 103 | 0,28000,0,1400,0.000000,0.000027,0.000549,Benign 104 | 0,28000,0,1400,0.000000,0.000010,0.0002,Benign 105 | 700,23032,233.3333333,1354.823529,0.000722,0.000017,0.002458,Benign 106 | 0,28000,0,1400,0.000000,0.000011,0.000225,Benign 107 | 0,28000,0,1400,0.000000,0.000009,0.000178,Benign 108 | 52,26600,52,1400,0.000061,0.000016,0.000372,Benign 109 | 312,18252,52,1303.714286,0.000275,0.000204,0.004511,Benign 110 | 7859,7243,714.4545455,804.7777778,0.000766,0.000378,0.01183,Benign 111 | 864,18853,144,1346.642857,0.000329,0.005011,0.005011,Benign 112 | 0,26675,0,1333.75,0.000000,0.000100,0.001998,Benign 113 | 0,26464,0,1323.2,0.000000,0.000035,0.000704,Benign 114 | 208,19301,52,1206.3125,0.000290,0.000090,0.002598,Benign 115 | 416,16800,52,1400,0.000694,0.000149,0.007337,Benign 116 | 12238,2427,1359.777778,220.6363636,0.000878,0.039199,0.439087,Benign 117 | 656,7125,72.88888889,647.7272727,3.865589,0.003982,34.834099,Benign 118 | 1452,1447,145.2,144.7,0.005393,0.011599,0.169923,Benign 119 | 1222,3767,135.7777778,342.4545455,0.035985,0.011201,0.447081,Benign 120 | 1449,1695,181.125,141.25,0.026967,19.968305,239.835392,Benign 121 | 708,504,70.8,50.4,2.454773,0.027523,24.822953,Benign 122 | 2272,2611,252.4444444,237.3636364,55.823881,0.007820,502.500952,Benign 123 | 2765,2876,276.5,287.6,0.240199,0.026164,2.663622,Benign 124 | 
1715,1872,142.9166667,234,5.016834,29.997419,300.181352,Benign 125 | 3831,1499,383.1,149.9,0.019733,0.003069,0.228017,Benign 126 | 1901,2646,190.1,264.6,2.319603,0.028269,23.478724,Benign 127 | 3911,3027,391.1,302.7,0.080811,0.120381,2.01192,Benign 128 | 3749,2247,374.9,224.7,1.006559,0.083725,10.902833,Benign 129 | 2515,4145,251.5,414.5,0.343112,0.434465,7.775766,Benign 130 | 640,5266,64,526.6,0.001436,0.000747,0.021831,Benign 131 | 640,4705,64,470.5,0.001078,0.001503,0.025806,Benign 132 | 448,6771,64,520.8461538,0.000679,0.000667,0.013428,Benign 133 | 648,4110,54,513.75,0.000890,0.000510,0.014751,Benign 134 | 1475,2291,122.9166667,286.375,0.537064,0.023667,6.634098,Benign 135 | 1570,7112,157,711.2,3.330282,0.002362,33.326437,Benign 136 | 3758,1147,341.6363636,127.4444444,0.010364,0.001746,0.129727,Benign 137 | 7190,1922,653.6363636,213.5555556,0.031467,0.030463,0.620305,Benign 138 | 2772,829,252,92.11111111,6.089106,0.036370,67.307489,Benign 139 | 4578,893,457.8,89.3,1.174172,0.034073,12.082454,Benign 140 | 882,625,67.84615385,89.28571429,8.879899,0.006421,115.483628,Benign 141 | 4336,1834,394.1818182,203.7777778,0.000623,0.000420,0.010631,Benign 142 | 1257,598,89.78571429,99.66666667,4.346160,40.000051,300.846552,Benign 143 | 6060,1307,550.9090909,145.2222222,0.047773,0.000473,0.529757,Benign 144 | 0,20830,0,1041.5,0.000000,0.001919,0.038374,Benign 145 | 696,11618,77.33333333,1056.181818,0.000201,0.000008,0.001893,Benign 146 | 1560,0,78,0,0.000301,0.000000,0.00602,Benign 147 | 838,4705,76.18181818,522.7777778,0.031981,0.029532,0.617579,Benign 148 | 1065,9758,118.3333333,887.0909091,0.003838,0.000256,0.037358,Benign 149 | 1399,7592,174.875,632.6666667,0.001163,0.000183,0.011497,Benign 150 | 1723,6289,132.5384615,898.4285714,0.001663,0.000283,0.023606,Benign 151 | 1493,7375,186.625,614.5833333,0.001381,0.000301,0.014656,Benign 152 | --------------------------------------------------------------------------------