├── memo ├── timer.h ├── asura.jpeg ├── pcaps └── 1.pcap ├── dtreeviz ├── 1.png ├── README.md ├── 1.py └── tmp ├── reduce_local_counts_to_global_count.h ├── reduce_local_sums_to_global_sum.h ├── view.h ├── reduce_min_ind.h ├── scatter.py ├── kmeans_tbb.cpp ├── Makefile ├── reduced2-bak ├── sum_and_count.h ├── repair_empty_clusters.cpp ├── kmeans.h ├── kmeans_tbb.h ├── tmp ├── README.md ├── test_kmeans.cpp ├── asura23.cpp └── reduced2 /memo: -------------------------------------------------------------------------------- 1 | shuf -n 1000 reduced2 > tmp 2 | 3 | -------------------------------------------------------------------------------- /timer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuoAndo/Asura/HEAD/timer.h -------------------------------------------------------------------------------- /asura.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuoAndo/Asura/HEAD/asura.jpeg -------------------------------------------------------------------------------- /pcaps/1.pcap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuoAndo/Asura/HEAD/pcaps/1.pcap -------------------------------------------------------------------------------- /dtreeviz/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RuoAndo/Asura/HEAD/dtreeviz/1.png -------------------------------------------------------------------------------- /dtreeviz/README.md: -------------------------------------------------------------------------------- 1 |
2 | # pip install dtreeviz
3 | # python 1.py
4 | 
5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /reduce_local_counts_to_global_count.h: -------------------------------------------------------------------------------- 1 | void convert_local_counts_to_global_count( tls_type& tls, view& global ) { 2 | global.change = 0; 3 | for( auto i=tls.begin(); i!=tls.end(); ++i ) { 4 | view& v = *i; 5 | global.change += i->change; 6 | v.change = 0; 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /reduce_local_sums_to_global_sum.h: -------------------------------------------------------------------------------- 1 | void convert_local_sums_to_global_sum( size_t k, tls_type& tls, view& global ) { 2 | for( auto i=tls.begin(); i!=tls.end(); ++i ) { 3 | view& v = *i; 4 | for( size_t j=0; j tls_type; -------------------------------------------------------------------------------- /reduce_min_ind.h: -------------------------------------------------------------------------------- 1 | int calc_shortest_index( const point centroid[], size_t k, point value ) { 2 | int min = -1; 3 | float mind = std::numeric_limits::max(); 4 | for( int j=0; jdst") 19 | plt.xlabel("counts") 20 | plt.ylabel("bytes") 21 | plt.grid() 22 | 23 | plt.show() 24 | -------------------------------------------------------------------------------- /dtreeviz/1.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import matplotlib.pyplot as plt 3 | from sklearn.tree import DecisionTreeRegressor 4 | from dtreeviz.trees import * 5 | 6 | df_cars = pd.read_csv("tmp") 7 | X, y = df_cars[['COUNTS']], df_cars['BYTES'] 8 | 9 | dt = DecisionTreeRegressor(max_depth=3, criterion="mae") 10 | dt.fit(X, y) 11 | 12 | fig = plt.figure() 13 | ax = fig.gca() 14 | rtreeviz_univar(dt, X, y, 'COUNTS', 'BYTES', ax=ax) 15 | plt.show() 16 | -------------------------------------------------------------------------------- /kmeans_tbb.cpp: 
-------------------------------------------------------------------------------- 1 | #define NOMINMAX // Required on Windows 2 | #include 3 | #include 4 | #include 5 | #include "tbb/tbb.h" 6 | 7 | #include "kmeans.h" 8 | #include "view.h" 9 | #include "reduce_local_counts_to_global_count.h" 10 | #include "reduce_local_sums_to_global_sum.h" 11 | #include "reduce_min_ind.h" 12 | 13 | namespace tbb_asura { 14 | 15 | #include "kmeans_tbb.h" 16 | 17 | } // namespace tbb_example 18 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | LIBROOT = /usr/local/lib 2 | CXXFLAGS = -Wall -Wextra -Wpedantic -std=c++11 -Os -O2 -fpermissive -lpthread -ltbb 3 | LDFLAGS = -L$(LIBROOT) -I$(LIBROOT) -ltins -lpcap -L/usr/local/lib/ 4 | 5 | PROGS = asura 6 | 7 | UNAME := $(shell uname -s) 8 | ifeq ($(UNAME), $(filter $(UNAME), Darwin FreeBSD)) 9 | CC = clang 10 | else 11 | CC = gcc 12 | endif 13 | 14 | 15 | $(PROGS): asura23.cpp kmeans_tbb.cpp repair_empty_clusters.cpp 16 | $(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS) 17 | strip -s $(PROGS) 18 | clean: 19 | rm -f $(PROGS) *.o 20 | -------------------------------------------------------------------------------- /reduced2-bak: -------------------------------------------------------------------------------- 1 | 298,388890 2 | 64,23042 3 | 20,9818 4 | 546,82336 5 | 10,646 6 | 8,416 7 | 18,1980 8 | 14,820 9 | 8,1232 10 | 14,2470 11 | 2,165 12 | 6,312 13 | 30,10468 14 | 4,762 15 | 552,38934 16 | 16,928 17 | 38,10162 18 | 4,278 19 | 12,1170 20 | 30,3534 21 | 392,429312 22 | 10,1170 23 | 24,8646 24 | 6,3300 25 | 4,232 26 | 2,202 27 | 106,99562 28 | 432,596200 29 | 458,457050 30 | 10,5610 31 | 12,1150 32 | 16,4174 33 | 52,14422 34 | 22,6986 35 | 8,4576 36 | 542,109432 37 | 352,103728 38 | 96,45634 39 | 562,39616 40 | 4,208 41 | 12,7778 42 | 30,2740 43 | 12,1848 44 | 574,39350 45 | 230,40244 46 | 10,1546 47 | 2,80 48 | 82,19624 49 
| 28,3572 50 | 422,53020 51 | -------------------------------------------------------------------------------- /sum_and_count.h: -------------------------------------------------------------------------------- 1 | // Structure that holds a sum of points and the number of points 2 | // accumulated into that sum, so that the mean can be computed. 3 | struct sum_and_count { 4 | sum_and_count() : sum(), count(0) {} 5 | point sum; 6 | size_t count; 7 | void clear() { 8 | sum = point(); 9 | count = 0; 10 | } 11 | void tally( const point& p ) { 12 | sum += p; 13 | ++count; 14 | } 15 | point mean() const { 16 | return sum/count; 17 | } 18 | void operator+=( const sum_and_count& other ) { 19 | sum += other.sum; 20 | count += other.count; 21 | }; 22 | }; -------------------------------------------------------------------------------- /repair_empty_clusters.cpp: -------------------------------------------------------------------------------- 1 | #include "kmeans.h" 2 | 3 | void fix_empty_clusters( size_t n, const point points[], cluster_id id[], size_t k, point centroid[], sum_and_count sum[] ) { 4 | for( size_t j=0; jmaxd ) { 14 | maxd = d; 15 | farthest = i; 16 | } 17 | } 18 | //#endif 19 | id[farthest] = j; 20 | sum[j].count = 1; 21 | sum[j].sum = points[farthest]; 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /kmeans.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | struct point { 4 | float x, y; 5 | void operator+=( const point& right ) { 6 | x += right.x; 7 | y += right.y; 8 | } 9 | point operator/( size_t count ) const { 10 | point p = *this; 11 | p.x /= count; 12 | p.y /= count; 13 | return p; 14 | } 15 | }; 16 | 17 | inline float distance2( const point& a, const point& b ) { 18 | float dx = a.x-b.x; 19 | float dy = a.y-b.y; 20 | return dx*dx+dy*dy; 21 | } 22 | 23 | #include "sum_and_count.h" 24 | 25 | typedef unsigned short cluster_id; 26 | 27 | void 
fix_empty_clusters( size_t n, const point points[], cluster_id id[], size_t k, point centroid[], sum_and_count sum[] ); 28 | 29 | /* 30 | namespace cilk_example { 31 | void compute_k_means( size_t n, const point points[], size_t k, cluster_id id[], point centroid[] ); 32 | } 33 | */ 34 | 35 | namespace tbb_asura { 36 | void do_k_means( size_t n, const point points[], size_t k, cluster_id id[], point centroid[] ); 37 | } 38 | -------------------------------------------------------------------------------- /kmeans_tbb.h: -------------------------------------------------------------------------------- 1 | void do_k_means( size_t n, const point points[], size_t k, cluster_id id[], point centroid[] ) { 2 | 3 | tls_type tls([&]{return k;}); 4 | view global(k); 5 | 6 | tbb::parallel_for( 7 | tbb::blocked_range(0,n), 8 | [=,&tls,&global]( tbb::blocked_range r ) { 9 | view& v = tls.local(); 10 | for( size_t i=r.begin(); i!=r.end(); ++i ) { 11 | id[i] = i % k; 12 | // Peeled "Sum step" 13 | v.array[id[i]].tally(points[i]); 14 | } 15 | } 16 | ); 17 | 18 | size_t change; 19 | do { 20 | convert_local_sums_to_global_sum( k, tls, global ); 21 | fix_empty_clusters( n, points, id, k, centroid, global.array ); 22 | 23 | for( size_t j=0; j(0,n), 30 | [=,&tls,&global]( tbb::blocked_range r ) { 31 | view& v = tls.local(); 32 | for( size_t i=r.begin(); i!=r.end(); ++i ) { 33 | cluster_id j = calc_shortest_index(centroid, k , points[i]); 34 | if( j!=id[i] ) { 35 | id[i] = j; 36 | ++v.change; 37 | } 38 | v.array[j].tally(points[i]); 39 | } 40 | } 41 | ); 42 | 43 | convert_local_counts_to_global_count( tls, global ); 44 | } while( global.change!=0 ); 45 | } 46 | -------------------------------------------------------------------------------- /tmp: -------------------------------------------------------------------------------- 1 | 1,40 2 | 1459,84866 3 | 2,80 4 | 2833,241780 5 | 2988,187275 6 | 7,448 7 | 460,25606 8 | 633,34543 9 | 49328,2.21751e+06 10 | 12,720 11 | 12,720 12 | 
113748,9.86882e+06 13 | 42,1766 14 | 22,1180 15 | 9,468 16 | 7,448 17 | 36654,1.48253e+06 18 | 12,720 19 | 2004,120240 20 | 2301,198771 21 | 562,24394 22 | 87613,6.07037e+06 23 | 4785,322049 24 | 7737,702600 25 | 8,336 26 | 5,200 27 | 5,320 28 | 2027,368418 29 | 144147,6.38727e+06 30 | 18552,949675 31 | 36,1682 32 | 6057,313479 33 | 3055,231404 34 | 35,2240 35 | 30,4740 36 | 2005,120300 37 | 2011,120660 38 | 2871,208006 39 | 203,8504 40 | 11,704 41 | 6434,354703 42 | 34,1420 43 | 2,754 44 | 5,200 45 | 8,2453 46 | 1260,50400 47 | 512,27328 48 | 2906,159921 49 | 3051,168545 50 | 36734,1.61892e+06 51 | 2,88 52 | 8,416 53 | 1,152 54 | 8,6356 55 | 336,13836 56 | 319,16338 57 | 3357,149826 58 | 49002,2.05695e+06 59 | 4090,280469 60 | 5,320 61 | 6,384 62 | 13,740 63 | 3,252 64 | 2021,89300 65 | 18566,2.40658e+06 66 | 1543,118895 67 | 2530,158477 68 | 13,892 69 | 7364,493109 70 | 6577,346672 71 | 37800,1.71083e+06 72 | 36197,1.44788e+06 73 | 1,40 74 | 8,336 75 | 1814,106880 76 | 8,336 77 | 3269,131080 78 | 8,336 79 | 142,40720 80 | 1413,59633 81 | 1201,54730 82 | 5052,303120 83 | 8,336 84 | 5,320 85 | 4,168 86 | 2590,158790 87 | 922,47954 88 | 961,55006 89 | 515,31285 90 | 2663,117364 91 | 5021,200840 92 | 7,448 93 | 38,2110 94 | 1357,118210 95 | 57283,2.3587e+06 96 | 18704,2.40904e+06 97 | 4389,305176 98 | 37350,1.59014e+06 99 | 5,1840 100 | 45555,2.00446e+06 101 | 5,320 102 | 1674,90563 103 | 3976,218128 104 | 3227,129424 105 | 12,720 106 | 3259,172496 107 | 212757,1.90418e+07 108 | 4513,184602 109 | 2551,155293 110 | 48,2486 111 | 12,720 112 | 56,3584 113 | 32821,1.44452e+06 114 | 8,336 115 | 47,2246 116 | 4,240 117 | 861,48490 118 | 103,10746 119 | 474,25486 120 | 68694,2.74806e+06 121 | 93578,6.22639e+06 122 | 4273,292429 123 | 16,960 124 | 6,240 125 | 1168,47086 126 | 1294,59918 127 | 12,768 128 | 35,2240 129 | 6104,382255 130 | 5669,345137 131 | 36205,1.59318e+06 132 | 2988,155330 133 | 18,757 134 | 584,32476 135 | 141,7598 136 | 3506,174234 137 | 8948,490344 138 | 
36810,1.51835e+06 139 | 8,512 140 | 173,14420 141 | 3193,191540 142 | 1156,46479 143 | 12,504 144 | 2,128 145 | 7760,514978 146 | 32811,1.31244e+06 147 | 12,504 148 | 7,280 149 | 4,200 150 | 1,44 151 | 233,10236 152 | 58105,2.32422e+06 153 | 8,787 154 | 36979,1.67362e+06 155 | 54,3060 156 | 5256,389060 157 | 6,384 158 | 3,363 159 | 18707,2.40858e+06 160 | 1532,73227 161 | 12,504 162 | 1,84 163 | 2015,120900 164 | 1490,112296 165 | 2720,109012 166 | 3247,144870 167 | 58492,2.57383e+06 168 | 3671,223235 169 | 3377,201886 170 | 3337,200544 171 | 6955,482422 172 | 4182,167508 173 | 5,260 174 | 2028,89616 175 | 107,21192 176 | 3223,191110 177 | 36736,1.62466e+06 178 | 18535,776730 179 | 24600,1.05377e+06 180 | 68540,3.01598e+06 181 | 7479,505822 182 | 6688,400927 183 | 7,448 184 | 6,320 185 | 2,135 186 | 143,7594 187 | 4751,283937 188 | 2009,120540 189 | 9366,562608 190 | 4473,267712 191 | 2,428 192 | 3155,168995 193 | 2736,158063 194 | 651,36112 195 | 27,1080 196 | 4271,219702 197 | 2357,156725 198 | 14,768 199 | 114,7200 200 | 1186,56107 201 | 1221,55876 202 | 6334,422758 203 | 7,448 204 | 2007,120420 205 | 8034,545426 206 | 14,696 207 | 36421,1.4571e+06 208 | 6,240 209 | 2,428 210 | 1730,107629 211 | 157,8312 212 | 2339,99378 213 | 70813,4.2474e+06 214 | 4339,273937 215 | 12,504 216 | 11,484 217 | 12,1667 218 | 18548,950511 219 | 4114,229268 220 | 2,210 221 | 4348,261054 222 | 20,880 223 | 112,7896 224 | 44,2320 225 | 8,336 226 | 3146,136999 227 | 5498,252529 228 | 42,2304 229 | 8,320 230 | 92,4004 231 | 18565,946455 232 | 1158,54169 233 | 301,13452 234 | 12,504 235 | 6379,372480 236 | 48127,2.11788e+06 237 | 6,384 238 | 354,15754 239 | 3647,206914 240 | 3217,146468 241 | 28,1228 242 | 6721,390804 243 | 7020,450738 244 | 867,38144 245 | -------------------------------------------------------------------------------- /dtreeviz/tmp: -------------------------------------------------------------------------------- 1 | COUNTS,BYTES 2 | 1,40 3 | 1459,84866 4 | 2,80 5 | 
2833,241780 6 | 2988,187275 7 | 7,448 8 | 460,25606 9 | 633,34543 10 | 49328,2.21751e+06 11 | 12,720 12 | 12,720 13 | 113748,9.86882e+06 14 | 42,1766 15 | 22,1180 16 | 9,468 17 | 7,448 18 | 36654,1.48253e+06 19 | 12,720 20 | 2004,120240 21 | 2301,198771 22 | 562,24394 23 | 87613,6.07037e+06 24 | 4785,322049 25 | 7737,702600 26 | 8,336 27 | 5,200 28 | 5,320 29 | 2027,368418 30 | 144147,6.38727e+06 31 | 18552,949675 32 | 36,1682 33 | 6057,313479 34 | 3055,231404 35 | 35,2240 36 | 30,4740 37 | 2005,120300 38 | 2011,120660 39 | 2871,208006 40 | 203,8504 41 | 11,704 42 | 6434,354703 43 | 34,1420 44 | 2,754 45 | 5,200 46 | 8,2453 47 | 1260,50400 48 | 512,27328 49 | 2906,159921 50 | 3051,168545 51 | 36734,1.61892e+06 52 | 2,88 53 | 8,416 54 | 1,152 55 | 8,6356 56 | 336,13836 57 | 319,16338 58 | 3357,149826 59 | 49002,2.05695e+06 60 | 4090,280469 61 | 5,320 62 | 6,384 63 | 13,740 64 | 3,252 65 | 2021,89300 66 | 18566,2.40658e+06 67 | 1543,118895 68 | 2530,158477 69 | 13,892 70 | 7364,493109 71 | 6577,346672 72 | 37800,1.71083e+06 73 | 36197,1.44788e+06 74 | 1,40 75 | 8,336 76 | 1814,106880 77 | 8,336 78 | 3269,131080 79 | 8,336 80 | 142,40720 81 | 1413,59633 82 | 1201,54730 83 | 5052,303120 84 | 8,336 85 | 5,320 86 | 4,168 87 | 2590,158790 88 | 922,47954 89 | 961,55006 90 | 515,31285 91 | 2663,117364 92 | 5021,200840 93 | 7,448 94 | 38,2110 95 | 1357,118210 96 | 57283,2.3587e+06 97 | 18704,2.40904e+06 98 | 4389,305176 99 | 37350,1.59014e+06 100 | 5,1840 101 | 45555,2.00446e+06 102 | 5,320 103 | 1674,90563 104 | 3976,218128 105 | 3227,129424 106 | 12,720 107 | 3259,172496 108 | 212757,1.90418e+07 109 | 4513,184602 110 | 2551,155293 111 | 48,2486 112 | 12,720 113 | 56,3584 114 | 32821,1.44452e+06 115 | 8,336 116 | 47,2246 117 | 4,240 118 | 861,48490 119 | 103,10746 120 | 474,25486 121 | 68694,2.74806e+06 122 | 93578,6.22639e+06 123 | 4273,292429 124 | 16,960 125 | 6,240 126 | 1168,47086 127 | 1294,59918 128 | 12,768 129 | 35,2240 130 | 6104,382255 131 | 5669,345137 132 | 
36205,1.59318e+06 133 | 2988,155330 134 | 18,757 135 | 584,32476 136 | 141,7598 137 | 3506,174234 138 | 8948,490344 139 | 36810,1.51835e+06 140 | 8,512 141 | 173,14420 142 | 3193,191540 143 | 1156,46479 144 | 12,504 145 | 2,128 146 | 7760,514978 147 | 32811,1.31244e+06 148 | 12,504 149 | 7,280 150 | 4,200 151 | 1,44 152 | 233,10236 153 | 58105,2.32422e+06 154 | 8,787 155 | 36979,1.67362e+06 156 | 54,3060 157 | 5256,389060 158 | 6,384 159 | 3,363 160 | 18707,2.40858e+06 161 | 1532,73227 162 | 12,504 163 | 1,84 164 | 2015,120900 165 | 1490,112296 166 | 2720,109012 167 | 3247,144870 168 | 58492,2.57383e+06 169 | 3671,223235 170 | 3377,201886 171 | 3337,200544 172 | 6955,482422 173 | 4182,167508 174 | 5,260 175 | 2028,89616 176 | 107,21192 177 | 3223,191110 178 | 36736,1.62466e+06 179 | 18535,776730 180 | 24600,1.05377e+06 181 | 68540,3.01598e+06 182 | 7479,505822 183 | 6688,400927 184 | 7,448 185 | 6,320 186 | 2,135 187 | 143,7594 188 | 4751,283937 189 | 2009,120540 190 | 9366,562608 191 | 4473,267712 192 | 2,428 193 | 3155,168995 194 | 2736,158063 195 | 651,36112 196 | 27,1080 197 | 4271,219702 198 | 2357,156725 199 | 14,768 200 | 114,7200 201 | 1186,56107 202 | 1221,55876 203 | 6334,422758 204 | 7,448 205 | 2007,120420 206 | 8034,545426 207 | 14,696 208 | 36421,1.4571e+06 209 | 6,240 210 | 2,428 211 | 1730,107629 212 | 157,8312 213 | 2339,99378 214 | 70813,4.2474e+06 215 | 4339,273937 216 | 12,504 217 | 11,484 218 | 12,1667 219 | 18548,950511 220 | 4114,229268 221 | 2,210 222 | 4348,261054 223 | 20,880 224 | 112,7896 225 | 44,2320 226 | 8,336 227 | 3146,136999 228 | 5498,252529 229 | 42,2304 230 | 8,320 231 | 92,4004 232 | 18565,946455 233 | 1158,54169 234 | 301,13452 235 | 12,504 236 | 6379,372480 237 | 48127,2.11788e+06 238 | 6,384 239 | 354,15754 240 | 3647,206914 241 | 3217,146468 242 | 28,1228 243 | 6721,390804 244 | 7020,450738 245 | 867,38144 246 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Massive Pcap Analyzer with Massive Multithreading (DEF CON 26) 2 | 3 | “The universe is not complicated, there's just a lot of it.” - Richard Feynman 4 | 5 | 6 | 7 |
  8 | Required: 
  9 | libtins
 10 | http://libtins.github.io/ 
 11 | Intel TBB
 12 | 
 13 | # apt install libpcap-dev
 14 | # apt install libtins-dev
 15 | 
 16 | quick test:
 17 | # ./asura pcaps
 18 | 
19 | With the rapid increase of network traffic, we suffer from long processing times when coping with large packet capture files in many areas such as network diagnosis, incident response and CTF. Wireshark and the Linux grep command basically run in single-process mode and are thus not good at analyzing tens or hundreds of gigabytes of packet dumps in a reasonable processing time. On the other hand, high-performance devices such as GPGPUs are still too bulky and expensive to run on a laptop. 20 | 21 | Asura One is a portable anomaly detector for large packet capture files (around 100GB) built on raw threads (Posix Pthreads) and the highly concurrent containers of Intel TBB (Threading Building Blocks). Asura One is designed for lightweight anomaly detection on large PCAP files ranging from tens to hundreds of millions of packets, using a parallel clustering algorithm. Asura One takes two steps: extracting the flow vector {(srcIP,dstIP), X,Y..} and clustering, the outputs of which are {(srcIP,dstIP), Anomaly_Score}. The flow vector {(srcIP,dstIP), X,Y..} is implemented with the concurrent hash map of Intel TBB, which offers a much higher level of concurrency. Compared with a conventional C++ or Java hash map such as the STL one, the Intel TBB concurrent hash map adopts lock-free techniques and is thus much faster. Another thrust of Asura One is leveraging task decomposition by raw threads. Asura's task-decomposition-based multithreading can handle various types and lengths of IP packets and transforms them into well-organized input of the form {(srcIP,dstIP), X,Y..}. Asura One, with its outputs of {(srcIP,dstIP), Anomaly_Score}, should be a good partner (as a pre-filter) for Wireshark. 22 | 23 | In our experiment, Asura One processed about 400,000,000 packets with a PCAP file size of 83GB in about 30 minutes on a laptop PC. The packet dump was drastically reduced from 83GB to 7MB, with about 110,000 aggregated flow vectors, which is feasible for further careful inspection with Wireshark.
24 | 25 | 1.License: Asura One is now released as open source under the MIT license. 26 | 27 | 2.Compiler and libraries: gcc version 7.3.0 (Ubuntu 7.3.0), Posix Pthreads and Intel TBB. 28 | 29 | 3.Input: PCAP files in the directory 30 | 31 | 4.Usage: ./asura DIRECTORY_NAME 32 | 33 |
 34 | 	# mkdir pcap
 35 |         # cd pcap 
 36 | 	# wget -r -l 4 https://download.netresec.com/pcap/ists-12/2015-03-07/
 37 | 	# cp -r download.netresec.com/pcap/ists-12/2015-03-07/* .
 38 | 	# rm -rf download.netresec.com
 39 |         # cd ..
 40 | 	# ./build-asura.sh 
 41 |         # ./asura pcap
 42 | 
43 | 44 | 5.Output: {(srcIP,dstIP), Anomaly_Score} 45 | 46 |
 47 | 	sourceIP:sourcePort => destinationIP:destinationPort, clusterID, points(X,Y), percentage
 48 |         173.194.70.94:173.194.70.94=>10.0.2.16:10.0.2.16, clusterID:1, data(954,478932), 22%
 49 |         10.0.2.16:10.0.2.16=>195.169.125.228:195.169.125.228, clusterID:2, data(141,7144), 55%
 50 |       X.X.X.X,Z.Z.Z.Z -> 5 (73445,48),288,118644,0.242743%
 51 | 
52 |   53 | If the percentage is low, the pair (srcIP,srcPort->dstIP,dstPort) could be an anomaly. 54 | 55 | 6.Procedure 1: extracting flow vector {(srcIP,dstIP), X, Y} 56 |
 57 | 	Container: typedef concurrent_hash_map
 58 | 
59 | 60 |
 61 | 	Main loop: 
 62 |   	pthread_create(&master, NULL, (void*)master_func, (void*)&targ[0]);
 63 |     	  for (i = 1; i < thread_num; ++i) { 
 64 |         	targ[i].id = i;
 65 |        		pthread_create(&worker[i],NULL,(void*)worker_func,(void*)&targ[i]); }
 66 |     	  for (i = 1; i < thread_num; ++i) 
 67 |  	       pthread_join(worker[i], NULL);
 68 | 
69 | 70 | 7.Procedure 2: calculating anomaly score {(srcIP,dstIP), Anomaly_Score} 71 |
 72 | 	  Main loop (K-Means):
 73 | 	  tbb::parallel_for(
 74 |           tbb::blocked_range<size_t>(0,n),
 75 |           [=,&tls,&global]( tbb::blocked_range<size_t> r ) {
 76 |               view& v = tls.local();
 77 |               for( size_t i=r.begin(); i!=r.end(); ++i ) {
 78 |                   cluster_id j = calc_shortest_index(centroid, k , points[i]); 
 79 |                   if( j!=id[i] ) {
 80 |                       id[i] = j;
 81 |                       ++v.change;
 82 |                   }
 83 |                   v.array[j].tally(points[i]);
 84 |               }
 85 |             }
 86 | 
87 | 88 | 8. Processing time: about 30 minutes for about 400,000,000 packets 89 | 90 |
91 | 2022-01-11 92 | libpcap and libtins are required. 93 | 94 |
 95 | # apt install libpcap-dev
 96 | # git clone https://github.com/mfontanini/libtins.git
 97 | # cd libtins/
 98 | # apt-get install libpcap-dev libssl-dev cmake
 99 | # mkdir build
100 | # cd build/
101 | # cmake ../ -DLIBTINS_ENABLE_CXX11=1
102 | # make install
103 | # ldconfig
104 | 
105 | -------------------------------------------------------------------------------- /test_kmeans.cpp: -------------------------------------------------------------------------------- 1 | #include "kmeans.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "timer.h" 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | 48 | using namespace std; 49 | // using namespace tbb; 50 | 51 | // Square root of number of intended centroids 52 | /* 53 | const size_t SQRT_K = 4; 54 | const size_t K = SQRT_K*SQRT_K; 55 | const size_t N = 533; 56 | 57 | point points[N]; 58 | point centroid[K]; 59 | cluster_id id[N]; 60 | */ 61 | 62 | std::vector < std::vector< std::string > > parse_csv(const char* filepath) 63 | { 64 | std::vector< std::vector< std::string > > cells; 65 | std::string line; 66 | std::ifstream ifs(filepath); 67 | 68 | // csvを走査 69 | while (std::getline(ifs, line)) { 70 | 71 | std::vector< std::string > data; 72 | 73 | // 1行を走査 74 | boost::tokenizer< boost::escaped_list_separator< char > > tokens(line); 75 | for (const std::string& token : tokens) { 76 | data.push_back(token); 77 | } 78 | 79 | // 1行読み込んだ結果を入れる 80 | cells.push_back(data); 81 | } 82 | 83 | return cells; 84 | } 85 | 86 | /* 87 | int main(int argc, char* argv[]) { 88 | 89 | const size_t SQRT_K = 4; 90 | const size_t K = SQRT_K*SQRT_K; 91 | const size_t N = 533; 92 | 93 | point* points; 94 | points = (struct point *)malloc(N*sizeof(struct point)); 95 | 96 | point* centroid; 97 | centroid = (struct point *)malloc(N*sizeof(struct point)); 98 | 
99 | cluster_id* id; 100 | id = (unsigned short *)malloc(N*sizeof(unsigned short)); 101 | 102 | std::vector pair; 103 | 104 | printf("Testing TBB kmeans algorithm...\n"); 105 | 106 | const auto cells = parse_csv(argv[1]); 107 | 108 | int counter = 0; 109 | for (const auto& rows : cells) { 110 | 111 | point& p = points[counter]; 112 | 113 | p.x = std::stof(rows[1]); 114 | p.y = std::stof(rows[2]); 115 | pair.push_back(rows[0]); 116 | 117 | counter = counter + 1; 118 | } 119 | 120 | tbb_example::compute_k_means( N, points, K, id, centroid ); 121 | 122 | #if 1 123 | 124 | int* counts; 125 | counts = (int *)malloc(K*sizeof(int)); 126 | 127 | for( size_t i=0; i bset_pair = std::bitset<64>(pair_long); 140 | string bset_pair_string = bset_pair.to_string(); 141 | 142 | std::string ip1 = bset_pair_string.substr(0, 8); 143 | std::string ip2 = bset_pair_string.substr(8, 8); 144 | std::string ip3 = bset_pair_string.substr(16, 8); 145 | std::string ip4 = bset_pair_string.substr(24, 8); 146 | 147 | std::bitset<8> ip1_bset = std::bitset<8>(ip1); 148 | std::bitset<8> ip2_bset = std::bitset<8>(ip2); 149 | std::bitset<8> ip3_bset = std::bitset<8>(ip3); 150 | std::bitset<8> ip4_bset = std::bitset<8>(ip4); 151 | 152 | std::cout << ip1_bset.to_ulong() << "." << ip2_bset.to_ulong() << "." << ip3_bset.to_ulong() << "." << ip4_bset.to_ulong() << ","; 153 | 154 | std::string ip5 = bset_pair_string.substr(32, 8); 155 | std::string ip6 = bset_pair_string.substr(40, 8); 156 | std::string ip7 = bset_pair_string.substr(48, 8); 157 | std::string ip8 = bset_pair_string.substr(56, 8); 158 | 159 | std::bitset<8> ip5_bset = std::bitset<8>(ip5); 160 | std::bitset<8> ip6_bset = std::bitset<8>(ip6); 161 | std::bitset<8> ip7_bset = std::bitset<8>(ip7); 162 | std::bitset<8> ip8_bset = std::bitset<8>(ip8); 163 | 164 | std::cout << ip5_bset.to_ulong() << "." << ip6_bset.to_ulong() << "." << ip7_bset.to_ulong() << "." 
<< ip8_bset.to_ulong() << " -> "; 165 | 166 | float percent = (float)counts[id[i]]/(float)N; 167 | printf("%d (%g %g) counts %d / %d [%f%] \n",id[i],points[i].x,points[i].y, counts[id[i]], N, percent); 168 | } 169 | #endif 170 | #if 1 171 | printf("centroids = "); 172 | for( size_t j=0; j 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "timer.h" 24 | #include "kmeans.h" 25 | 26 | #define MB (1048576.0) 27 | #define UNUSE(x) (void)(x) 28 | 29 | using Tins::SnifferConfiguration; 30 | using Tins::FileSniffer; 31 | using Tins::IP; 32 | using Tins::DNS; 33 | using Tins::TCP; 34 | using Tins::UDP; 35 | using Tins::RawPDU; 36 | using Tins::PDU; 37 | using Tins::Packet; 38 | using namespace Tins; 39 | using namespace std; 40 | 41 | #include "tbb/task_scheduler_init.h" 42 | #include "tbb/concurrent_hash_map.h" 43 | #include "tbb/blocked_range.h" 44 | #include "tbb/parallel_for.h" 45 | 46 | using namespace std; 47 | using namespace tbb; 48 | 49 | #define N 1 50 | #define WORKER_THREAD_NUM N 51 | #define MAX_QUEUE_NUM N 52 | #define END_MARK_FNAME "///" 53 | #define END_MARK_FLENGTH 3 54 | 55 | #define DISP_FREQ 100000 56 | 57 | /* srcIP, destIP */ 58 | typedef struct _addrpair { 59 | map m; 60 | pthread_mutex_t mutex; 61 | } addrpair_t; 62 | addrpair_t addrpair; 63 | 64 | struct HashCompare { 65 | static size_t hash( unsigned long long x ) { 66 | return (size_t)x; 67 | } 68 | static bool equal( unsigned long long x, unsigned long long y ) { 69 | return x==y; 70 | } 71 | }; 72 | 73 | typedef concurrent_hash_map CharTable; 74 | static CharTable table; 75 | 76 | typedef concurrent_hash_map CharTable2; 77 | static CharTable2 table2; 78 | 79 | typedef struct _result { 80 | int num; 81 | char* fname; 82 | pthread_mutex_t mutex; 83 | } result_t; 84 | result_t 
result; 85 | 86 | typedef struct _queue { 87 | char* fname[MAX_QUEUE_NUM]; 88 | int flength[MAX_QUEUE_NUM]; 89 | int rp, wp; 90 | int remain; 91 | pthread_mutex_t mutex; 92 | pthread_cond_t not_full; 93 | pthread_cond_t not_empty; 94 | } queue_t; 95 | 96 | typedef struct _thread_arg { 97 | int id; 98 | int cpuid; 99 | queue_t* q; 100 | char* srchstr; 101 | char* dirname; 102 | int filenum; 103 | } thread_arg_t; 104 | 105 | 106 | bool callback(const PDU &pdu) { 107 | // Parse the IP header 108 | const IP &ip = pdu.rfind_pdu(); 109 | 110 | // Parse the TCP or UDP header 111 | const TCP &tcp = pdu.rfind_pdu(); 112 | 113 | string source_ip = ip.src_addr().to_string(); 114 | string source_port = std::to_string(tcp.sport()); 115 | 116 | string dest_ip = ip.dst_addr().to_string(); 117 | string dest_port = std::to_string(tcp.dport()); 118 | 119 | string info = source_ip + ":" + source_port + "=>" + dest_ip + ":" + dest_port; 120 | 121 | string info2 = source_ip + "=>" + dest_ip; 122 | 123 | // cout << "IP:" << "[" << ip.tot_len() << "]" << ip.src_addr() << ":" << source_ip << ':' << tcp.sport() << ":" << " -> " << ip.dst_addr() << ':' << tcp.dport() << endl; 124 | 125 | CharTable::accessor a; 126 | 127 | table.insert(a, info2); 128 | a->second += 1; 129 | 130 | CharTable2::accessor a2; 131 | 132 | table2.insert(a2, info2); 133 | a2->second += int(ip.tot_len()); 134 | 135 | return true; 136 | } 137 | 138 | 139 | int traverse_file(char* filename, char* srchstr, int thread_id) { 140 | 141 | cout << filename << endl; 142 | 143 | try { 144 | SnifferConfiguration config; 145 | config.set_filter("tcp or udp"); 146 | config.set_promisc_mode(false); 147 | // config.set_snap_len(65536); 148 | 149 | FileSniffer sniffer(filename, config); 150 | sniffer.sniff_loop(callback); 151 | 152 | return 0; 153 | } 154 | catch (std::exception& ex) { 155 | std::cerr << "[X] Error: " << ex.what() << '\n'; 156 | return -1; 157 | } 158 | 159 | return 0; 160 | } 161 | 162 | void initqueue(queue_t* q) 
{ 163 | int i; 164 | q->rp = q->wp = q->remain= 0; 165 | for (i = 0; i < MAX_QUEUE_NUM; ++i) q->fname[i] = NULL; 166 | pthread_mutex_init(&q->mutex, NULL); 167 | pthread_cond_init(&q->not_full, NULL); 168 | pthread_cond_init(&q->not_empty, NULL); 169 | return; 170 | } 171 | 172 | void enqueue(queue_t* q, char* path, int size) { 173 | 174 | pthread_mutex_lock(&q->mutex); 175 | while (q->remain == MAX_QUEUE_NUM) { 176 | pthread_cond_wait(&q->not_full, &q->mutex); 177 | } 178 | char** fname = (char**)&q->fname[q->wp]; 179 | if (*fname != NULL) free(*fname); 180 | *fname = (char*)malloc(size); 181 | strcpy(*fname, path); 182 | q->flength[q->wp] = size; 183 | q->wp++; q->remain++; 184 | 185 | if (q->wp == MAX_QUEUE_NUM) q->wp = 0; 186 | 187 | pthread_cond_signal(&q->not_empty); 188 | pthread_mutex_unlock(&q->mutex); 189 | return; 190 | } 191 | 192 | void dequeue(queue_t* q, char** fname, int* flen) { 193 | 194 | pthread_mutex_lock(&q->mutex); 195 | while (q->remain == 0) 196 | pthread_cond_wait(&q->not_empty, &q->mutex); 197 | 198 | *flen = q->flength[q->rp]; 199 | if (*fname != NULL) free(*fname); 200 | *fname = (char*)malloc(*flen); 201 | strcpy(*fname, q->fname[q->rp]); 202 | q->rp++; q->remain--; 203 | if (q->rp == MAX_QUEUE_NUM) q->rp = 0; 204 | pthread_cond_signal(&q->not_full); 205 | pthread_mutex_unlock(&q->mutex); 206 | if (strcmp(*fname,"")==0) printf("rp=%d\n", q->rp-1); 207 | return; 208 | } 209 | 210 | int traverse_dir_thread(queue_t* q, char* dirname) { 211 | static int cnt = 0; 212 | struct dirent* dent; 213 | DIR* dd = opendir(dirname); 214 | 215 | if (dd == NULL) { 216 | printf("Could not open the directory %s\n", dirname); return 0; 217 | } 218 | 219 | while ((dent = readdir(dd)) != NULL) { 220 | if (strncmp(dent->d_name, ".", 2) == 0) continue; 221 | if (strncmp(dent->d_name, "..", 3) == 0) continue; 222 | 223 | int size = strlen(dirname) + strlen(dent->d_name) + 2; 224 | #if 0 225 | char* path = (char*)malloc(size); 226 | sprintf(path, "%s/%s", 
dirname, dent->d_name); 227 | 228 | struct stat fs; 229 | if (stat(path, &fs) < 0) 230 | continue; 231 | else { 232 | if (S_ISDIR(fs.st_mode)) 233 | traverse_dir_thread(q, path); 234 | else if (S_ISREG(fs.st_mode)) { 235 | enqueue(q, path, size); 236 | cnt++; 237 | } 238 | } 239 | #else 240 | { 241 | char* path = (char*)alloca(size); 242 | sprintf(path, "%s/%s", dirname, dent->d_name); 243 | 244 | struct stat fs; 245 | if (stat(path, &fs) < 0) 246 | continue; 247 | else { 248 | if (S_ISDIR(fs.st_mode)) 249 | traverse_dir_thread(q, path); 250 | else if (S_ISREG(fs.st_mode)) { 251 | enqueue(q, path, size); 252 | cnt++; 253 | } 254 | } 255 | } 256 | #endif 257 | } 258 | closedir(dd); 259 | return cnt; 260 | } 261 | 262 | void master_func(thread_arg_t* arg) { 263 | queue_t* q = arg->q; 264 | int i; 265 | arg->filenum = traverse_dir_thread(q, arg->dirname); 266 | 267 | /* enqueue END_MARK */ 268 | for (i = 0; i < WORKER_THREAD_NUM; ++i) 269 | enqueue(q, END_MARK_FNAME, END_MARK_FLENGTH); 270 | return; 271 | } 272 | 273 | void worker_func(thread_arg_t* arg) { 274 | int flen; 275 | char* fname = NULL; 276 | queue_t* q = arg->q; 277 | char* srchstr = arg->srchstr; 278 | 279 | int thread_id = arg->id; 280 | 281 | printf("worker func %d launched \n", thread_id); 282 | 283 | #ifdef __CPU_SET 284 | cpu_set_t mask; 285 | __CPU_ZERO(&mask); 286 | __CPU_SET(arg->cpuid, &mask); 287 | if (sched_setaffinity(0, sizeof(mask), &mask) == -1) 288 | printf("WARNING: faild to set CPU affinity...\n"); 289 | #endif 290 | 291 | #if 0 292 | while (1) { 293 | int n; 294 | 295 | dequeue(q, &fname, &flen)); 296 | 297 | if (strncmp(fname, END_MARK_FNAME, END_MARK_FLENGTH + 1) == 0) 298 | break; 299 | 300 | n = traverse_file(fname, srchstr, thread_id); 301 | pthread_mutex_lock(&result.mutex); 302 | 303 | if (n > result.num) { 304 | result.num = n; 305 | if (result.fname != NULL) free(result.fname); 306 | result.fname = (char*)malloc(flen); 307 | strcpy(result.fname, fname); 308 | } 309 | 
pthread_mutex_unlock(&result.mutex); 310 | } 311 | #else 312 | char* my_result_fname; 313 | int my_result_num = 0; 314 | int my_result_len = 0; 315 | while (1) { 316 | int n; 317 | 318 | dequeue(q, &fname, &flen); 319 | 320 | if (strncmp(fname, END_MARK_FNAME, END_MARK_FLENGTH + 1) == 0) 321 | break; 322 | 323 | n = traverse_file(fname, srchstr, thread_id); 324 | 325 | if (n > my_result_num) { 326 | my_result_num = n; 327 | my_result_len = flen; 328 | my_result_fname = (char*)alloca(flen); 329 | strcpy(my_result_fname, fname); 330 | } 331 | } 332 | pthread_mutex_lock(&result.mutex); 333 | if (my_result_num > result.num) { 334 | result.num = my_result_num; 335 | if (result.fname != NULL) free(result.fname); 336 | result.fname = (char*)malloc(my_result_len); 337 | strcpy(result.fname, my_result_fname); 338 | } 339 | pthread_mutex_unlock(&result.mutex); 340 | #endif 341 | return; 342 | } 343 | 344 | void print_result(thread_arg_t* arg) { 345 | if (result.num) { 346 | printf("Total %d files\n", arg->filenum); 347 | printf("Max include file: %s[include %d]\n", result.fname, result.num); 348 | } 349 | return; 350 | } 351 | 352 | int main(int argc, char* argv[]) { 353 | int i; 354 | int thread_num = 1 + WORKER_THREAD_NUM; 355 | unsigned int t, travdirtime; 356 | queue_t q; 357 | thread_arg_t targ[thread_num]; 358 | pthread_t master; 359 | pthread_t worker[thread_num]; 360 | pthread_t worker2[thread_num]; 361 | int cpu_num; 362 | 363 | if (argc != 2) { 364 | printf("Usage: ./asura [DIR] \n"); return 0; 365 | } 366 | cpu_num = sysconf(_SC_NPROCESSORS_CONF); 367 | 368 | initqueue(&q); 369 | 370 | for (i = 0; i < thread_num; ++i) { 371 | targ[i].q = &q; 372 | targ[i].dirname = argv[1]; 373 | targ[i].filenum = 0; 374 | targ[i].cpuid = i%cpu_num; 375 | } 376 | result.fname = NULL; 377 | 378 | start_timer(&t); 379 | 380 | pthread_mutex_init(&result.mutex, NULL); 381 | 382 | pthread_create(&master, NULL, (void*)master_func, (void*)&targ[0]); 383 | for (i = 1; i < thread_num; ++i) 
384 | { 385 | targ[i].id = i; 386 | pthread_create(&worker[i], NULL, (void*)worker_func, (void*)&targ[i]); 387 | } 388 | for (i = 1; i < thread_num; ++i) 389 | pthread_join(worker[i], NULL); 390 | 391 | travdirtime = stop_timer(&t); 392 | print_timer(travdirtime); 393 | 394 | std::vector s_vec_1; 395 | std::vector s_vec_2; 396 | std::vector s_vec_3; 397 | std::vector s_vec_4; 398 | 399 | for( CharTable::iterator i=table.begin(); i!=table.end(); ++i ) 400 | { 401 | s_vec_1.push_back(i->first); 402 | s_vec_2.push_back((float)i->second); 403 | 404 | cout << "[counts]:" << i->first << "," << i->second << endl; 405 | } 406 | 407 | for( CharTable2::iterator i=table2.begin(); i!=table2.end(); ++i ) 408 | { 409 | s_vec_3.push_back(i->first); 410 | s_vec_4.push_back((float)i->second); 411 | 412 | cout << "[size]:" << i->first << "," << i->second << endl; 413 | } 414 | 415 | std::remove("reduced"); 416 | ofstream outputfile3("reduced"); 417 | 418 | std::remove("reduced2"); 419 | ofstream outputfile3_2("reduced2"); 420 | 421 | cout << "reduced" << endl; 422 | 423 | for(int i = 0; i < s_vec_1.size(); i++) 424 | { 425 | /* 426 | double tmp_div; 427 | if (s_vec_4[i] == 0) 428 | tmp_div = 0; 429 | else if (s_vec_4[i] > 0) 430 | tmp_div = (double)s_vec_2[i] / (double)s_vec_4[i]; 431 | */ 432 | 433 | cout << s_vec_1[i] << "," << s_vec_2[i] << "," << s_vec_4[i] << endl; 434 | outputfile3 << s_vec_1[i] << "," << s_vec_2[i] << "," << s_vec_4[i] << endl; 435 | outputfile3_2 << s_vec_2[i] << "," << s_vec_4[i] << endl; 436 | } 437 | 438 | outputfile3.close(); 439 | outputfile3_2.close(); 440 | 441 | cout << "strat KMeans " << endl; 442 | 443 | const size_t SQRT_K = 2; 444 | const size_t K = SQRT_K*SQRT_K; 445 | const size_t M = s_vec_1.size(); 446 | 447 | cout << "data size:" << M << endl; 448 | cout << "cluster size:" << K << endl; 449 | 450 | point* points; 451 | points = (struct point *)malloc(M*sizeof(struct point)); 452 | 453 | point* centroid; 454 | centroid = (struct point 
*)malloc(M*sizeof(struct point)); 455 | 456 | cluster_id* id; 457 | id = (unsigned short *)malloc(M*sizeof(unsigned short)); 458 | 459 | std::vector pair; 460 | 461 | for (int i = 0; i < M; i++) { 462 | 463 | point& p = points[i]; 464 | 465 | p.x = (float)s_vec_2[i]; 466 | p.y = (float)s_vec_4[i]; 467 | pair.push_back(s_vec_1[i]); 468 | } 469 | 470 | start_timer(&t); 471 | tbb_asura::do_k_means( M, points, K, id, centroid ); 472 | 473 | cout << "KMeans finished" << endl; 474 | 475 | /* 476 | travdirtime = stop_timer(&t); 477 | print_timer(travdirtime); 478 | */ 479 | 480 | 481 | 482 | #if 1 483 | int* counts; 484 | counts = (int *)malloc(K*sizeof(int)); 485 | 486 | for( size_t i=0; i bset_pair = std::bitset<64>(pair_long); 507 | string bset_pair_string = bset_pair.to_string(); 508 | 509 | std::string ip1 = bset_pair_string.substr(0, 8); 510 | std::string ip2 = bset_pair_string.substr(8, 8); 511 | std::string ip3 = bset_pair_string.substr(16, 8); 512 | std::string ip4 = bset_pair_string.substr(24, 8); 513 | 514 | std::bitset<8> ip1_bset = std::bitset<8>(ip1); 515 | std::bitset<8> ip2_bset = std::bitset<8>(ip2); 516 | std::bitset<8> ip3_bset = std::bitset<8>(ip3); 517 | std::bitset<8> ip4_bset = std::bitset<8>(ip4); 518 | 519 | outputfile4 << ip1_bset.to_ulong() << "." << ip2_bset.to_ulong() << "." << ip3_bset.to_ulong() << "." << ip4_bset.to_ulong() << ","; 520 | 521 | std::string ip5 = bset_pair_string.substr(32, 8); 522 | std::string ip6 = bset_pair_string.substr(40, 8); 523 | std::string ip7 = bset_pair_string.substr(48, 8); 524 | std::string ip8 = bset_pair_string.substr(56, 8); 525 | 526 | std::bitset<8> ip5_bset = std::bitset<8>(ip5); 527 | std::bitset<8> ip6_bset = std::bitset<8>(ip6); 528 | std::bitset<8> ip7_bset = std::bitset<8>(ip7); 529 | std::bitset<8> ip8_bset = std::bitset<8>(ip8); 530 | 531 | outputfile4 << ip5_bset.to_ulong() << "." << ip6_bset.to_ulong() << "." << ip7_bset.to_ulong() << "." 
<< ip8_bset.to_ulong() << " -> "; 532 | 533 | float percent = (float)counts[id[i]]/(float)M; 534 | 535 | outputfile4 << id[i] << " (" << points[i].x << "," << points[i].y << ")," << counts[id[i]] << "," << M << "," << percent << endl; 536 | 537 | */ 538 | } 539 | 540 | outputfile4.close(); 541 | 542 | #endif 543 | 544 | #if 0 545 | printf("centroids = "); 546 | for( size_t j=0; j