├── .gitignore ├── .travis.yml ├── CODEOWNERS ├── LICENSE ├── README.md ├── SConstruct ├── flow_db.c ├── pkt2flow.c ├── pkt2flow.h └── utilities.c /.gitignore: -------------------------------------------------------------------------------- 1 | .sconsign.dblite 2 | pkt2flow 3 | *.o 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | before_script: 4 | - sudo apt-get install libpcap-dev 5 | 6 | script: scons 7 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @caesar0301 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright © 2012-2016 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pkt2flow 2 | ======== 3 | 4 | by chenxm, Shanghai Jiao Tong Univ. 5 | chenxm35@gmail.com 6 | 7 | 2012-2019 8 | 9 | **©MIT LICENSED** 10 | 11 | A simple utility to classify packets into flows. It is deliberately simple and aims 12 | to do only this one task. 13 | 14 | For Deep Packet Inspection or flow classification, it is common to analyze the 15 | features of one specific flow. I have tried ready-made tools like 16 | `tcpflows`, `tcpslice`, `tcpsplit`, but all these tools try to either decrease the 17 | trace volume (under requirement) or reassemble the packets into flow payloads (over 18 | requirement). I have not found a simple tool to classify the packets into flows without 19 | further processing. This is why this program was born. 20 | 21 | Internally, this program uses the 4-tuple (src_ip, dst_ip, src_port, dst_port) 22 | to separate the packets into TCP or UDP flows. Each flow will be saved into a pcap 23 | file named with the 4-tuple and the timestamp of the first packet of the flow. The packets are 24 | saved in the order in which they are read from the source. No further processing, such as TCP reassembly, is 25 | performed. The flow timeout is 30 minutes, which can be changed in pkt2flow.h. 26 | 27 | 28 | How to compile 29 | ---------- 30 | 31 | 32 | This program is structured and compiled with a tool called SCons (http://www.scons.org/). 33 | Follow these simple steps to build it (e.g. on Ubuntu): 34 | 35 | 1. Make sure the `libpcap` library is installed on your system. 
36 | ```bash 37 | sudo apt install -y libpcap-dev 38 | ``` 39 | 40 | 2. Install "Scons" that can be downloaded from its official website given above. 41 | ```bash 42 | sudo apt install -y scons 43 | ``` 44 | 45 | 3. Get source code and run `scons` under the project folder: 46 | ```bash 47 | git clone https://github.com/caesar0301/pkt2flow.git 48 | cd pkt2flow 49 | scons # You got binary pkt2flow 50 | ```` 51 | 52 | How to install (optional) 53 | ---------- 54 | 55 | You can optionally let scons automatically handle the installation for you by 56 | providing an installation prefix, e.g.: 57 | 58 | $ PREFIX=/usr/local 59 | $ scons --prefix=$PREFIX install 60 | 61 | This will build pkt2flow and install the binary to /usr/local/bin/pkt2flow. 62 | Depending on where you want to install it, you might need to use sudo or 63 | become the appropriate user. 64 | 65 | Usage 66 | -------- 67 | ```bash 68 | Usage: ./pkt2flow [-huvx] [-o outdir] pcapfile 69 | 70 | Options: 71 | -h print this help and exit 72 | -u also dump (U)DP flows 73 | -v also dump the in(v)alid TCP flows without the SYN option 74 | -x also dump non-UDP/non-TCP IP flows 75 | -o (o)utput directory 76 | ``` 77 | 78 | Contributors 79 | -------- 80 | 81 | [![Contributors](https://contrib.rocks/image?repo=caesar0301/pkt2flow "pkt2flow contributors")](https://github.com/caesar0301/pkt2flow/graphs/contributors) 82 | 83 | -------------------------------------------------------------------------------- /SConstruct: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | env = Environment(CCFLAGS='-Wall -g', CPPFLAGS='-D_GNU_SOURCE') 5 | 6 | AddOption('--prefix', 7 | dest='prefix', 8 | nargs=1, type='string', 9 | action='store', 10 | metavar='DIR', 11 | help='installation prefix') 12 | env = Environment(PREFIX = GetOption('prefix')) 13 | 14 | idir_prefix = '$PREFIX' 15 | idir_bin = '$PREFIX/bin' 16 | 17 | Export('env idir_prefix idir_bin') 18 | 
19 | platform = sys.platform 20 | lib_path = ['/usr/local/lib', '/usr/lib'] 21 | libs = Glob('./*.a') + ['pcap'] 22 | cpp_path=['.'] 23 | 24 | if platform == 'darwin': 25 | env.Append(CPPFLAGS=['-Ddarwin']) 26 | 27 | # Compile the programs 28 | pkt2flow = env.Program(target = './pkt2flow', 29 | source = Glob('./*.c'), 30 | LIBPATH = lib_path, 31 | LIBS = libs, 32 | CPPPATH = cpp_path) 33 | 34 | # install the program 35 | env.Install(dir = idir_bin, source = pkt2flow) 36 | 37 | # create an install alias 38 | env.Alias('install', idir_prefix) 39 | -------------------------------------------------------------------------------- /flow_db.c: -------------------------------------------------------------------------------- 1 | /* pkt2flow 2 | * Xiaming Chen (chen_xm@sjtu.edu.cn) 3 | * 4 | * Copyright (c) 2012 5 | * Copyright (c) 2014 Sven Eckelmann 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * The names and trademarks of copyright holders may not be used in 19 | * advertising or publicity pertaining to the software without specific 20 | * prior permission. Title to copyright in this software and any 21 | * associated documentation will at all times remain with the copyright 22 | * holders. 
23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | * SOFTWARE. 32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "pkt2flow.h" 41 | 42 | struct ip_pair *pairs [HASH_TBL_SIZE]; 43 | 44 | void init_hash_table(void) 45 | { 46 | memset(pairs, 0, sizeof(struct ip_pair *) * HASH_TBL_SIZE); 47 | } 48 | 49 | void free_hash_table(void) 50 | { 51 | size_t b; 52 | struct ip_pair *curp; 53 | 54 | for (b = 0; b < HASH_TBL_SIZE; b++) { 55 | while (pairs[b]) { 56 | curp = pairs[b]; 57 | pairs[b] = pairs[b]->next; 58 | reset_pdf(&curp->pdf); 59 | free(curp); 60 | } 61 | } 62 | 63 | init_hash_table(); 64 | } 65 | 66 | static unsigned int hashf(const void *key, size_t sz, unsigned int hash) 67 | { 68 | unsigned int h; 69 | unsigned int i; 70 | const unsigned char *array = key; 71 | 72 | h = hash; 73 | for (i = 0; i < sz; i++) 74 | h = (h * HASH_MULTIPLIER) + array[i]; 75 | return (h); 76 | } 77 | 78 | void reset_pdf(struct pkt_dump_file *f) 79 | { 80 | f->pkts = 0; 81 | f->start_time = 0; 82 | f->status = STS_UNSET; 83 | free(f->file_name); 84 | f->file_name = NULL; 85 | } 86 | 87 | static unsigned int hash_5tuple(struct af_6tuple af_6tuple) 88 | { 89 | unsigned int hash1 = 0; 90 | unsigned int hash2 = 0; 91 | int i; 92 | 93 | for (i = 0; i < 2; i++) { 94 | if (i == 0) { 95 | switch (af_6tuple.af_family) { 96 | case AF_INET: 97 | hash1 = hashf(&af_6tuple.ip1.v4, 4, hash1); 98 | hash1 = hashf(&af_6tuple.ip2.v4, 4, hash1); 99 | break; 100 | case AF_INET6: 101 | hash1 = 
hashf(&af_6tuple.ip1.v6, 16, hash1); 102 | hash1 = hashf(&af_6tuple.ip2.v6, 16, hash1); 103 | break; 104 | } 105 | if (af_6tuple.port1) 106 | hash1 = hashf(&af_6tuple.port1, 2, hash1); 107 | if (af_6tuple.port2) 108 | hash1 = hashf(&af_6tuple.port2, 2, hash1); 109 | } else { 110 | switch (af_6tuple.af_family) { 111 | case AF_INET: 112 | hash2 = hashf(&af_6tuple.ip2.v4, 4, hash2); 113 | hash2 = hashf(&af_6tuple.ip1.v4, 4, hash2); 114 | break; 115 | case AF_INET6: 116 | hash2 = hashf(&af_6tuple.ip2.v6, 16, hash2); 117 | hash2 = hashf(&af_6tuple.ip1.v6, 16, hash2); 118 | break; 119 | } 120 | if (af_6tuple.port2) 121 | hash2 = hashf(&af_6tuple.port2, 2, hash2); 122 | if (af_6tuple.port1) 123 | hash2 = hashf(&af_6tuple.port1, 2, hash2); 124 | } 125 | } 126 | 127 | return (hash1 + hash2) % HASH_TBL_SIZE; 128 | } 129 | 130 | static int compare_5tuple(struct af_6tuple af1, struct af_6tuple af2) 131 | { 132 | if (af1.af_family != af2.af_family) 133 | return 0; 134 | 135 | if (af1.protocol != af2.protocol) 136 | return 0; 137 | 138 | switch (af1.af_family) { 139 | case AF_INET: 140 | if (memcmp(&af1.ip1.v4, &af2.ip1.v4, sizeof(af1.ip1.v4)) == 0 && 141 | memcmp(&af1.ip2.v4, &af2.ip2.v4, sizeof(af1.ip2.v4)) == 0 && 142 | af1.port1 == af2.port1 && af1.port2 == af2.port2) 143 | return 1; 144 | if (memcmp(&af1.ip1.v4, &af2.ip2.v4, sizeof(af1.ip1.v4)) == 0 && 145 | memcmp(&af1.ip2.v4, &af2.ip1.v4, sizeof(af1.ip2.v4)) == 0 && 146 | af1.port1 == af2.port2 && af1.port2 == af2.port1) 147 | return 1; 148 | break; 149 | case AF_INET6: 150 | if (memcmp(&af1.ip1.v6, &af2.ip1.v6, sizeof(af1.ip1.v6)) == 0 && 151 | memcmp(&af1.ip2.v6, &af2.ip2.v6, sizeof(af1.ip2.v6)) == 0 && 152 | af1.port1 == af2.port1 && af1.port2 == af2.port2) 153 | return 1; 154 | if (memcmp(&af1.ip1.v6, &af2.ip2.v6, sizeof(af1.ip1.v6)) == 0 && 155 | memcmp(&af1.ip2.v6, &af2.ip1.v6, sizeof(af1.ip2.v6)) == 0 && 156 | af1.port1 == af2.port2 && af1.port2 == af2.port1) 157 | return 1; 158 | break; 159 | } 160 | 161 | return 
0; 162 | } 163 | 164 | struct ip_pair *find_ip_pair(struct af_6tuple af_6tuple) 165 | { 166 | struct ip_pair *p; 167 | unsigned int hash; 168 | 169 | hash = hash_5tuple(af_6tuple); 170 | if (pairs[hash]) { 171 | for (p = pairs [hash]; p != NULL; p = p->next) { 172 | if (compare_5tuple(p->af_6tuple, af_6tuple)) 173 | return p; 174 | } 175 | } 176 | 177 | return NULL; 178 | } 179 | 180 | struct ip_pair *register_ip_pair(struct af_6tuple af_6tuple) 181 | { 182 | struct ip_pair *newp; 183 | unsigned int hash; 184 | 185 | hash = hash_5tuple(af_6tuple); 186 | 187 | newp = (struct ip_pair *)malloc(sizeof(struct ip_pair)); 188 | if (!newp) { 189 | fprintf(stderr, "not enough memory to allocate another IP pair\n"); 190 | exit(1); 191 | } 192 | 193 | newp->af_6tuple = af_6tuple; 194 | newp->pdf.file_name = NULL; 195 | newp->next = pairs [hash]; 196 | pairs [hash] = newp; 197 | reset_pdf((struct pkt_dump_file *) & (newp->pdf)); 198 | 199 | return newp; 200 | } 201 | -------------------------------------------------------------------------------- /pkt2flow.c: -------------------------------------------------------------------------------- 1 | /* pkt2flow 2 | * Xiaming Chen (chen_xm@sjtu.edu.cn) 3 | * 4 | * Copyright (c) 2012 5 | * Copyright (C) 2014 Sven Eckelmann 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 
17 | * 18 | * The names and trademarks of copyright holders may not be used in 19 | * advertising or publicity pertaining to the software without specific 20 | * prior permission. Title to copyright in this software and any 21 | * associated documentation will at all times remain with the copyright 22 | * holders. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | * SOFTWARE. 32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include "pkt2flow.h" 53 | 54 | static uint32_t dump_allowed; 55 | static char *readfile = NULL; 56 | //char *interface = NULL; 57 | static char *outputdir = "pkt2flow.out"; 58 | static pcap_t *inputp = NULL; 59 | struct ip_pair *pairs[HASH_TBL_SIZE]; 60 | 61 | static void usage(char *progname) 62 | { 63 | fprintf(stderr, "Name: %s\n", __GLOBAL_NAME__); 64 | fprintf(stderr, "Version: %s\n", __SOURCE_VERSION__); 65 | fprintf(stderr, "Author: %s\n", __AUTHOR__); 66 | fprintf(stderr, "Program to seperate the packets into flows (UDP or TCP).\n\n"); 67 | fprintf(stderr, "Usage: %s [-huvx] [-o outdir] pcapfile\n\n", progname); 68 | fprintf(stderr, "Options:\n"); 69 | fprintf(stderr, " -h print this help and exit\n"); 70 | fprintf(stderr, " -u also dump (U)DP flows\n"); 71 | fprintf(stderr, " -v also dump the in(v)alid TCP flows without the SYN option\n"); 72 | fprintf(stderr, " -x 
also dump non-UDP/non-TCP IP flows\n"); 73 | fprintf(stderr, " -o (o)utput directory\n"); 74 | } 75 | 76 | 77 | static void parseargs(int argc, char *argv[]) 78 | { 79 | int opt; 80 | const char *optstr = "uvxo:h"; 81 | while ((opt = getopt(argc, argv, optstr)) != -1) { 82 | switch (opt) { 83 | case 'h': 84 | usage(argv [0]); 85 | exit(-1); 86 | case 'o': 87 | outputdir = optarg; 88 | break; 89 | case 'u': 90 | dump_allowed |= DUMP_UDP_ALLOWED; 91 | break; 92 | case 'v': 93 | dump_allowed |= DUMP_TCP_NOSYN_ALLOWED; 94 | break; 95 | case 'x': 96 | dump_allowed |= DUMP_OTHER_ALLOWED; 97 | break; 98 | default: 99 | usage(argv [0]); 100 | exit(-1); 101 | } 102 | } 103 | 104 | if (optind < argc) 105 | readfile = argv[optind]; 106 | if (readfile == NULL) { 107 | fprintf(stderr, "pcap file not given\n"); 108 | usage(argv[0]); 109 | exit(1); 110 | } 111 | } 112 | 113 | static void open_trace_file(void) 114 | { 115 | char errbuf [PCAP_ERRBUF_SIZE]; 116 | 117 | inputp = pcap_open_offline(readfile, errbuf); 118 | if (!inputp) { 119 | fprintf(stderr, "error opening tracefile %s: %s\n", readfile, 120 | errbuf); 121 | exit(1); 122 | } 123 | } 124 | 125 | static char *resemble_file_path(struct pkt_dump_file *pdf) 126 | { 127 | char *cwd = getcwd(NULL, 0); // backup the current working directory 128 | char *folder = NULL; 129 | int check; 130 | struct stat statBuff; 131 | int ret; 132 | const char *type_folder; 133 | char *outputpath; 134 | 135 | switch (pdf->status) { 136 | case STS_TCP_SYN: 137 | type_folder = "tcp_syn"; 138 | break; 139 | case STS_TCP_NOSYN: 140 | type_folder = "tcp_nosyn"; 141 | break; 142 | case STS_UDP: 143 | type_folder = "udp"; 144 | break; 145 | case STS_UNSET: 146 | type_folder = "others"; 147 | break; 148 | } 149 | 150 | ret = asprintf(&outputpath, "%s/%s", outputdir, type_folder); 151 | if (ret < 0) 152 | return NULL; 153 | 154 | // Check the path folder and create the folders if they are not there 155 | ret = stat(outputpath, &statBuff); 156 | if 
(!(ret != -1 && S_ISDIR(statBuff.st_mode))) { 157 | /* handle absolute path */ 158 | if (outputpath[0] == '/') 159 | chdir("/"); 160 | 161 | folder = strtok(outputpath, "/"); 162 | while (folder != NULL) { 163 | ret = stat(folder, &statBuff); 164 | if (!(ret != -1 && S_ISDIR(statBuff.st_mode))) { 165 | check = mkdir(folder, S_IRWXU); 166 | if (check != 0) { 167 | fprintf(stderr, "making directory error: %s\n", 168 | folder); 169 | exit(-1); 170 | } 171 | } 172 | chdir(folder); 173 | folder = strtok(NULL, "/"); 174 | } 175 | } 176 | chdir(cwd); 177 | free(cwd); 178 | free(outputpath); 179 | 180 | ret = asprintf(&outputpath, "%s/%s/%s", outputdir, type_folder, 181 | pdf->file_name); 182 | if (ret < 0) 183 | return NULL; 184 | 185 | return outputpath; 186 | } 187 | 188 | static int pcap_handle_layer4(struct af_6tuple *af_6tuple, const u_char *bytes, 189 | size_t len, uint8_t proto) 190 | { 191 | struct tcphdr *tcphdr; 192 | struct udphdr *udphdr; 193 | 194 | switch (proto) { 195 | case IPPROTO_UDP: 196 | if (len < sizeof(*udphdr)) 197 | return -1; 198 | 199 | udphdr = (struct udphdr *)bytes; 200 | af_6tuple->protocol = IPPROTO_UDP; 201 | #ifdef darwin 202 | af_6tuple->port1 = ntohs(udphdr->uh_sport); 203 | af_6tuple->port2 = ntohs(udphdr->uh_dport); 204 | #else 205 | af_6tuple->port1 = ntohs(udphdr->source); 206 | af_6tuple->port2 = ntohs(udphdr->dest); 207 | #endif 208 | return 0; 209 | case IPPROTO_TCP: 210 | if (len < sizeof(*tcphdr)) 211 | return -1; 212 | 213 | tcphdr = (struct tcphdr *)bytes; 214 | af_6tuple->protocol = IPPROTO_TCP; 215 | #ifdef darwin 216 | af_6tuple->port1 = ntohs(tcphdr->th_sport); 217 | af_6tuple->port2 = ntohs(tcphdr->th_dport); 218 | #else 219 | af_6tuple->port1 = ntohs(tcphdr->source); 220 | af_6tuple->port2 = ntohs(tcphdr->dest); 221 | #endif 222 | 223 | #ifdef darwin 224 | if (tcphdr->th_flags == TH_SYN) 225 | #else 226 | if (tcphdr->syn) 227 | #endif 228 | return 1; 229 | else 230 | return 0; 231 | default: 232 | af_6tuple->protocol = 
0; 233 | af_6tuple->port1 = 0; 234 | af_6tuple->port2 = 0; 235 | return 0; 236 | } 237 | } 238 | 239 | static int pcap_handle_ipv4(struct af_6tuple *af_6tuple, const u_char *bytes, 240 | size_t len) 241 | { 242 | struct ip *iphdr; 243 | 244 | if (len < sizeof(*iphdr)) 245 | return -1; 246 | 247 | iphdr = (struct ip *)bytes; 248 | if (len > ntohs(iphdr->ip_len)) 249 | len = ntohs(iphdr->ip_len); 250 | 251 | if (len < 4 * iphdr->ip_hl) 252 | return -1; 253 | 254 | len -= 4 * iphdr->ip_hl; 255 | bytes += 4 * iphdr->ip_hl; 256 | 257 | af_6tuple->af_family = AF_INET; 258 | af_6tuple->ip1.v4 = iphdr->ip_src; 259 | af_6tuple->ip2.v4 = iphdr->ip_dst; 260 | 261 | return pcap_handle_layer4(af_6tuple, bytes, len, iphdr->ip_p); 262 | } 263 | 264 | static int pcap_handle_ipv6(struct af_6tuple *af_6tuple, const u_char *bytes, 265 | size_t len) 266 | { 267 | struct ip6_hdr *iphdr; 268 | struct ip6_opt *opthdr; 269 | int curheader = 255; 270 | uint8_t nexthdr; 271 | 272 | while (1) { 273 | switch (curheader) { 274 | case 255: 275 | if (len < sizeof(*iphdr)) 276 | return -1; 277 | iphdr = (struct ip6_hdr *)bytes; 278 | bytes += sizeof(*iphdr); 279 | len -= sizeof(*iphdr); 280 | nexthdr = iphdr->ip6_ctlun.ip6_un1.ip6_un1_nxt; 281 | 282 | af_6tuple->af_family = AF_INET6; 283 | af_6tuple->ip1.v6 = iphdr->ip6_src; 284 | af_6tuple->ip2.v6 = iphdr->ip6_dst; 285 | break; 286 | case IPPROTO_HOPOPTS: 287 | case IPPROTO_ROUTING: 288 | case IPPROTO_DSTOPTS: 289 | if (len < sizeof(*opthdr)) 290 | return -1; 291 | nexthdr = bytes[0]; 292 | 293 | opthdr = (struct ip6_opt *)bytes; 294 | if (len < ((1u + opthdr->ip6o_len) * 8u)) 295 | return -1; 296 | bytes += (1u + opthdr->ip6o_len) * 8u; 297 | len -= (1u + opthdr->ip6o_len) * 8u; 298 | break; 299 | case IPPROTO_FRAGMENT: 300 | if (len < 1) 301 | return -1; 302 | nexthdr = bytes[0]; 303 | if (len < 8) 304 | return -1; 305 | bytes += 8; 306 | len -= 8; 307 | break; 308 | case IPPROTO_NONE: 309 | return -1; 310 | default: 311 | return 
pcap_handle_layer4(af_6tuple, bytes, len, 312 | nexthdr); 313 | }; 314 | curheader = nexthdr; 315 | } 316 | } 317 | 318 | static int pcap_handle_ip(struct af_6tuple *af_6tuple, const u_char *bytes, 319 | size_t len) 320 | { 321 | if (len < 1) 322 | return -1; 323 | 324 | /* IP header */ 325 | if ((bytes[0] >> 4) == 4) 326 | return pcap_handle_ipv4(af_6tuple, bytes, len); 327 | 328 | if ((bytes[0] >> 4) == 6) 329 | return pcap_handle_ipv6(af_6tuple, bytes, len); 330 | 331 | return -1; 332 | } 333 | 334 | static int pcap_handle_ethernet(struct af_6tuple *af_6tuple, 335 | const struct pcap_pkthdr *h, 336 | const u_char *bytes) 337 | { 338 | size_t len = h->caplen; 339 | struct ether_header *ethhdr; 340 | 341 | /* Ethernet header */ 342 | if (len < sizeof(*ethhdr)) 343 | return - 1; 344 | 345 | ethhdr = (struct ether_header *)bytes; 346 | len -= sizeof(*ethhdr); 347 | bytes += sizeof(*ethhdr); 348 | 349 | struct vlan_header *vlanhdr; 350 | uint16_t etype = ntohs(ethhdr->ether_type); 351 | 352 | /* VLAN header, IEEE 802.1Q */ 353 | if (etype == ETHERTYPE_VLAN) { 354 | vlanhdr = (struct vlan_header *)bytes; 355 | etype = ntohs(vlanhdr->tpid); 356 | bytes += sizeof(*vlanhdr); 357 | len -= sizeof(*vlanhdr); 358 | af_6tuple->is_vlan = 1; 359 | } else { 360 | af_6tuple->is_vlan = 0; 361 | } 362 | 363 | if (etype != ETHERTYPE_IP && etype != ETHERTYPE_IPV6) 364 | return -1; 365 | 366 | return pcap_handle_ip(af_6tuple, bytes, len); 367 | } 368 | 369 | static void process_trace(void) 370 | { 371 | struct pcap_pkthdr hdr; 372 | int syn_detected; 373 | struct ip_pair *pair = NULL; 374 | pcap_dumper_t *dumper = NULL; 375 | u_char *pkt = NULL; 376 | char *fname = NULL; 377 | struct af_6tuple af_6tuple; 378 | 379 | while ((pkt = (u_char *)pcap_next(inputp, &hdr)) != NULL) { 380 | syn_detected = pcap_handle_ethernet(&af_6tuple, &hdr, pkt); 381 | if (syn_detected < 0) 382 | continue; 383 | 384 | switch (af_6tuple.protocol) { 385 | case IPPROTO_TCP: 386 | /* always accept tcp */ 387 | 
break; 388 | case IPPROTO_UDP: 389 | if (!isset_bits(dump_allowed, DUMP_UDP_ALLOWED)) 390 | // Omit the UDP packets 391 | continue; 392 | break; 393 | default: 394 | if (!isset_bits(dump_allowed, DUMP_OTHER_ALLOWED)) 395 | // Omit the other packets 396 | continue; 397 | break; 398 | } 399 | 400 | // Search for the ip_pair of specific six-tuple 401 | pair = find_ip_pair(af_6tuple); 402 | if (pair == NULL) { 403 | if ((af_6tuple.protocol == IPPROTO_TCP) && 404 | !syn_detected && 405 | !isset_bits(dump_allowed, DUMP_TCP_NOSYN_ALLOWED)) { 406 | // No SYN detected and don't create a new flow 407 | continue; 408 | } 409 | pair = register_ip_pair(af_6tuple); 410 | switch (af_6tuple.protocol) { 411 | case IPPROTO_TCP: 412 | if (syn_detected) 413 | pair->pdf.status = STS_TCP_SYN; 414 | else 415 | pair->pdf.status = STS_TCP_NOSYN; 416 | break; 417 | case IPPROTO_UDP: 418 | pair->pdf.status = STS_UDP; 419 | break; 420 | default: 421 | pair->pdf.status = STS_UNSET; 422 | break; 423 | } 424 | } 425 | 426 | // Fill the ip_pair with information of the current flow 427 | if (pair->pdf.pkts == 0) { 428 | // A new flow item reated with empty dump file object 429 | fname = new_file_name(af_6tuple, hdr.ts.tv_sec); 430 | pair->pdf.file_name = fname; 431 | pair->pdf.start_time = hdr.ts.tv_sec; 432 | } else { 433 | if (hdr.ts.tv_sec - pair->pdf.start_time >= FLOW_TIMEOUT) { 434 | // Rest the pair to start a new flow with the same 6-tuple, but with 435 | // the different name and timestamp 436 | reset_pdf(&(pair->pdf)); 437 | fname = new_file_name(af_6tuple, hdr.ts.tv_sec); 438 | pair->pdf.file_name = fname; 439 | pair->pdf.start_time = hdr.ts.tv_sec; 440 | 441 | switch (af_6tuple.protocol) { 442 | case IPPROTO_TCP: 443 | if (syn_detected) 444 | pair->pdf.status = STS_TCP_SYN; 445 | else 446 | pair->pdf.status = STS_TCP_NOSYN; 447 | break; 448 | case IPPROTO_UDP: 449 | pair->pdf.status = STS_UDP; 450 | break; 451 | default: 452 | pair->pdf.status = STS_UNSET; 453 | break; 454 | } 455 | } 
456 | } 457 | 458 | // Dump the packet to file and close the file 459 | fname = resemble_file_path(&(pair->pdf)); 460 | FILE *f = fopen(fname, "ab"); 461 | if (!f) { 462 | fprintf(stderr, "Failed to open output file '%s'\n", fname); 463 | goto skip_dump_write; 464 | } 465 | 466 | if (pair->pdf.pkts == 0) { 467 | // Call the pcap_dump_fopen to write the pcap file header first 468 | // to the new file 469 | dumper = pcap_dump_fopen(inputp, f); 470 | } else { 471 | // Write the packet only 472 | dumper = (pcap_dumper_t *)f; 473 | } 474 | // Dump the packet now 475 | pcap_dump((u_char *)dumper, &hdr, (unsigned char *)pkt); 476 | pcap_dump_close(dumper); 477 | 478 | skip_dump_write: 479 | free(fname); 480 | pair->pdf.pkts++; 481 | } 482 | } 483 | 484 | 485 | static void close_trace_files(void) 486 | { 487 | pcap_close(inputp); 488 | } 489 | 490 | 491 | int main(int argc, char *argv[]) 492 | { 493 | parseargs(argc, argv); 494 | open_trace_file(); 495 | init_hash_table(); 496 | process_trace(); 497 | close_trace_files(); 498 | free_hash_table(); 499 | exit(0); 500 | } 501 | -------------------------------------------------------------------------------- /pkt2flow.h: -------------------------------------------------------------------------------- 1 | 2 | /* pkt2flow 3 | * Xiaming Chen (chen_xm@sjtu.edu.cn) 4 | * 5 | * Copyright (c) 2012 6 | * Copyright (c) 2014 Sven Eckelmann 7 | * 8 | * Permission is hereby granted, free of charge, to any person 9 | * obtaining a copy of this software and associated documentation files 10 | * (the "Software"), to deal in the Software without restriction, 11 | * including without limitation the rights to use, copy, modify, merge, 12 | * publish, distribute, sublicense, and/or sell copies of the Software, 13 | * and to permit persons to whom the Software is furnished to do so, 14 | * subject to the following conditions: 15 | * 16 | * The above copyright notice and this permission notice shall be 17 | * included in all copies or substantial 
portions of the Software. 18 | * 19 | * The names and trademarks of copyright holders may not be used in 20 | * advertising or publicity pertaining to the software without specific 21 | * prior permission. Title to copyright in this software and any 22 | * associated documentation will at all times remain with the copyright 23 | * holders. 24 | * 25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 | * SOFTWARE. 33 | */ 34 | 35 | #include 36 | #include 37 | #include 38 | 39 | #define __SOURCE_VERSION__ "1.2" 40 | #define __AUTHOR__ "X. Chen (chenxm35@gmail.com)" 41 | #define __GLOBAL_NAME__ "pkt2flow" 42 | #define FLOW_TIMEOUT 1800 // seconds 43 | #define HASH_MULTIPLIER 37 44 | #define HASH_TBL_SIZE 48611 45 | 46 | #define BIT(bitnr) (1ULL << (bitnr)) 47 | #define isset_bits(x, bitmask) ({ typeof(bitmask) _bitmask = (bitmask); \ 48 | (_bitmask & (x)) == _bitmask; }) 49 | 50 | enum dump_allow_flags { 51 | DUMP_OTHER_ALLOWED = BIT(0), 52 | DUMP_TCP_NOSYN_ALLOWED = BIT(1), 53 | DUMP_UDP_ALLOWED = BIT(2), 54 | }; 55 | 56 | enum pkt_dump_file_status { 57 | STS_UNSET, 58 | STS_TCP_SYN, 59 | STS_TCP_NOSYN, 60 | STS_UDP, 61 | }; 62 | 63 | struct pkt_dump_file { 64 | char *file_name; 65 | unsigned long pkts; 66 | 67 | enum pkt_dump_file_status status; 68 | unsigned long start_time; 69 | }; 70 | 71 | /* VLAN header, IEEE 802.1Q */ 72 | struct vlan_header { 73 | uint16_t tci; /* Priority 3bits, CFI 1bit, ID 12bits */ 74 | uint16_t tpid; 75 | }; 76 | 77 | union ip_address { 78 | struct in_addr v4; 79 | struct in6_addr v6; 80 | }; 81 | 
82 | struct af_6tuple { 83 | int af_family; 84 | int protocol; 85 | union ip_address ip1, ip2; 86 | uint16_t port1, port2; 87 | uint8_t is_vlan; 88 | }; 89 | 90 | struct ip_pair { 91 | struct af_6tuple af_6tuple; 92 | struct pkt_dump_file pdf; 93 | struct ip_pair *next; 94 | }; 95 | 96 | /* pkt2flow.c */ 97 | extern struct ip_pair *pairs[]; 98 | 99 | /* utilities.c */ 100 | 101 | /* 102 | * Generate a new file name for flow with 4-tuple and timestamp 103 | */ 104 | char *new_file_name(struct af_6tuple af_6tuple, unsigned long timestamp); 105 | 106 | /* flow_db.c */ 107 | 108 | /* 109 | * Initialize the flow hash table to store registered flow items 110 | */ 111 | void init_hash_table(void); 112 | 113 | /* 114 | * Free the flow has table 115 | */ 116 | void free_hash_table(void); 117 | 118 | /* 119 | * Search for the flow in the flow hash table with specific 4-tuple; 120 | * If the flow item exists in the hash table, the pointer to the ip_pair will be 121 | * returned. 122 | * Otherwise, NULL returned; 123 | */ 124 | struct ip_pair *find_ip_pair(struct af_6tuple af_6tuple); 125 | 126 | /* 127 | * To register a new flow item in the flow hash table. This is uaually called 128 | * after finding the flow item with NULL returned. 129 | * The pointer to the new registerd ip_pair will be returned; and the pdf will 130 | * be reset as empty. 131 | */ 132 | struct ip_pair *register_ip_pair(struct af_6tuple af_6tuple); 133 | 134 | /* 135 | * Reset the packet dump file (pdf) for: 1) a new ip_pair created; 136 | * 2) a timeout flow with new status. 
137 | * The pdf will be reset with: zero packets, zero timestamp, 138 | * and file name bytes all set to be '\0' 139 | */ 140 | void reset_pdf(struct pkt_dump_file *f); 141 | 142 | -------------------------------------------------------------------------------- /utilities.c: -------------------------------------------------------------------------------- 1 | /* pkt2flow 2 | * Xiaming Chen (chen_xm@sjtu.edu.cn) 3 | * 4 | * Copyright (c) 2012 5 | * Copyright (c) 2014 Sven Eckelmann 6 | * 7 | * Permission is hereby granted, free of charge, to any person 8 | * obtaining a copy of this software and associated documentation files 9 | * (the "Software"), to deal in the Software without restriction, 10 | * including without limitation the rights to use, copy, modify, merge, 11 | * publish, distribute, sublicense, and/or sell copies of the Software, 12 | * and to permit persons to whom the Software is furnished to do so, 13 | * subject to the following conditions: 14 | * 15 | * The above copyright notice and this permission notice shall be 16 | * included in all copies or substantial portions of the Software. 17 | * 18 | * The names and trademarks of copyright holders may not be used in 19 | * advertising or publicity pertaining to the software without specific 20 | * prior permission. Title to copyright in this software and any 21 | * associated documentation will at all times remain with the copyright 22 | * holders. 23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | * SOFTWARE. 
32 | */ 33 | 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include "pkt2flow.h" 41 | 42 | char *new_file_name(struct af_6tuple af_6tuple, unsigned long timestamp) 43 | { 44 | char *fname; 45 | char src_ip_str[INET6_ADDRSTRLEN]; 46 | char dst_ip_str[INET6_ADDRSTRLEN]; 47 | int ret; 48 | 49 | switch (af_6tuple.af_family) { 50 | case AF_INET: 51 | inet_ntop(AF_INET, &af_6tuple.ip1.v4, src_ip_str, INET_ADDRSTRLEN); 52 | inet_ntop(AF_INET, &af_6tuple.ip2.v4, dst_ip_str, INET_ADDRSTRLEN); 53 | break; 54 | case AF_INET6: 55 | inet_ntop(AF_INET6, &af_6tuple.ip1.v6, src_ip_str, INET6_ADDRSTRLEN); 56 | inet_ntop(AF_INET6, &af_6tuple.ip2.v6, dst_ip_str, INET6_ADDRSTRLEN); 57 | break; 58 | } 59 | 60 | switch (af_6tuple.is_vlan) { 61 | case 0: 62 | ret = asprintf(&fname, "%s_%"PRIu16"_%s_%"PRIu16"_%lu.pcap", 63 | src_ip_str, af_6tuple.port1, dst_ip_str, af_6tuple.port2, 64 | timestamp); 65 | break; 66 | case 1: 67 | ret = asprintf(&fname, "%s_%"PRIu16"_%s_%"PRIu16"_%lu_vlan.pcap", 68 | src_ip_str, af_6tuple.port1, dst_ip_str, af_6tuple.port2, 69 | timestamp); 70 | break; 71 | } 72 | 73 | if (ret < 0) 74 | fname = NULL; 75 | 76 | return fname; 77 | } 78 | --------------------------------------------------------------------------------