├── Makefile ├── .gitignore ├── README.md ├── af_packet_classic.cpp ├── af_packet_classic_fanout.cpp ├── af_packet_rx_ring.cpp ├── af_packet_rx_ring_fanout.cpp └── LICENSE /Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | g++ af_packet_classic.cpp -o af_packet_classic -lpthread 3 | g++ af_packet_classic_fanout.cpp -o af_packet_classic_fanout -lboost_thread -lboost_system -lpthread 4 | g++ af_packet_rx_ring.cpp -o af_packet_rx_ring -lpthread 5 | g++ af_packet_rx_ring_fanout.cpp -o af_packet_rx_ring_fanout -lboost_thread -lboost_system -lpthread 6 | 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | af_packet_classic 35 | af_packet_classic_fanout 36 | af_packet_rx_ring 37 | af_packet_rx_ring_fanout 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro 2 | In this repository you can find examples of using AF_PACKET v3 to capture tens of millions of packets per second on Linux. 3 | 4 | # More information 5 | 6 | Please read this [article](https://pavel.network/capturing-packets-in-linux-at-a-speed-of-millions-of-packets-per-second-without-using-third-party-libraries/) to learn more about the techniques used in this repository.
7 | 8 | # How to build 9 | 10 | We recommend using Ubuntu or Debian for building these examples 11 | 12 | `sudo apt-get install -y libboost-thread-dev` 13 | 14 | Then run: 15 | `make` 16 | 17 | There are four binaries, listed from slowest to fastest: 18 | - af_packet_classic 19 | - af_packet_classic_fanout 20 | - af_packet_rx_ring 21 | - af_packet_rx_ring_fanout 22 | 23 | -------------------------------------------------------------------------------- /af_packet_classic.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include /* the L2 protocols */ 16 | 17 | /* 18 | Parser files: 19 | https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.c 20 | https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.h 21 | */ 22 | // Copy and paste from netmap code 23 | void consume_pkt(u_char* buffer, int len) { 24 | /* 25 | struct pfring_pkthdr packet_header; 26 | memset(&packet_header, 0, sizeof(packet_header)); 27 | packet_header.len = len; 28 | packet_header.caplen = len; 29 | 30 | // We do not calculate timestamps because timestamping is very CPU intensive operation: 31 | // https://github.com/ntop/PF_RING/issues/9 32 | u_int8_t timestamp = 0; 33 | u_int8_t add_hash = 0; 34 | fastnetmon_parse_pkt((u_char*)buffer, &packet_header, 4, timestamp, add_hash); 35 | */ 36 | 37 | //char print_buffer[512]; 38 | //fastnetmon_print_parsed_pkt(print_buffer, 512, (u_char*)buffer, &packet_header); 39 | //printf("%s\n", print_buffer); 40 | // logger.info("%s", print_buffer); 41 | } 42 | 43 | // Get interface number by name. Returns -1 when the name does not fit into ifr_name together with its terminating NUL, or when the SIOCGIFINDEX ioctl fails 44 | int get_interface_number_by_device_name(int socket_fd, std::string interface_name) { 45 | struct ifreq ifr; 46 | memset(&ifr, 0, sizeof(ifr)); 47 | 48 | if (interface_name.size() >= IFNAMSIZ) { 49 | return
-1; 50 | } 51 | 52 | strncpy(ifr.ifr_name, interface_name.c_str(), sizeof(ifr.ifr_name)); 53 | 54 | if (ioctl(socket_fd, SIOCGIFINDEX, &ifr) == -1) { 55 | return -1; 56 | } 57 | 58 | return ifr.ifr_ifindex; 59 | } 60 | 61 | uint64_t received_packets = 0; 62 | 63 | void speed_printer() { 64 | while (true) { 65 | uint64_t packets_before = received_packets; 66 | 67 | std::this_thread::sleep_for(std::chrono::seconds(1)); 68 | 69 | uint64_t packets_after = received_packets; 70 | uint64_t pps = packets_after - packets_before; 71 | 72 | std::cout << "We process: " << pps << " pps" << std::endl; 73 | } 74 | } 75 | 76 | int setup_socket(std::string interface_name) { 77 | // More details here: http://man7.org/linux/man-pages/man7/packet.7.html 78 | // The second argument can be SOCK_RAW or SOCK_DGRAM: 79 | // SOCK_RAW - raw packets passed from the kernel 80 | // SOCK_DGRAM - packets after some amount of processing 81 | // The third argument selects the ether type of captured packets (ETH_P_ALL here) 82 | int packet_socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 83 | 84 | if (packet_socket == -1) { 85 | printf("Can't create AF_PACKET socket\n"); 86 | return -1; 87 | } 88 | 89 | int interface_number = get_interface_number_by_device_name(packet_socket, interface_name); 90 | 91 | if (interface_number == -1) { 92 | printf("Can't get interface number by interface name\n"); 93 | return -1; 94 | } 95 | 96 | // Switch the interface to promiscuous mode 97 | struct packet_mreq sock_params; 98 | memset(&sock_params, 0, sizeof(sock_params)); 99 | sock_params.mr_type = PACKET_MR_PROMISC; 100 | sock_params.mr_ifindex = interface_number; 101 | 102 | int set_promisc = setsockopt(packet_socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP, (void *)&sock_params, sizeof(sock_params)); 103 | 104 | if (set_promisc == -1) { 105 | printf("Can't enable promisc mode\n"); 106 | return -1; 107 | } 108 | 109 | struct sockaddr_ll bind_address; 110 | memset(&bind_address, 0, sizeof(bind_address)); 111 | 112 | bind_address.sll_family = AF_PACKET; 113 |
bind_address.sll_protocol = htons(ETH_P_ALL); 114 | bind_address.sll_ifindex = interface_number; 115 | 116 | int bind_result = bind(packet_socket, (struct sockaddr *)&bind_address, sizeof(bind_address)); 117 | 118 | if (bind_result == -1) { 119 | printf("Can't bind to AF_PACKET socket\n"); 120 | return -1; 121 | } 122 | 123 | return packet_socket; 124 | } 125 | 126 | void start_af_packet_capture(std::string interface_name) { 127 | int packet_socket = setup_socket(interface_name); 128 | 129 | if (packet_socket == -1) { 130 | printf("Can't create socket\n"); 131 | return; 132 | } 133 | 134 | unsigned int capture_length = 1500; 135 | char buffer[capture_length]; 136 | 137 | while (true) { 138 | received_packets++; 139 | 140 | int readed_bytes = read(packet_socket, buffer, capture_length); 141 | 142 | // printf("Got %d bytes from interface\n", readed_bytes); 143 | 144 | consume_pkt((u_char*)buffer, readed_bytes); 145 | 146 | if (readed_bytes < 0) { 147 | break; 148 | } 149 | } 150 | } 151 | 152 | int main() { 153 | std::thread speed_printer_thread( speed_printer ); 154 | 155 | start_af_packet_capture("eth6"); 156 | 157 | speed_printer_thread.join(); 158 | } 159 | -------------------------------------------------------------------------------- /af_packet_classic_fanout.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include /* the L2 protocols */ 15 | 16 | /* 17 | Parser files: 18 | https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.c 19 | https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.h 20 | */ 21 | // Copy and paste from netmap code 22 | void consume_pkt(u_char* buffer, int len) { 23 | /* 24 | struct pfring_pkthdr packet_header; 25 | memset(&packet_header, 0, sizeof(packet_header)); 26 | 
packet_header.len = len; 27 | packet_header.caplen = len; 28 | 29 | // We do not calculate timestamps because timestamping is very CPU intensive operation: 30 | // https://github.com/ntop/PF_RING/issues/9 31 | u_int8_t timestamp = 0; 32 | u_int8_t add_hash = 0; 33 | fastnetmon_parse_pkt((u_char*)buffer, &packet_header, 4, timestamp, add_hash); 34 | */ 35 | 36 | //char print_buffer[512]; 37 | //fastnetmon_print_parsed_pkt(print_buffer, 512, (u_char*)buffer, &packet_header); 38 | //printf("%s\n", print_buffer); 39 | // logger.info("%s", print_buffer); 40 | } 41 | 42 | // Get interface number by name. Returns -1 when the name does not fit into ifr_name together with its terminating NUL, or when the SIOCGIFINDEX ioctl fails 43 | int get_interface_number_by_device_name(int socket_fd, std::string interface_name) { 44 | struct ifreq ifr; 45 | memset(&ifr, 0, sizeof(ifr)); 46 | 47 | if (interface_name.size() >= IFNAMSIZ) { 48 | return -1; 49 | } 50 | 51 | strncpy(ifr.ifr_name, interface_name.c_str(), sizeof(ifr.ifr_name)); 52 | 53 | if (ioctl(socket_fd, SIOCGIFINDEX, &ifr) == -1) { 54 | return -1; 55 | } 56 | 57 | return ifr.ifr_ifindex; 58 | } 59 | 60 | uint64_t received_packets = 0; 61 | 62 | void speed_printer() { 63 | while (true) { 64 | uint64_t packets_before = received_packets; 65 | 66 | boost::this_thread::sleep(boost::posix_time::seconds(1)); 67 | 68 | uint64_t packets_after = received_packets; 69 | uint64_t pps = packets_after - packets_before; 70 | 71 | std::cout << "We process: " << pps << " pps" << std::endl; 72 | } 73 | } 74 | 75 | int setup_socket(std::string interface_name, int fanout_group_id) { 76 | // More details here: http://man7.org/linux/man-pages/man7/packet.7.html 77 | // We could use SOCK_RAW or SOCK_DGRAM for second argument 78 | // SOCK_RAW - raw packets pass from the kernel 79 | // SOCK_DGRAM - some amount of processing 80 | // Third argument manage ether type of captured packets 81 | int packet_socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 82 | 83 | if (packet_socket == -1) { 84 | printf("Can't create AF_PACKET socket\n"); 85 | return -1; 86 | } 87 | 88 |
int interface_number = get_interface_number_by_device_name(packet_socket, interface_name); 89 | 90 | if (interface_number == -1) { 91 | printf("Can't get interface number by interface name\n"); 92 | return -1; 93 | } 94 | 95 | // Switch the interface to promiscuous mode 96 | struct packet_mreq sock_params; 97 | memset(&sock_params, 0, sizeof(sock_params)); 98 | sock_params.mr_type = PACKET_MR_PROMISC; 99 | sock_params.mr_ifindex = interface_number; 100 | 101 | int set_promisc = setsockopt(packet_socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP, (void *)&sock_params, sizeof(sock_params)); 102 | 103 | if (set_promisc == -1) { 104 | printf("Can't enable promisc mode\n"); 105 | return -1; 106 | } 107 | 108 | struct sockaddr_ll bind_address; 109 | memset(&bind_address, 0, sizeof(bind_address)); 110 | 111 | bind_address.sll_family = AF_PACKET; 112 | bind_address.sll_protocol = htons(ETH_P_ALL); 113 | bind_address.sll_ifindex = interface_number; 114 | 115 | int bind_result = bind(packet_socket, (struct sockaddr *)&bind_address, sizeof(bind_address)); 116 | 117 | if (bind_result == -1) { 118 | printf("Can't bind to AF_PACKET socket\n"); 119 | return -1; 120 | } 121 | 122 | if (fanout_group_id) { 123 | // PACKET_FANOUT_LB - round robin across the sockets of the group 124 | // PACKET_FANOUT_CPU - pick the socket by the CPU the packet arrived on 125 | int fanout_type = PACKET_FANOUT_CPU; 126 | // Group id goes into the low 16 bits, fanout mode into the high 16 bits 127 | int fanout_arg = (fanout_group_id | (fanout_type << 16)); 128 | 129 | int setsockopt_fanout = setsockopt(packet_socket, SOL_PACKET, PACKET_FANOUT, &fanout_arg, sizeof(fanout_arg)); 130 | 131 | if (setsockopt_fanout < 0) { 132 | printf("Can't configure fanout\n"); 133 | return -1; 134 | } 135 | } 136 | 137 | // Most challenging option: PACKET_TX_RING 138 | return packet_socket; 139 | } 140 | 141 | void start_af_packet_capture(std::string interface_name, int fanout_group_id) { 142 | int packet_socket = setup_socket(interface_name, fanout_group_id); 143 | 144 | if (packet_socket == -1) { 145 | printf("Can't create socket\n"); 146 | return; 147 | }
148 | 149 | unsigned int capture_length = 1500; 150 | char buffer[capture_length]; 151 | 152 | while (true) { 153 | received_packets++; 154 | 155 | int readed_bytes = read(packet_socket, buffer, capture_length); 156 | 157 | // printf("Got %d bytes from interface\n", readed_bytes); 158 | 159 | consume_pkt((u_char*)buffer, readed_bytes); 160 | 161 | if (readed_bytes < 0) { 162 | break; 163 | } 164 | } 165 | } 166 | 167 | // Could get some speed up on NUMA servers 168 | bool execute_strict_cpu_affinity = false; 169 | 170 | int main() { 171 | boost::thread speed_printer_thread( speed_printer ); 172 | 173 | int fanout_group_id = getpid() & 0xffff; 174 | 175 | boost::thread_group packet_receiver_thread_group; 176 | 177 | unsigned int num_cpus = 8; 178 | 179 | for (int cpu = 0; cpu < num_cpus; cpu++) { 180 | boost::thread::attributes thread_attrs; 181 | 182 | if (execute_strict_cpu_affinity) { 183 | cpu_set_t current_cpu_set; 184 | 185 | int cpu_to_bind = cpu % num_cpus; 186 | CPU_ZERO(¤t_cpu_set); 187 | // We count cpus from zero 188 | CPU_SET(cpu_to_bind, ¤t_cpu_set); 189 | 190 | int set_affinity_result = pthread_attr_setaffinity_np(thread_attrs.native_handle(), sizeof(cpu_set_t), ¤t_cpu_set); 191 | 192 | if (set_affinity_result != 0) { 193 | printf("Can't set CPU affinity for thread\n"); 194 | } 195 | } 196 | 197 | packet_receiver_thread_group.add_thread( 198 | new boost::thread(thread_attrs, boost::bind(start_af_packet_capture, "eth6", fanout_group_id)) 199 | ); 200 | } 201 | 202 | // Wait all processes for finish 203 | packet_receiver_thread_group.join_all(); 204 | 205 | speed_printer_thread.join(); 206 | } 207 | -------------------------------------------------------------------------------- /af_packet_rx_ring.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 
#include 16 | #include /* the L2 protocols */ 17 | 18 | /* 19 | Parser files: 20 | https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.c 21 | https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.h 22 | */ 23 | 24 | // 4194304 bytes 25 | unsigned int blocksiz = 1 << 22; 26 | // 2048 bytes 27 | unsigned int framesiz = 1 << 11; 28 | unsigned int blocknum = 64; 29 | 30 | struct block_desc { 31 | uint32_t version; 32 | uint32_t offset_to_priv; 33 | struct tpacket_hdr_v1 h1; 34 | }; 35 | 36 | // Get interface number by name. Returns -1 when the name does not fit into ifr_name together with its terminating NUL, or when the SIOCGIFINDEX ioctl fails 37 | int get_interface_number_by_device_name(int socket_fd, std::string interface_name) { 38 | struct ifreq ifr; 39 | memset(&ifr, 0, sizeof(ifr)); 40 | 41 | if (interface_name.size() >= IFNAMSIZ) { 42 | return -1; 43 | } 44 | 45 | strncpy(ifr.ifr_name, interface_name.c_str(), sizeof(ifr.ifr_name)); 46 | 47 | if (ioctl(socket_fd, SIOCGIFINDEX, &ifr) == -1) { 48 | return -1; 49 | } 50 | 51 | return ifr.ifr_ifindex; 52 | } 53 | 54 | uint64_t received_packets = 0; 55 | uint64_t received_bytes = 0; 56 | 57 | void speed_printer() { 58 | while (true) { 59 | uint64_t packets_before = received_packets; 60 | 61 | std::this_thread::sleep_for( std::chrono::seconds(1) ); 62 | 63 | uint64_t packets_after = received_packets; 64 | uint64_t pps = packets_after - packets_before; 65 | 66 | std::cout << "We process: " << pps << " pps" << std::endl; 67 | } 68 | } 69 | 70 | void flush_block(struct block_desc *pbd) { 71 | pbd->h1.block_status = TP_STATUS_KERNEL; 72 | } 73 | 74 | void walk_block(struct block_desc *pbd, const int block_num) { 75 | int num_pkts = pbd->h1.num_pkts, i; 76 | unsigned long bytes = 0; 77 | struct tpacket3_hdr *ppd; 78 | 79 | ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + 80 | pbd->h1.offset_to_first_pkt); 81 | for (i = 0; i < num_pkts; ++i) { 82 | bytes += ppd->tp_snaplen; 83 | 84 | // struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac); 85 | // Print packets 86
| 87 | // #define PRINT_PACKETS 88 | #ifdef PRINT_PACKETS 89 | struct pfring_pkthdr packet_header; 90 | memset(&packet_header, 0, sizeof(packet_header)); 91 | packet_header.len = ppd->tp_snaplen; 92 | packet_header.caplen = ppd->tp_snaplen; 93 | 94 | u_int8_t timestamp = 0; 95 | u_int8_t add_hash = 0; 96 | 97 | u_char* data_pointer = (u_char*)((uint8_t *) ppd + ppd->tp_mac); 98 | 99 | fastnetmon_parse_pkt(data_pointer, &packet_header, 4, timestamp, add_hash); 100 | 101 | char print_buffer[512]; 102 | fastnetmon_print_parsed_pkt(print_buffer, 512, data_pointer, &packet_header); 103 | printf("%s\n", print_buffer); 104 | #endif 105 | // Advance to the next packet header using the offset stored in the current one 106 | ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + 107 | ppd->tp_next_offset); 108 | } 109 | 110 | received_packets += num_pkts; 111 | received_bytes += bytes; 112 | } 113 | 114 | int setup_socket(std::string interface_name) { 115 | // More details here: http://man7.org/linux/man-pages/man7/packet.7.html 116 | // The second argument can be SOCK_RAW or SOCK_DGRAM: 117 | // SOCK_RAW - raw packets passed from the kernel 118 | // SOCK_DGRAM - packets after some amount of processing 119 | // The third argument selects the ether type of captured packets (ETH_P_ALL here) 120 | int packet_socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 121 | 122 | if (packet_socket == -1) { 123 | printf("Can't create AF_PACKET socket\n"); 124 | return -1; 125 | } 126 | 127 | // Use TPACKET_V3 because it lets us read/poll on a per-block basis instead of per packet 128 | int version = TPACKET_V3; 129 | int setsockopt_packet_version = setsockopt(packet_socket, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)); 130 | 131 | if (setsockopt_packet_version < 0) { 132 | printf("Can't set packet v3 version\n"); 133 | return -1; 134 | } 135 | 136 | int interface_number = get_interface_number_by_device_name(packet_socket, interface_name); 137 | 138 | if (interface_number == -1) { 139 | printf("Can't get interface number by interface name\n"); 140 | return -1; 141 | } 142 | 143 | // Switch to PROMISC
mode 144 | struct packet_mreq sock_params; 145 | memset(&sock_params, 0, sizeof(sock_params)); 146 | sock_params.mr_type = PACKET_MR_PROMISC; 147 | sock_params.mr_ifindex = interface_number; 148 | 149 | int set_promisc = setsockopt(packet_socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP, (void *)&sock_params, sizeof(sock_params)); 150 | 151 | if (set_promisc == -1) { 152 | printf("Can't enable promisc mode\n"); 153 | return -1; 154 | } 155 | 156 | struct sockaddr_ll bind_address; 157 | memset(&bind_address, 0, sizeof(bind_address)); 158 | 159 | bind_address.sll_family = AF_PACKET; 160 | bind_address.sll_protocol = htons(ETH_P_ALL); 161 | bind_address.sll_ifindex = interface_number; 162 | 163 | // The ring setup below follows http://yusufonlinux.blogspot.ru/2010/11/data-link-access-and-zero-copy.html 164 | // and https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 165 | // Describe the ring: block size, frame size, block count and the matching frame count 166 | struct tpacket_req3 req; 167 | memset(&req, 0, sizeof(req)); 168 | 169 | req.tp_block_size = blocksiz; 170 | req.tp_frame_size = framesiz; 171 | req.tp_block_nr = blocknum; 172 | req.tp_frame_nr = (blocksiz * blocknum) / framesiz; 173 | 174 | req.tp_retire_blk_tov = 60; // Block retire timeout in msec 175 | req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; 176 | 177 | int setsockopt_rx_ring = setsockopt(packet_socket, SOL_PACKET , PACKET_RX_RING , (void*)&req , sizeof(req)); 178 | 179 | if (setsockopt_rx_ring == -1) { 180 | printf("Can't enable RX_RING for AF_PACKET socket\n"); 181 | return -1; 182 | } 183 | 184 | // We use per thread structures 185 | uint8_t* mapped_buffer = NULL; 186 | struct iovec* rd = NULL; 187 | // Map the whole ring (all blocks) into this process 188 | mapped_buffer = (uint8_t*)mmap(NULL, req.tp_block_size * req.tp_block_nr, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, packet_socket, 0); 189 | 190 | if (mapped_buffer == MAP_FAILED) { 191 | printf("mmap failed!\n"); 192 | return -1; 193 | } 194 | 195 | // Allocate one iovec per block 196 | rd = (struct iovec*)malloc(req.tp_block_nr * sizeof(struct iovec)); 197 | 198 | //
Initilize iov structures 199 | for (int i = 0; i < req.tp_block_nr; ++i) { 200 | rd[i].iov_base = mapped_buffer + (i * req.tp_block_size); 201 | rd[i].iov_len = req.tp_block_size; 202 | } 203 | 204 | int bind_result = bind(packet_socket, (struct sockaddr *)&bind_address, sizeof(bind_address)); 205 | 206 | if (bind_result == -1) { 207 | printf("Can't bind to AF_PACKET socket\n"); 208 | return -1; 209 | } 210 | 211 | unsigned int current_block_num = 0; 212 | 213 | struct pollfd pfd; 214 | memset(&pfd, 0, sizeof(pfd)); 215 | 216 | pfd.fd = packet_socket; 217 | pfd.events = POLLIN | POLLERR; 218 | pfd.revents = 0; 219 | 220 | while (true) { 221 | struct block_desc *pbd = (struct block_desc *) rd[current_block_num].iov_base; 222 | 223 | if ((pbd->h1.block_status & TP_STATUS_USER) == 0) { 224 | poll(&pfd, 1, -1); 225 | 226 | continue; 227 | } 228 | 229 | walk_block(pbd, current_block_num); 230 | flush_block(pbd); 231 | current_block_num = (current_block_num + 1) % blocknum; 232 | } 233 | 234 | return packet_socket; 235 | } 236 | 237 | void start_af_packet_capture(std::string interface_name) { 238 | setup_socket(interface_name); 239 | } 240 | 241 | // Could get some speed up on NUMA servers 242 | bool execute_strict_cpu_affinity = false; 243 | 244 | int main() { 245 | int fanout_group_id = getpid() & 0xffff; 246 | 247 | std::thread speed_printer_thread( speed_printer ); 248 | 249 | start_af_packet_capture("eth6"); 250 | 251 | speed_printer_thread.join(); 252 | } 253 | -------------------------------------------------------------------------------- /af_packet_rx_ring_fanout.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include /* the L2 protocols */ 16 | 17 | /* 18 | Parser files: 19 | 
https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.c 20 | https://github.com/FastVPSEestiOu/fastnetmon/blob/master/src/fastnetmon_packet_parser.h 21 | */ 22 | 23 | // 4194304 bytes 24 | unsigned int blocksiz = 1 << 22; 25 | // 2048 bytes 26 | unsigned int framesiz = 1 << 11; 27 | unsigned int blocknum = 64; 28 | 29 | struct block_desc { 30 | uint32_t version; 31 | uint32_t offset_to_priv; 32 | struct tpacket_hdr_v1 h1; 33 | }; 34 | 35 | // Get interface number by name. Returns -1 when the name does not fit into ifr_name together with its terminating NUL, or when the SIOCGIFINDEX ioctl fails 36 | int get_interface_number_by_device_name(int socket_fd, std::string interface_name) { 37 | struct ifreq ifr; 38 | memset(&ifr, 0, sizeof(ifr)); 39 | 40 | if (interface_name.size() >= IFNAMSIZ) { 41 | return -1; 42 | } 43 | 44 | strncpy(ifr.ifr_name, interface_name.c_str(), sizeof(ifr.ifr_name)); 45 | 46 | if (ioctl(socket_fd, SIOCGIFINDEX, &ifr) == -1) { 47 | return -1; 48 | } 49 | 50 | return ifr.ifr_ifindex; 51 | } 52 | 53 | uint64_t received_packets = 0; 54 | uint64_t received_bytes = 0; 55 | 56 | void speed_printer() { 57 | while (true) { 58 | uint64_t packets_before = received_packets; 59 | 60 | boost::this_thread::sleep(boost::posix_time::seconds(1)); 61 | 62 | uint64_t packets_after = received_packets; 63 | uint64_t pps = packets_after - packets_before; 64 | 65 | std::cout << "We process: " << pps << " pps" << std::endl; 66 | } 67 | } 68 | 69 | void flush_block(struct block_desc *pbd) { 70 | pbd->h1.block_status = TP_STATUS_KERNEL; 71 | } 72 | 73 | void walk_block(struct block_desc *pbd, const int block_num) { 74 | int num_pkts = pbd->h1.num_pkts, i; 75 | unsigned long bytes = 0; 76 | struct tpacket3_hdr *ppd; 77 | 78 | ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + 79 | pbd->h1.offset_to_first_pkt); 80 | for (i = 0; i < num_pkts; ++i) { 81 | bytes += ppd->tp_snaplen; 82 | 83 | // struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac); 84 | // Print packets 85 | 86 | // #define PRINT_PACKETS 87 | #ifdef PRINT_PACKETS 88 | struct
pfring_pkthdr packet_header; 89 | memset(&packet_header, 0, sizeof(packet_header)); 90 | packet_header.len = ppd->tp_snaplen; 91 | packet_header.caplen = ppd->tp_snaplen; 92 | 93 | u_int8_t timestamp = 0; 94 | u_int8_t add_hash = 0; 95 | 96 | u_char* data_pointer = (u_char*)((uint8_t *) ppd + ppd->tp_mac); 97 | 98 | fastnetmon_parse_pkt(data_pointer, &packet_header, 4, timestamp, add_hash); 99 | 100 | char print_buffer[512]; 101 | fastnetmon_print_parsed_pkt(print_buffer, 512, data_pointer, &packet_header); 102 | printf("%s\n", print_buffer); 103 | #endif 104 | 105 | ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + 106 | ppd->tp_next_offset); 107 | } 108 | 109 | received_packets += num_pkts; 110 | received_bytes += bytes; 111 | } 112 | 113 | int setup_socket(std::string interface_name, int fanout_group_id) { 114 | // More details here: http://man7.org/linux/man-pages/man7/packet.7.html 115 | // The second argument can be SOCK_RAW or SOCK_DGRAM: 116 | // SOCK_RAW - raw packets passed from the kernel 117 | // SOCK_DGRAM - packets after some amount of processing 118 | // The third argument selects the ether type of captured packets (ETH_P_ALL here) 119 | int packet_socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 120 | 121 | if (packet_socket == -1) { 122 | printf("Can't create AF_PACKET socket\n"); 123 | return -1; 124 | } 125 | 126 | // Use TPACKET_V3 because it lets us read/poll on a per-block basis instead of per packet 127 | int version = TPACKET_V3; 128 | int setsockopt_packet_version = setsockopt(packet_socket, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)); 129 | 130 | if (setsockopt_packet_version < 0) { 131 | printf("Can't set packet v3 version\n"); 132 | return -1; 133 | } 134 | 135 | int interface_number = get_interface_number_by_device_name(packet_socket, interface_name); 136 | 137 | if (interface_number == -1) { 138 | printf("Can't get interface number by interface name\n"); 139 | return -1; 140 | } 141 | 142 | // Switch the interface to promiscuous mode 143 | struct packet_mreq sock_params; 144 |
memset(&sock_params, 0, sizeof(sock_params)); 145 | sock_params.mr_type = PACKET_MR_PROMISC; 146 | sock_params.mr_ifindex = interface_number; 147 | 148 | int set_promisc = setsockopt(packet_socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP, (void *)&sock_params, sizeof(sock_params)); 149 | 150 | if (set_promisc == -1) { 151 | printf("Can't enable promisc mode\n"); 152 | return -1; 153 | } 154 | 155 | struct sockaddr_ll bind_address; 156 | memset(&bind_address, 0, sizeof(bind_address)); 157 | 158 | bind_address.sll_family = AF_PACKET; 159 | bind_address.sll_protocol = htons(ETH_P_ALL); 160 | bind_address.sll_ifindex = interface_number; 161 | 162 | // The ring setup below follows http://yusufonlinux.blogspot.ru/2010/11/data-link-access-and-zero-copy.html 163 | // and https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt 164 | // Describe the ring: block size, frame size, block count and the matching frame count 165 | struct tpacket_req3 req; 166 | memset(&req, 0, sizeof(req)); 167 | 168 | req.tp_block_size = blocksiz; 169 | req.tp_frame_size = framesiz; 170 | req.tp_block_nr = blocknum; 171 | req.tp_frame_nr = (blocksiz * blocknum) / framesiz; 172 | 173 | req.tp_retire_blk_tov = 60; // Block retire timeout in msec 174 | req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; 175 | 176 | int setsockopt_rx_ring = setsockopt(packet_socket, SOL_PACKET , PACKET_RX_RING , (void*)&req , sizeof(req)); 177 | 178 | if (setsockopt_rx_ring == -1) { 179 | printf("Can't enable RX_RING for AF_PACKET socket\n"); 180 | return -1; 181 | } 182 | 183 | // We use per thread structures 184 | uint8_t* mapped_buffer = NULL; 185 | struct iovec* rd = NULL; 186 | // Map the whole ring (all blocks) into this process 187 | mapped_buffer = (uint8_t*)mmap(NULL, req.tp_block_size * req.tp_block_nr, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, packet_socket, 0); 188 | 189 | if (mapped_buffer == MAP_FAILED) { 190 | printf("mmap failed!\n"); 191 | return -1; 192 | } 193 | 194 | // Allocate one iovec per block 195 | rd = (struct iovec*)malloc(req.tp_block_nr * sizeof(struct iovec)); 196 | 197 | // Initialize iov structures 198 | for (int i = 0; i <
req.tp_block_nr; ++i) { 199 | rd[i].iov_base = mapped_buffer + (i * req.tp_block_size); 200 | rd[i].iov_len = req.tp_block_size; 201 | } 202 | 203 | int bind_result = bind(packet_socket, (struct sockaddr *)&bind_address, sizeof(bind_address)); 204 | 205 | if (bind_result == -1) { 206 | printf("Can't bind to AF_PACKET socket\n"); 207 | return -1; 208 | } 209 | 210 | if (fanout_group_id) { 211 | // PACKET_FANOUT_LB - round robin across the sockets of the group 212 | // PACKET_FANOUT_CPU - pick the socket by the CPU the packet arrived on 213 | int fanout_type = PACKET_FANOUT_CPU; 214 | // Group id goes into the low 16 bits, fanout mode into the high 16 bits 215 | int fanout_arg = (fanout_group_id | (fanout_type << 16)); 216 | 217 | int setsockopt_fanout = setsockopt(packet_socket, SOL_PACKET, PACKET_FANOUT, &fanout_arg, sizeof(fanout_arg)); 218 | 219 | if (setsockopt_fanout < 0) { 220 | printf("Can't configure fanout\n"); 221 | return -1; 222 | } 223 | } 224 | 225 | unsigned int current_block_num = 0; 226 | 227 | struct pollfd pfd; 228 | memset(&pfd, 0, sizeof(pfd)); 229 | 230 | pfd.fd = packet_socket; 231 | pfd.events = POLLIN | POLLERR; 232 | pfd.revents = 0; 233 | // Walk the ring block by block; wait in poll() while the current block is not yet marked TP_STATUS_USER 234 | while (true) { 235 | struct block_desc *pbd = (struct block_desc *) rd[current_block_num].iov_base; 236 | 237 | if ((pbd->h1.block_status & TP_STATUS_USER) == 0) { 238 | poll(&pfd, 1, -1); 239 | 240 | continue; 241 | } 242 | 243 | walk_block(pbd, current_block_num); 244 | flush_block(pbd); 245 | current_block_num = (current_block_num + 1) % blocknum; 246 | } 247 | // Not reached: the loop above has no exit 248 | return packet_socket; 249 | } 250 | 251 | void start_af_packet_capture(std::string interface_name, int fanout_group_id) { 252 | setup_socket(interface_name, fanout_group_id); 253 | } 254 | 255 | // Could get some speed up on NUMA servers 256 | bool execute_strict_cpu_affinity = false; 257 | 258 | int main() { 259 | int fanout_group_id = getpid() & 0xffff; 260 | 261 | boost::thread speed_printer_thread( speed_printer ); 262 | 263 | boost::thread_group packet_receiver_thread_group; 264 | 265 | unsigned int num_cpus = 8; 266 | for (int cpu = 0;
cpu < num_cpus; cpu++) { 267 | boost::thread::attributes thread_attrs; 268 | 269 | if (execute_strict_cpu_affinity) { 270 | cpu_set_t current_cpu_set; 271 | 272 | int cpu_to_bind = cpu % num_cpus; 273 | CPU_ZERO(&current_cpu_set); 274 | // We count cpus from zero 275 | CPU_SET(cpu_to_bind, &current_cpu_set); 276 | 277 | int set_affinity_result = pthread_attr_setaffinity_np(thread_attrs.native_handle(), sizeof(cpu_set_t), &current_cpu_set); 278 | 279 | if (set_affinity_result != 0) { 280 | printf("Can't set CPU affinity for thread\n"); 281 | } 282 | } 283 | 284 | packet_receiver_thread_group.add_thread( 285 | new boost::thread(thread_attrs, boost::bind(start_af_packet_capture, "eth6", fanout_group_id)) 286 | ); 287 | } 288 | 289 | // Wait for all capture threads to finish 290 | packet_receiver_thread_group.join_all(); 291 | 292 | speed_printer_thread.join(); 293 | } 294 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------