├── LICENSE ├── README.md ├── build.sh ├── recv.c └── send.c /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 andbain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dpdk-hello 2 | The simplest possible DPDK examples to send and receive UDP packets. 
3 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function build_exe { 4 | CFLAGS="-g -Wall -Wshadow -Wold-style-definition -O0" # -flto 5 | DPDK_DIR=$HOME/dpdk-18.02 6 | 7 | # The massive list of compile and link flags below were carefully 8 | # reverse engineered from the DPDK 18.02 example app Makefiles. 9 | # They might not be correct for other versions. 10 | 11 | gcc $2 \ 12 | -o $1 $CFLAGS -m64 -pthread -march=native -DRTE_MACHINE_CPUFLAG_SSE \ 13 | -DRTE_MACHINE_CPUFLAG_SSE2 -DRTE_MACHINE_CPUFLAG_SSE3 \ 14 | -DRTE_MACHINE_CPUFLAG_SSSE3 -DRTE_MACHINE_CPUFLAG_SSE4_1 \ 15 | -DRTE_MACHINE_CPUFLAG_SSE4_2 -DRTE_MACHINE_CPUFLAG_AES \ 16 | -DRTE_MACHINE_CPUFLAG_PCLMULQDQ -DRTE_MACHINE_CPUFLAG_AVX \ 17 | -DRTE_MACHINE_CPUFLAG_RDRAND -DRTE_MACHINE_CPUFLAG_FSGSBASE \ 18 | -DRTE_MACHINE_CPUFLAG_F16C -DRTE_MACHINE_CPUFLAG_AVX2 \ 19 | -I$DPDK_DIR/build/include \ 20 | -L$DPDK_DIR/build/lib \ 21 | -lrte_flow_classify -lrte_pipeline -lrte_table -lrte_port -lrte_pdump \ 22 | -lrte_distributor -lrte_ip_frag -lrte_gro -lrte_gso \ 23 | -lrte_meter -lrte_lpm -Wl,--whole-archive -lrte_acl \ 24 | -Wl,--no-whole-archive -lrte_jobstats -lrte_metrics \ 25 | -lrte_bitratestats -lrte_latencystats -lrte_power -lrte_timer \ 26 | -lrte_efd -Wl,--whole-archive -lrte_cfgfile -lrte_hash \ 27 | -lrte_member -lrte_vhost -lrte_kvargs -lrte_mbuf -lrte_net \ 28 | -lrte_ethdev -lrte_bbdev -lrte_cryptodev -lrte_security \ 29 | -lrte_eventdev -lrte_rawdev -lrte_mempool -lrte_mempool_ring \ 30 | -lrte_ring -lrte_pci -lrte_eal -lrte_cmdline -lrte_reorder \ 31 | -lrte_sched -lrte_kni -lrte_bus_pci -lrte_bus_vdev \ 32 | -lrte_mempool_stack -lrte_pmd_af_packet -lrte_pmd_ark \ 33 | -lrte_pmd_avf -lrte_pmd_avp -lrte_pmd_bnxt -lrte_pmd_bond \ 34 | -lrte_pmd_cxgbe -lrte_pmd_e1000 -lrte_pmd_ena -lrte_pmd_enic \ 35 | -lrte_pmd_fm10k -lrte_pmd_failsafe 
-lrte_pmd_i40e \ 36 | -lrte_pmd_ixgbe -lrte_pmd_kni -lrte_pmd_lio -lrte_pmd_mlx4 \ 37 | -libverbs -lmlx4 -lrte_pmd_nfp -lrte_pmd_null \ 38 | -lrte_pmd_qede -lrte_pmd_ring -lrte_pmd_softnic \ 39 | -lrte_pmd_sfc_efx -lrte_pmd_tap -lrte_pmd_thunderx_nicvf \ 40 | -lrte_pmd_vdev_netvsc -lrte_pmd_virtio -lrte_pmd_vhost \ 41 | -lrte_pmd_vmxnet3_uio -lrte_pmd_bbdev_null -lrte_pmd_null_crypto \ 42 | -lrte_pmd_crypto_scheduler -lrte_pmd_skeleton_event \ 43 | -lrte_pmd_sw_event -lrte_pmd_octeontx_ssovf -lrte_mempool_octeontx \ 44 | -lrte_pmd_octeontx -lrte_pmd_opdl_event -lrte_pmd_skeleton_rawdev \ 45 | -Wl,--no-whole-archive -lrt -lm -lnuma -ldl -Wl,-export-dynamic 46 | } 47 | 48 | build_exe dpdk_send send.c 49 | build_exe dpdk_recv recv.c 50 | -------------------------------------------------------------------------------- /recv.c: -------------------------------------------------------------------------------- 1 | // **************************************************************************** 2 | // This example shows how to recv some packets. 3 | // **************************************************************************** 4 | 5 | // Build it with: 6 | // ./build.sh 7 | // 8 | // Run it with: 9 | // sudo ./dpdk_recv 10 | // 11 | // Check it works by sending some packets from another machine, eg: 12 | // ping 13 | 14 | 15 | // DPDK headers 16 | #include 17 | #include 18 | #include 19 | 20 | // Standard headers 21 | #include 22 | 23 | 24 | // **************************************************************************** 25 | // Tweak these values if you want. 26 | // **************************************************************************** 27 | 28 | // I believe that when packets arrive at the NIC, they are put into the RX ring 29 | // and remain there until rte_eth_rx_burst() is called. So the size of the ring 30 | // must be calculated based on the max time between calls to rte_eth_rx_burst() 31 | // and the max packets per second you expect to handle. 
32 | #define RX_RING_SIZE 128 33 | 34 | // DPDK has a pool allocator. Every "mbuf" holds one packets. This specifies 35 | // how many packets are in the pool. I think this includes all those in the TX 36 | // and RX rings, plus any that are currently in the hands of the application. 37 | #define NUM_MBUFS 8191 38 | 39 | // This value is probably irrelevant in this single threaded app. See 40 | // the docs for the cache_size param of rte_mempool_create(). 41 | #define MBUF_CACHE_SIZE 0 42 | 43 | // Everyone seems to use 32. Nobody seems to know why. 44 | #define BURST_SIZE 32 45 | 46 | 47 | // **************************************************************************** 48 | // Do not tweak these values. 49 | // **************************************************************************** 50 | 51 | // DPDK supports many queues per port. Simple apps only need one TX queue and 52 | // one RX queue. You only need more than one if you are doing something like 53 | // scatter/gather. 54 | #define DPDK_QUEUE_ID_RX 0 55 | 56 | 57 | // In DPDK, a "port" is a NIC. We will use the first NIC DPDK finds. 58 | int g_dpdkPortId = -1; 59 | 60 | static const struct rte_eth_conf port_conf_default = { 61 | .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN } 62 | }; 63 | 64 | 65 | static void port_init(struct rte_mempool *mbuf_pool) { 66 | // Find the first free DPDK enabled network interface. When running on 67 | // Azure, the TAP PMD and MLX4 PMD will both advertise a port. We must not 68 | // use them directly. Instead we want the fail-safe PMD which sits on top 69 | // of them. The fail-safe will already have taken ownership of the TAP and 70 | // MLX4 PMDs, so we won't see them as available in this loop. 
71 | g_dpdkPortId = 0; 72 | while (g_dpdkPortId < RTE_MAX_ETHPORTS && 73 | rte_eth_devices[g_dpdkPortId].data->owner.id != RTE_ETH_DEV_NO_OWNER) { 74 | g_dpdkPortId++; 75 | } 76 | if (g_dpdkPortId == RTE_MAX_ETHPORTS) { 77 | rte_exit(EXIT_FAILURE, "There were no DPDK ports free.\n"); 78 | } 79 | 80 | // Configure the Ethernet device. 81 | const int num_rx_queues = 1; 82 | const int num_tx_queues = 0; 83 | struct rte_eth_conf port_conf = port_conf_default; 84 | if (rte_eth_dev_configure(g_dpdkPortId, num_rx_queues, num_tx_queues, &port_conf)) { 85 | rte_exit(EXIT_FAILURE, "rte_eth_dev_configure() failed.\n"); 86 | } 87 | 88 | // Set up RX queue. 89 | if (rte_eth_rx_queue_setup(g_dpdkPortId, DPDK_QUEUE_ID_RX, RX_RING_SIZE, 90 | rte_eth_dev_socket_id(g_dpdkPortId), NULL, mbuf_pool) < 0) { 91 | rte_exit(EXIT_FAILURE, "Couldn't setup RX queue.\n"); 92 | } 93 | 94 | // Start the Ethernet port. 95 | if (rte_eth_dev_start(g_dpdkPortId) < 0) { 96 | rte_exit(EXIT_FAILURE, "Device start failed.\n"); 97 | } 98 | 99 | // Enable RX in promiscuous mode for the Ethernet device. 100 | rte_eth_promiscuous_enable(g_dpdkPortId); 101 | } 102 | 103 | 104 | int main(int argc, char *argv[]) { 105 | // Initialize the Environment Abstraction Layer. All DPDK apps must do this. 106 | if (rte_eal_init(argc, argv) < 0) { 107 | rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 108 | } 109 | 110 | // Creates a new mempool in memory to hold the mbufs. 
111 | struct rte_mempool *mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS, 112 | MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 113 | if (!mbuf_pool) { 114 | rte_exit(EXIT_FAILURE, "Couldn't create mbuf pool\n"); 115 | } 116 | 117 | port_init(mbuf_pool); 118 | 119 | while (1) { 120 | struct rte_mbuf *mbufs[BURST_SIZE]; 121 | unsigned num_recvd = rte_eth_rx_burst(g_dpdkPortId, DPDK_QUEUE_ID_RX, mbufs, BURST_SIZE); 122 | for (unsigned i = 0; i < num_recvd; i++) { 123 | printf("Received packet: "); 124 | 125 | // Print 10 bytes of UDP payload (without checking the packet is UDP). 126 | char const *pack_data = rte_pktmbuf_mtod(mbufs[i], char const *); 127 | for (int j = 42; j < 52; j++) { 128 | putchar(pack_data[j]); 129 | } 130 | putchar('\n'); 131 | 132 | rte_pktmbuf_free(mbufs[i]); 133 | } 134 | } 135 | 136 | return 0; 137 | } 138 | -------------------------------------------------------------------------------- /send.c: -------------------------------------------------------------------------------- 1 | // **************************************************************************** 2 | // This example shows how to send some simple UDP packets. 3 | // **************************************************************************** 4 | 5 | // Build it with: 6 | // ./build.sh 7 | // 8 | // Run it with: 9 | // sudo ./dpdk_send 10 | // 11 | // Check it works by running this on the receiving machine: 12 | // sudo tshark -i udp port 1234 13 | 14 | 15 | // DPDK headers 16 | #include 17 | #include 18 | #include 19 | 20 | // Platform headers 21 | #include 22 | #include 23 | #include 24 | 25 | // Standard headers 26 | #include 27 | #include 28 | 29 | 30 | // **************************************************************************** 31 | // Set these to the correct values of the machine you want to send to / from. 
// ****************************************************************************

// Build a 32-bit IPv4 address from the dotted-quad octets a.b.c.d. The first
// octet lands in the least significant byte, so when the value is stored
// unchanged into a packet on a little-endian host the bytes appear on the
// wire in the order a, b, c, d.
//
// Every argument is parenthesised and widened to uint32_t before shifting:
// without that, an expression argument could bind to the neighbouring
// operators, and shifting an int octet >= 128 left by 24 overflows a signed
// int.
#define MAKE_IPV4_ADDR(a, b, c, d) \
    ((uint32_t)(a) + ((uint32_t)(b) << 8) + ((uint32_t)(c) << 16) + ((uint32_t)(d) << 24))

static uint32_t g_src_ip = MAKE_IPV4_ADDR(10, 0, 0, 4);
static uint32_t g_dest_ip = MAKE_IPV4_ADDR(10, 0, 0, 5);
static uint8_t g_dest_mac_addr[ETH_ALEN] = { 0x00, 0x0d, 0x3a, 0xf4, 0x56, 0x28 };


// ****************************************************************************
// Tweak these values if you want.
// ****************************************************************************

// I believe packets you write using rte_eth_tx_burst() are put in the TX ring.
// The hardware moves them out of the ring when it can. The timing of when the
// hardware does that move depends on the rate at which it can put packets on
// the wire, on the performance of the interface between the NIC and the system
// RAM (typically DMA over PCIe) and on how much buffer space there is on the
// NIC. If your app calls rte_eth_tx_burst() repeatedly without doing anything
// else, you could fill this ring before the hardware has had time to move any
// packets out.
#define TX_RING_SIZE 128

// DPDK has a pool allocator. This specifies how many packets are in the pool.
// I think this includes all those in the TX and RX rings, plus any that are
// currently in the hands of the application.
#define NUM_MBUFS 8191

// This value is probably irrelevant in this single threaded app. See
// the docs for the cache_size param of rte_mempool_create().
#define MBUF_CACHE_SIZE 0

// Everyone seems to use 32. Nobody seems to know why.
#define BURST_SIZE 32


// ****************************************************************************
// Do not tweak these values.
70 | // **************************************************************************** 71 | 72 | static uint8_t g_src_mac_addr[ETH_ALEN]; // This will be set automatically at run time. 73 | 74 | // DPDK supports many queues per port. Most simple apps only need one TX queue 75 | // and one RX queue. You only need more than one if you are doing something 76 | // like scatter/gather. 77 | #define DPDK_QUEUE_ID_TX 0 78 | 79 | 80 | // In DPDK, a "port" is a NIC. We will use the first NIC DPDK finds. 81 | int g_dpdkPortId = -1; 82 | 83 | static const struct rte_eth_conf port_conf_default = { 84 | .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN } 85 | }; 86 | 87 | 88 | static void port_init(struct rte_mempool *mbuf_pool) { 89 | // Find the first free DPDK enabled network interface. When running on 90 | // Azure, the TAP PMD and MLX4 PMD will both advertise a port. We must not 91 | // use them directly. Instead we want the fail-safe PMD which sits on top 92 | // of them. The fail-safe will already have taken ownership of the TAP and 93 | // MLX4 PMDs, so we won't see them as available in this loop. 94 | g_dpdkPortId = 0; 95 | while (g_dpdkPortId < RTE_MAX_ETHPORTS && 96 | rte_eth_devices[g_dpdkPortId].data->owner.id != RTE_ETH_DEV_NO_OWNER) { 97 | g_dpdkPortId++; 98 | } 99 | if (g_dpdkPortId == RTE_MAX_ETHPORTS) { 100 | rte_exit(EXIT_FAILURE, "There were no DPDK ports free.\n"); 101 | } 102 | 103 | // Configure the Ethernet device. 104 | const int num_rx_queues = 0; 105 | const int num_tx_queues = 1; 106 | struct rte_eth_conf port_conf = port_conf_default; 107 | if (rte_eth_dev_configure(g_dpdkPortId, num_rx_queues, num_tx_queues, &port_conf)) { 108 | rte_exit(EXIT_FAILURE, "rte_eth_dev_configure() failed.\n"); 109 | } 110 | 111 | // Set up TX queue. 
112 | if (rte_eth_tx_queue_setup(g_dpdkPortId, DPDK_QUEUE_ID_TX, TX_RING_SIZE, 113 | rte_eth_dev_socket_id(g_dpdkPortId), NULL) < 0) { 114 | rte_exit(EXIT_FAILURE, "Couldn't setup TX queue.\n"); 115 | } 116 | 117 | // Start the Ethernet port. 118 | if (rte_eth_dev_start(g_dpdkPortId) < 0) { 119 | rte_exit(EXIT_FAILURE, "Device start failed.\n"); 120 | } 121 | } 122 | 123 | 124 | static uint16_t gen_checksum(const char *buf, int num_bytes) { 125 | const uint16_t *half_words = (const uint16_t *)buf; 126 | unsigned sum = 0; 127 | for (int i = 0; i < num_bytes / 2; i++) 128 | sum += half_words[i]; 129 | 130 | if (num_bytes & 1) 131 | sum += buf[num_bytes - 1]; 132 | 133 | sum = (sum & 0xffff) + (sum >> 16); 134 | sum += (sum & 0xff0000) >> 16; 135 | sum = ~sum & 0xffff; 136 | 137 | return sum; 138 | } 139 | 140 | 141 | static void create_eth_ip_udp(uint8_t *msg, size_t total_len, uint8_t dst_mac[ETH_ALEN], 142 | uint32_t src_ip, uint32_t dst_ip, uint16_t udp_src_port, uint16_t udp_dst_port) { 143 | // Packet looks like this: 144 | // Eth | IP | UDP | 145 | // We will fill out each section in order. 
146 | 147 | struct ethhdr *eth = (struct ethhdr *)msg; 148 | memcpy(eth->h_dest, dst_mac, ETH_ALEN); 149 | memcpy(eth->h_source, g_src_mac_addr, ETH_ALEN); 150 | eth->h_proto = htons(ETH_P_IP); 151 | 152 | struct iphdr *ip = (struct iphdr *)(eth + 1); 153 | size_t ip_len = total_len - sizeof(struct ethhdr); 154 | ip->ihl = 5; 155 | ip->version = 4; 156 | ip->tos = 0; 157 | ip->tot_len = htons((uint16_t)ip_len); 158 | ip->id = 0; 159 | ip->frag_off = 0; 160 | ip->ttl = 64; 161 | ip->protocol = IPPROTO_UDP; 162 | ip->check = 0; 163 | ip->saddr = src_ip; 164 | ip->daddr = dst_ip; 165 | ip->check = gen_checksum((char *)ip, sizeof(struct iphdr)); 166 | 167 | struct udphdr *udp = (struct udphdr *)(ip + 1); 168 | size_t udp_len = ip_len - sizeof(struct iphdr); 169 | udp->source = htons(udp_src_port); 170 | udp->dest = htons(udp_dst_port); 171 | udp->len = htons((uint16_t)udp_len); 172 | 173 | // Set the UDP checksum to zero for simplicity. This is perfectly legal. It 174 | // just tells the the receiver not to check the checksum. 175 | udp->check = 0; 176 | 177 | // Use the packet count as the payload. 178 | uint32_t *payload = (uint32_t *)(udp + 1); 179 | static uint32_t seq_num = 0; 180 | *payload = htonl(seq_num++); 181 | } 182 | 183 | 184 | // Send the specified number of packets, using as many bursts as necessary. 185 | static void do_send(struct rte_mempool *mbuf_pool, int num_to_send) { 186 | // The smallest packet allowed by Ethernet. 
187 | const unsigned eth_total_len = 64; 188 | 189 | struct rte_mbuf *mbufs[BURST_SIZE]; 190 | for (int i = 0; i < BURST_SIZE; i++) { 191 | mbufs[i] = rte_pktmbuf_alloc(mbuf_pool); 192 | if (!mbufs[i]) { 193 | rte_exit(EXIT_FAILURE, "Cannot alloc mbuf\n"); 194 | } 195 | 196 | mbufs[i]->pkt_len = eth_total_len; 197 | mbufs[i]->data_len = eth_total_len; 198 | } 199 | 200 | for (int num_packets_left = num_to_send; num_packets_left > 0;) { 201 | int num_to_send_this_burst = BURST_SIZE; 202 | if (num_packets_left < BURST_SIZE) { 203 | num_to_send_this_burst = num_packets_left; 204 | } 205 | 206 | for (int i = 0; i < num_to_send_this_burst; i++) { 207 | uint8_t *packet_data = rte_pktmbuf_mtod(mbufs[i], uint8_t *); 208 | const int UDP_PORT = 1234; 209 | create_eth_ip_udp(packet_data, eth_total_len, g_dest_mac_addr, 210 | g_src_ip, g_dest_ip, UDP_PORT, UDP_PORT); 211 | } 212 | 213 | // Send as many packets as will fit in the TX ring. 214 | int num_sent = rte_eth_tx_burst(g_dpdkPortId, DPDK_QUEUE_ID_TX, mbufs, num_to_send_this_burst); 215 | 216 | printf("Sent %i packets\n", num_sent); 217 | num_packets_left -= num_sent; 218 | } 219 | } 220 | 221 | 222 | int main(int argc, char *argv[]) { 223 | // Initialize the Environment Abstraction Layer. All DPDK apps must do this. 224 | if (rte_eal_init(argc, argv) < 0) { 225 | rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); 226 | } 227 | 228 | // Creates a new mempool in memory to hold the mbufs. 
229 | struct rte_mempool *mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS, 230 | MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 231 | if (!mbuf_pool) { 232 | rte_exit(EXIT_FAILURE, "Couldn't create mbuf pool\n"); 233 | } 234 | 235 | port_init(mbuf_pool); 236 | 237 | rte_eth_macaddr_get(g_dpdkPortId, (struct ether_addr *)g_src_mac_addr); 238 | printf("Our MAC: %02x %02x %02x %02x %02x %02x\n", 239 | g_src_mac_addr[0], g_src_mac_addr[1], 240 | g_src_mac_addr[2], g_src_mac_addr[3], 241 | g_src_mac_addr[4], g_src_mac_addr[5]); 242 | 243 | do_send(mbuf_pool, 1); 244 | 245 | return 0; 246 | } 247 | --------------------------------------------------------------------------------