├── LINUX ├── Makefile ├── bsd_glue_multistack.h └── multistack_linux.c ├── README.md ├── examples-netmap4 ├── Makefile ├── nm_util.h ├── pkt-gen.c ├── setup.sh └── test_bind.c ├── examples ├── Makefile ├── nm_util.h ├── pkt-gen.c ├── pkt-gen.c.orig ├── setup.sh └── test_bind.c └── sys ├── contrib └── multistack │ ├── Makefile │ ├── multistack.c │ └── multistack_kern.h └── net └── multistack.h /LINUX/Makefile: -------------------------------------------------------------------------------- 1 | CONFIG_MSTACK:=m 2 | multistack_lin-objs := multistack.o multistack_linux.o 3 | obj-$(CONFIG_MSTACK) = multistack_lin.o 4 | 5 | M:=$(CURDIR) 6 | #OVS_DIR = openvswitch 7 | #OVS_SRC = net/$(OVS_DIR)/ 8 | SRC ?= $(KSRC) 9 | #DIST = $(SRC)/$(OVS_SRC) 10 | #EXTRA_CFLAGS := -I$(NSRC)/LINUX -I$(NSRC)/sys -I$(NSRC)/sys/contrib -DCONFIG_NETMAP 11 | EXTRA_CFLAGS := -I$(NSRC)/LINUX -I$(NSRC)/sys -I$(M)/../sys -I$(M) -DCONFIG_NETMAP $(NMVER) 12 | .PHONY: all build clean # command targets, not files 13 | #all: get-ovs build 14 | all: build 15 | build: 16 | $(MAKE) -C $(SRC) M=$(CURDIR) \ 17 | CONFIG_NETMAP=m CONFIG_NETMAP_VALE=y CONFIG_MSTACK=m \ 18 | EXTRA_CFLAGS='$(EXTRA_CFLAGS)' \ 19 | KBUILD_EXTRA_SYMBOLS=$(NSRC)/LINUX/Module.symvers 20 | ls -l `find . 
-name \*.ko` 21 | clean: 22 | # make -C $(SRC) M=$(CURDIR)/$(OVS_DIR) clean 23 | (rm -rf *.o *.ko modules.order multistack_lin.mod.c Module.symvers) 24 | 25 | $(obj)/multistack.o: $(M)/../sys/contrib/multistack/multistack.c 26 | $(call cmd,cc_o_c) 27 | $(call cmd,modversions) 28 | $(obj)/multistack_linux.o: $(M)/multistack_linux.c 29 | $(call cmd,cc_o_c) 30 | $(call cmd,modversions) 31 | -------------------------------------------------------------------------------- /LINUX/bsd_glue_multistack.h: -------------------------------------------------------------------------------- 1 | #ifndef _BSD_GLUE_MULTISTACK_H 2 | #define _BSD_GLUE_MULTISTACK_H 3 | 4 | #define MS_RWLOCK_T spinlock_t 5 | #define MS_RWINIT(_lock, _m) spin_lock_init(_lock) 6 | #define MS_WLOCK() do {\ 7 | spin_lock(&ms_global.lock); rcu_read_lock(); } while (0) 8 | #define MS_WUNLOCK() do {\ 9 | rcu_read_unlock(); spin_unlock(&ms_global.lock); } while (0) 10 | #define MS_RLOCK(_m) rcu_read_lock() 11 | #define MS_RUNLOCK(_m) rcu_read_unlock() 12 | 13 | #define MS_LIST_INIT(_head) INIT_HLIST_HEAD(_head) 14 | #define MS_LIST_ENTRY(_type) struct hlist_node 15 | #define MS_LIST_ADD(_head, _n, _pos) hlist_add_head_rcu(&((_n)->_pos), _head) 16 | #define MS_LIST_DEL(_n, _pos) hlist_del_init_rcu(&((_n)->_pos)) 17 | #define MS_LIST_FOREACH(_n, _head, _pos) hlist_for_each_entry_rcu(_n, _head, _pos) 18 | #define MS_LIST_FOREACH_SAFE(_n, _head, _pos, _tvar) hlist_for_each_entry_rcu(_n, _head, _pos) 19 | #define MS_ROUTE_LIST struct hlist_head 20 | 21 | #define MS_GET_VAR(lval) rcu_dereference((lval)) 22 | #define MS_SET_VAR(lval, p) rcu_assign_pointer((lval), (p)) 23 | 24 | #define INET6_ADDRSTRLEN 46 25 | 26 | typedef uint32_t tcp_seq; 27 | 28 | /* IPv6 address presentation (taken from FreeBSD) */ 29 | 30 | #define satosin(sa) ((struct sockaddr_in *)(sa)) 31 | #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) 32 | #define IN6_ARE_ADDR_EQUAL(a, b) ipv6_addr_equal(a, b) 33 | #define ETHER_HDR_LEN ETH_HLEN 34 | 
struct ip { 35 | #if defined(__LITTLE_ENDIAN_BITFIELD) 36 | u_char ip_hl:4, /* header length */ 37 | ip_v:4; /* version */ 38 | #elif defined (__BIG_ENDIAN_BITFIELD) 39 | u_char ip_v:4, /* version */ 40 | ip_hl:4; /* header length */ 41 | #endif 42 | u_char ip_tos; /* type of service */ 43 | u_short ip_len; /* total length */ 44 | u_short ip_id; /* identification */ 45 | u_short ip_off; /* fragment offset field */ 46 | #define IP_RF 0x8000 /* reserved fragment flag */ 47 | #define IP_DF 0x4000 /* dont fragment flag */ 48 | #define IP_MF 0x2000 /* more fragments flag */ 49 | #define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ 50 | u_char ip_ttl; /* time to live */ 51 | u_char ip_p; /* protocol */ 52 | u_short ip_sum; /* checksum */ 53 | struct in_addr ip_src,ip_dst; /* source and dest address */ 54 | } __packed __aligned(4); 55 | 56 | struct ip6_hdr { 57 | union { 58 | struct ip6_hdrctl { 59 | u_int32_t ip6_un1_flow; /* 20 bits of flow-ID */ 60 | u_int16_t ip6_un1_plen; /* payload length */ 61 | u_int8_t ip6_un1_nxt; /* next header */ 62 | u_int8_t ip6_un1_hlim; /* hop limit */ 63 | } ip6_un1; 64 | u_int8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ 65 | } ip6_ctlun; 66 | struct in6_addr ip6_src; /* source address */ 67 | struct in6_addr ip6_dst; /* destination address */ 68 | } __packed; 69 | 70 | #define ETHER_ADDR_LEN 6 71 | #define ip6_vfc ip6_ctlun.ip6_un2_vfc 72 | #define ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow 73 | #define ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen 74 | #define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt 75 | #define ip6_hlim ip6_ctlun.ip6_un1.ip6_un1_hlim 76 | #define ip6_hops ip6_ctlun.ip6_un1.ip6_un1_hlim 77 | 78 | char *ip6_sprintf(char *, const struct in6_addr *); 79 | /* From ethernet.h */ 80 | struct ether_header { 81 | u_char ether_dhost[ETHER_ADDR_LEN]; 82 | u_char ether_shost[ETHER_ADDR_LEN]; 83 | u_short ether_type; 84 | }; 85 | #define ETHERTYPE_IP 0x0800 /* IP protocol */ 86 | #define ETHERTYPE_ARP 0x0806 /* Addr. 
resolution protocol */ 87 | #define ETHERTYPE_IPV6 0x86dd /* IPv6 */ 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /LINUX/multistack_linux.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015 NEC Europe Ltd. All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 1. Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * 13 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 | * SUCH DAMAGE. 24 | */ 25 | #include /* from netmap-release */ 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | #include 32 | #include /* for ms_pcb_clash() */ 33 | 34 | /* from FreeBSD in6.c */ 35 | /* 36 | * Convert IP6 address to printable (loggable) representation. 
Caller 37 | * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long. 38 | */ 39 | static char digits[] = "0123456789abcdef"; 40 | char * 41 | ip6_sprintf(char *ip6buf, const struct in6_addr *addr) 42 | { 43 | int i, cnt = 0, maxcnt = 0, idx = 0, index = 0; 44 | char *cp; 45 | const u_int16_t *a = (const u_int16_t *)addr; 46 | const u_int8_t *d; 47 | int dcolon = 0, zero = 0; 48 | 49 | cp = ip6buf; 50 | 51 | for (i = 0; i < 8; i++) { 52 | if (*(a + i) == 0) { 53 | cnt++; 54 | if (cnt == 1) 55 | idx = i; 56 | } 57 | else if (maxcnt < cnt) { 58 | maxcnt = cnt; 59 | index = idx; 60 | cnt = 0; 61 | } 62 | } 63 | if (maxcnt < cnt) { 64 | maxcnt = cnt; 65 | index = idx; 66 | } 67 | 68 | for (i = 0; i < 8; i++) { 69 | if (dcolon == 1) { 70 | if (*a == 0) { 71 | if (i == 7) 72 | *cp++ = ':'; 73 | a++; 74 | continue; 75 | } else 76 | dcolon = 2; 77 | } 78 | if (*a == 0) { 79 | if (dcolon == 0 && *(a + 1) == 0 && i == index) { 80 | if (i == 0) 81 | *cp++ = ':'; 82 | *cp++ = ':'; 83 | dcolon = 1; 84 | } else { 85 | *cp++ = '0'; 86 | *cp++ = ':'; 87 | } 88 | a++; 89 | continue; 90 | } 91 | d = (const u_char *)a; 92 | /* Try to eliminate leading zeros in printout like in :0001. 
*/ 93 | zero = 1; 94 | *cp = digits[*d >> 4]; 95 | if (*cp != '0') { 96 | zero = 0; 97 | cp++; 98 | } 99 | *cp = digits[*d++ & 0xf]; 100 | if (zero == 0 || (*cp != '0')) { 101 | zero = 0; 102 | cp++; 103 | } 104 | *cp = digits[*d >> 4]; 105 | if (zero == 0 || (*cp != '0')) { 106 | zero = 0; 107 | cp++; 108 | } 109 | *cp++ = digits[*d & 0xf]; 110 | *cp++ = ':'; 111 | a++; 112 | } 113 | *--cp = '\0'; 114 | return (ip6buf); 115 | } 116 | /* Find the init_net device that owns the IPv4/IPv6 address in sa and copy its name (up to IFNAMSIZ bytes) into name. Returns 1 if found, 0 otherwise. */ 117 | int 118 | ms_getifname(struct sockaddr *sa, char *name) 119 | { 120 | struct net_device *dev; 121 | int retval = 0; 122 | 123 | rcu_read_lock(); 124 | for_each_netdev_rcu(&init_net, dev) { 125 | /* the rcu_read_lock() above covers the whole walk; no per-device nesting, so every path stays balanced */ 126 | if (sa->sa_family == AF_INET) { 127 | struct in_device *in_dev; 128 | struct in_ifaddr *ifa; 129 | struct sockaddr_in *sin = (struct sockaddr_in *)sa; 130 | 131 | if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { 132 | continue; 133 | } 134 | 135 | for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 136 | if (ifa->ifa_local == sin->sin_addr.s_addr) { 137 | retval = 1; 138 | break; 139 | } 140 | } 141 | 142 | } else if (sa->sa_family == AF_INET6) { 143 | struct inet6_dev *in6_dev; 144 | struct inet6_ifaddr *ifa; 145 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; 146 | 147 | 148 | if ((in6_dev = __in6_dev_get(dev)) == NULL) { 149 | continue; 150 | } 151 | 152 | read_lock_bh(&in6_dev->lock); 153 | list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { 154 | if (ipv6_addr_equal(&ifa->addr, 155 | &sin6->sin6_addr)) { 156 | retval = 1; 157 | break; 158 | } 159 | } 160 | read_unlock_bh(&in6_dev->lock); 161 | 162 | } 163 | if (retval) 164 | break; 165 | } 166 | if (retval) 167 | strncpy(name, dev->name, IFNAMSIZ); 168 | rcu_read_unlock(); 169 | return retval; 170 | } 171 | 172 | int 173 | ms_pcb_clash(struct sockaddr *sa, uint8_t protocol) 174 | { 175 | struct socket *so; 176 | int found = 0, i; 177 | struct 
files_struct *files; 178 | struct fdtable *files_table; 179 | int socktype = 0; 180 | 181 | #if 0 /* we are not allowed to access inetsw... */ 182 | spinlock_t *lock; 183 | struct list_head *head; 184 | struct inet_protosw *answer = NULL; 185 | 186 | if (sa->sa_family == AF_INET) { 187 | head = inetsw; 188 | lock = &inetsw_lock; 189 | } 190 | #ifdef CONFIG_IPV6 191 | else if (sa->sa_family == AF_INET6) { 192 | head = inetsw6; 193 | lock = &inet6sw_lock; 194 | } 195 | #endif 196 | else 197 | return ENOENT; 198 | 199 | spin_lock_bh(lock); 200 | for (i = 0; i < SOCK_MAX; i++) { 201 | list_for_each(lh, &head[i]) { 202 | answer = list_entry(lh, struct inet_protosw, list); 203 | if (answer->protocol == protocol) { 204 | found = 1; 205 | break; 206 | } 207 | answer = NULL: 208 | } 209 | if (found) 210 | break; 211 | } 212 | spin_unlock_bh(lock); 213 | #endif /* 0 */ 214 | if (protocol == IPPROTO_TCP || protocol == IPPROTO_SCTP) 215 | socktype = SOCK_STREAM; 216 | else if (protocol == IPPROTO_UDP) 217 | socktype = SOCK_DGRAM; 218 | else if (protocol == IPPROTO_DCCP) 219 | socktype = SOCK_DCCP; 220 | /* If success, the protocol is registered */ 221 | if (sock_create_kern(sa->sa_family, socktype, protocol, &so) == 0) { 222 | found = 1; 223 | sock_release(so); /* don't need anymore */ 224 | } 225 | 226 | if (!found) /* the protosw is not registered */ 227 | return 0; 228 | 229 | /* 230 | * Walking through PCBs in Linux is not trivial. 231 | * We thus search in open files of the process. 
232 | */ 233 | files = current->files; 234 | files_table = files_fdtable(files); 235 | for (i = 0, found = 0; files_table->fd[i] != NULL; i++) { 236 | struct inet_sock *isk; 237 | int err; 238 | 239 | so = sock_from_file(files_table->fd[i], &err); 240 | if (!so) 241 | continue; 242 | else if (so->sk->sk_protocol != protocol) 243 | continue; 244 | 245 | isk = inet_sk(so->sk); 246 | if (sa->sa_family == AF_INET) { 247 | struct sockaddr_in *sin = (struct sockaddr_in *)sa; 248 | 249 | if (isk->inet_sport != sin->sin_port) 250 | continue; 251 | if (isk->inet_rcv_saddr == 0 || 252 | isk->inet_rcv_saddr == sin->sin_addr.s_addr) { 253 | found = 1; 254 | break; 255 | } 256 | } 257 | #ifdef CONFIG_IPV6 258 | else if (sa->sa_family == AF_INET6) { 259 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; 260 | 261 | if (isk->inet_sport != sin6->sin6_port) 262 | continue; 263 | if (ipv6_addr_any(&so->sk->sk_v6_rcv_saddr) || 264 | ipv6_addr_equal(&so->sk->sk_v6_rcv_saddr, 265 | &sin6->sin6_addr)) { 266 | found = 1; 267 | break; 268 | } 269 | } 270 | #endif 271 | if (found) 272 | break; 273 | } 274 | return found ? 0 : ENOENT; 275 | } 276 | 277 | static int linux_ms_init(void) 278 | { 279 | return -ms_init(); 280 | } 281 | 282 | static void linux_ms_fini(void) 283 | { 284 | ms_fini(); 285 | } 286 | 287 | module_init(linux_ms_init); 288 | module_exit(linux_ms_fini); 289 | MODULE_AUTHOR("Michio Honda"); 290 | MODULE_DESCRIPTION("MultiStack: isolated user-space stack support"); 291 | MODULE_LICENSE("Dual BSD/GPL"); 292 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MultiStack - Kernel Support for Multiplexing and Isolating User-space Stacks. 2 | 3 | MultiStack is a kernel module that enables user-level network stacks to securely run alongside the in-kernel stack on the same NIC. 
4 | 5 | To isolate multiple network stacks including the in-kernel stack, a 3-tuple is used. Currently, applications that run on socket APIs are isolated such that they exclusively use this 3-tuple through a call to bind() or equivalent system calls (except for special cases like fork()); MultiStack extends this primitive to user-space stacks. 6 | 7 | For example, when a user-level stack wishes to use local port TCP 80 on a NIC configured with IP address 10.0.0.2, it must create a socket, call bind() with the corresponding 3-tuple, and register the tuple with MultiStack. 8 | 9 | MultiStack is implemented as a module in [VALE](http://info.iet.unipi.it/~luigi/netmap/), a fast, scalable and modular software switch. A VALE virtual port is used to interconnect a user-level stack and a NIC. MultiStack forwards packets from the NIC to the different virtual ports (or the in-kernel network stack) based on the set of currently registered 3-tuples. It also validates packets sent from virtual ports to ensure that they match the registered 3-tuples. 10 | 11 | ## How to Build the Code (Linux) 12 | 13 | 1. Make sure you have [netmap](http://info.iet.unipi.it/~luigi/netmap/) installed. 14 | 15 | 2. In the Multistack directory: 16 | - cd LINUX 17 | - make KSRC=PATH_TO_KERNEL_SOURCES NSRC=PATH_TO_NETMAP_SOURCES 18 | 19 | 20 | ## How to Use the Code (Linux) 21 | 22 | Assuming you've already installed netmap and Multistack in ~/netmap and ~/multistack respectively, and that you configured your PATH environment variable to include ~/netmap/examples/ and ~/multistack/examples/ : 23 | 24 | 1. Instantiate a VALE switch named "valem:", and attach eth1 and the in-kernel network stack to it: 25 | 26 | - vale-ctl -h valem:eth1 27 | 28 | The vale-ctl command is included in [netmap](http://info.iet.unipi.it/~luigi/netmap/), and the "-h" option attaches the in-kernel network stack to the switch. 29 | 30 | 2. 
Load the MultiStack kernel module 31 | - insmod ~/multistack/LINUX/multistack_lin.ko 32 | 33 | ## How to Run Apps 34 | 35 | Applications or user-level stacks that run on top of the netmap API can be easily ported. To run a user-level network stack or app: 36 | 37 | 1. Run the app on top of a virtual port that attaches to the switch instance "valem:", represented by "valem:vp0". 38 | - You can choose an arbitrary name in place of "vp0" 39 | - You don't need any system-wide configuration to create "valem:vp0". When your app registers this name with nm_open(), the virtual port is dynamically created. 40 | 41 | 2. Create a socket and bind() a 3-tuple. 42 | - This step is important so that MultiStack can afterwards confirm that this process owns the credentials for this 3-tuple. 43 | 44 | 3. Issue an ioctl() for a file descriptor opened by nm_open() (you can refer to it with nmd->fd) with MULTISTACK_BIND as an argument in order to register this 3-tuple with MultiStack. 45 | - Here MultiStack internally checks whether the calling process owns the credentials to register this 3-tuple. Since you have bind()ed this 3-tuple, the call will succeed. 46 | 47 | 4. You can now send (raw) packets whose source matches this 3-tuple, and receive (raw) packets whose destination is this 3-tuple on "valem:vp0" using the netmap API. 
48 | 49 | For more details, see multistack/examples/pkt-gen.c (a modified version of netmap/examples/pkt-gen.c that can run on top of MultiStack) 50 | 51 | ## How to Build the Code (FreeBSD) 52 | 53 | Assuming that the kernel source code directory is `~/head/` use 54 | ```` 55 | cd multistack/sys/contrib/multistack 56 | env SYSDIR=~/head/sys make 57 | ```` 58 | If you also want SCTP support, you need to do 59 | ```` 60 | echo "#define SCTP 1" > opt_sctp.h 61 | env SYSDIR=~/head/sys make 62 | ```` 63 | 64 | ## Author 65 | 66 | Michio Honda (firstname@netapp.com) 67 | 68 | 69 | ## References 70 | 71 | Michio Honda, Felipe Huici, Costin Raiciu, Joao Araujo and Luigi Rizzo, ["Rekindling network protocol innovation with user-level stacks"](http://www.sigcomm.org/sites/default/files/ccr/papers/2014/April/0000000-0000006.pdf), ACM SIGCOMM Computer Communication Review 44(2), 52-58, April, 2014 72 | 73 | 74 | ## Credits 75 | 76 | MultiStack was developed at NEC Laboratories Europe, with partial funding from EU FP7 projects CHANGE and Trilogy2. It is currently maintained with support from the EU FP7 SSICLOPS project and NetApp. 77 | -------------------------------------------------------------------------------- /examples-netmap4/Makefile: -------------------------------------------------------------------------------- 1 | PROGS = pkt-gen test_bind 2 | CLEANFILES = $(PROGS) *.o 3 | CFLAGS = -O2 -pipe 4 | CFLAGS += -Werror -Wall -Wextra 5 | CFLAGS += -I ../sys 6 | CFLAGS += -DMULTISTACK 7 | LDFLAGS = -lpcap -lrt -lpthread 8 | .PHONY: all clean # command targets, not files 9 | all: $(PROGS) 10 | pkt-gen: pkt-gen.o 11 | $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) 12 | test_bind: test_bind.c 13 | $(CC) $(CFLAGS) -o $@ $^ 14 | clean: 15 | -@rm -rf $(CLEANFILES) 16 | 17 | -------------------------------------------------------------------------------- /examples-netmap4/nm_util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 Luigi Rizzo. 
All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 1. Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * 13 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 | * SUCH DAMAGE. 24 | */ 25 | 26 | /* 27 | * $Id$ 28 | * 29 | * Some utilities to build netmap-based programs. 30 | */ 31 | 32 | #ifndef _NM_UTIL_H 33 | #define _NM_UTIL_H 34 | #include 35 | #include /* signal */ 36 | #include 37 | #include 38 | #include /* PRI* macros */ 39 | #include /* strcmp */ 40 | #include /* open */ 41 | #include /* close */ 42 | #include /* getifaddrs */ 43 | 44 | #include /* PROT_* */ 45 | #include /* ioctl */ 46 | #include 47 | #include /* sockaddr.. 
*/ 48 | #include /* ntohs */ 49 | #include 50 | #include /* sysctl */ 51 | #include /* timersub */ 52 | 53 | #include 54 | #include /* ifreq */ 55 | 56 | #include 57 | #include 58 | #include 59 | 60 | #include 61 | #include 62 | 63 | #ifndef MY_PCAP /* use the system's pcap if available */ 64 | 65 | #ifdef NO_PCAP 66 | #define PCAP_ERRBUF_SIZE 512 67 | typedef void pcap_t; 68 | struct pcap_pkthdr; 69 | #define pcap_inject(a,b,c) ((void)a, (void)b, (void)c, -1) 70 | #define pcap_dispatch(a, b, c, d) (void)c 71 | #define pcap_open_live(a, b, c, d, e) ((void)e, NULL) 72 | #else /* !NO_PCAP */ 73 | #include // XXX do we need it ? 74 | #endif /* !NO_PCAP */ 75 | 76 | #endif // XXX hack 77 | 78 | #include /* pthread_* */ 79 | 80 | #ifdef linux 81 | #define ifr_flagshigh ifr_flags 82 | #define ifr_curcap ifr_flags 83 | #define ifr_reqcap ifr_flags 84 | #define IFF_PPROMISC IFF_PROMISC 85 | #include 86 | #include 87 | 88 | #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 89 | #include /* ether_aton */ 90 | #include /* sockaddr_ll */ 91 | #endif /* linux */ 92 | 93 | #ifdef __FreeBSD__ 94 | #include /* le64toh */ 95 | #include 96 | 97 | #include /* pthread w/ affinity */ 98 | #include /* cpu_set */ 99 | #include /* LLADDR */ 100 | #endif /* __FreeBSD__ */ 101 | 102 | #ifdef __APPLE__ 103 | #define ifr_flagshigh ifr_flags // XXX 104 | #define IFF_PPROMISC IFF_PROMISC 105 | #include /* LLADDR */ 106 | #define clock_gettime(a,b) \ 107 | do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 108 | #endif /* __APPLE__ */ 109 | 110 | static inline int min(int a, int b) { return a < b ? a : b; } 111 | extern int time_second; 112 | 113 | /* debug support */ 114 | #define ND(format, ...) do {} while(0) 115 | #define D(format, ...) \ 116 | fprintf(stderr, "%s [%d] " format "\n", \ 117 | __FUNCTION__, __LINE__, ##__VA_ARGS__) 118 | 119 | #define RD(lps, format, ...) 
\ 120 | do { \ 121 | static int t0, cnt; \ 122 | if (t0 != time_second) { \ 123 | t0 = time_second; \ 124 | cnt = 0; \ 125 | } \ 126 | if (cnt++ < lps) \ 127 | D(format, ##__VA_ARGS__); \ 128 | } while (0) 129 | 130 | 131 | 132 | // XXX does it work on 32-bit machines ? 133 | static inline void prefetch (const void *x) 134 | { 135 | __asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x)); 136 | } 137 | 138 | // XXX only for multiples of 64 bytes, non overlapped. 139 | static inline void 140 | pkt_copy(const void *_src, void *_dst, int l) 141 | { 142 | const uint64_t *src = _src; 143 | uint64_t *dst = _dst; 144 | #define likely(x) __builtin_expect(!!(x), 1) 145 | #define unlikely(x) __builtin_expect(!!(x), 0) 146 | if (unlikely(l >= 1024)) { 147 | bcopy(src, dst, l); 148 | return; 149 | } 150 | for (; l > 0; l-=64) { 151 | *dst++ = *src++; 152 | *dst++ = *src++; 153 | *dst++ = *src++; 154 | *dst++ = *src++; 155 | *dst++ = *src++; 156 | *dst++ = *src++; 157 | *dst++ = *src++; 158 | *dst++ = *src++; 159 | } 160 | } 161 | 162 | /* 163 | * info on a ring we handle 164 | */ 165 | struct my_ring { 166 | const char *ifname; 167 | int fd; 168 | char *mem; /* userspace mmap address */ 169 | u_int memsize; 170 | u_int queueid; 171 | u_int begin, end; /* first..last+1 rings to check */ 172 | struct netmap_if *nifp; 173 | struct netmap_ring *tx, *rx; /* shortcuts */ 174 | 175 | uint32_t if_flags; 176 | uint32_t if_reqcap; 177 | uint32_t if_curcap; 178 | }; 179 | int netmap_open(struct my_ring *me, int ringid, int promisc); 180 | int netmap_close(struct my_ring *me); 181 | int nm_do_ioctl(struct my_ring *me, u_long what, int subcmd); 182 | #endif /* _NM_UTIL_H */ 183 | -------------------------------------------------------------------------------- /examples-netmap4/pkt-gen.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved. 
3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following conditions 6 | * are met: 7 | * 1. Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * 2. Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * 13 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 | * SUCH DAMAGE. 24 | */ 25 | 26 | /* 27 | * $FreeBSD: head/tools/tools/netmap/pkt-gen.c 231198 2012-02-08 11:43:29Z luigi $ 28 | * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 29 | * 30 | * Example program to show how to build a multithreaded packet 31 | * source/sink using the netmap device. 32 | * 33 | * In this example we create a programmable number of threads 34 | * to take care of all the queues of the interface used to 35 | * send or receive traffic. 
36 | * 37 | */ 38 | 39 | #include "nm_util.h" 40 | #ifdef MULTISTACK 41 | #include 42 | #endif 43 | 44 | #include // isprint() 45 | 46 | const char *default_payload="netmap pkt-gen DIRECT payload\n" 47 | "http://info.iet.unipi.it/~luigi/netmap/ "; 48 | 49 | const char *indirect_payload="netmap pkt-gen indirect payload\n" 50 | "http://info.iet.unipi.it/~luigi/netmap/ "; 51 | 52 | int time_second; // support for RD() debugging macro 53 | 54 | int verbose = 0; 55 | 56 | #define SKIP_PAYLOAD 1 /* do not check payload. */ 57 | 58 | struct pkt { 59 | struct ether_header eh; 60 | struct ip ip; 61 | struct udphdr udp; 62 | uint8_t body[2048]; // XXX hardwired 63 | } __attribute__((__packed__)); 64 | 65 | struct ip_range { 66 | char *name; 67 | uint32_t start, end; /* same as struct in_addr */ 68 | uint16_t port0, port1; 69 | }; 70 | 71 | struct mac_range { 72 | char *name; 73 | struct ether_addr start, end; 74 | }; 75 | 76 | struct dst_range { 77 | char *name; 78 | int start, end, now; 79 | }; 80 | 81 | struct tstamp { 82 | uint32_t sec; 83 | uint32_t nsec; 84 | }; 85 | 86 | /* 87 | * global arguments for all threads 88 | */ 89 | 90 | struct glob_arg { 91 | struct ip_range src_ip; 92 | struct ip_range dst_ip; 93 | struct mac_range dst_mac; 94 | struct mac_range src_mac; 95 | int pkt_size; 96 | int burst; 97 | int forever; 98 | int npackets; /* total packets to send */ 99 | int frags; /* fragments per packet */ 100 | int nthreads; 101 | int cpus; 102 | int options; /* testing */ 103 | #define OPT_PREFETCH 1 104 | #define OPT_ACCESS 2 105 | #define OPT_COPY 4 106 | #define OPT_MEMCPY 8 107 | #define OPT_TS 16 /* add a timestamp */ 108 | #define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 109 | #define OPT_DUMP 64 /* dump rx/tx traffic */ 110 | int dev_type; 111 | pcap_t *p; 112 | 113 | int tx_rate; 114 | struct timespec tx_period; 115 | 116 | int affinity; 117 | int affinity2; 118 | int main_fd; 119 | int report_interval; 120 | void *(*td_body)(void *); 121 | void 
*mmap_addr; 122 | int mmap_size; 123 | char *ifname; 124 | char *nmr_config; 125 | int dummy_send; 126 | u_int qfirst; 127 | struct dst_range dst; 128 | #ifdef MULTISTACK 129 | int so; 130 | struct msreq msr; 131 | #endif 132 | }; 133 | enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 134 | 135 | 136 | /* 137 | * Arguments for a new thread. The same structure is used by 138 | * the source and the sink 139 | */ 140 | struct targ { 141 | struct glob_arg *g; 142 | int used; 143 | int completed; 144 | int cancel; 145 | int fd; 146 | struct nmreq nmr; 147 | struct netmap_if *nifp; 148 | uint16_t qfirst, qlast; /* range of queues to scan */ 149 | volatile uint64_t count; 150 | struct timespec tic, toc; 151 | int me; 152 | pthread_t thread; 153 | int affinity; 154 | 155 | struct pkt pkt; 156 | }; 157 | 158 | 159 | /* 160 | * extract the extremes from a range of ipv4 addresses. 161 | * addr_lo[-addr_hi][:port_lo[-port_hi]] 162 | */ 163 | static void 164 | extract_ip_range(struct ip_range *r) 165 | { 166 | char *ap, *pp; 167 | struct in_addr a; 168 | 169 | D("extract IP range from %s", r->name); 170 | r->port0 = r->port1 = 0; 171 | r->start = r->end = 0; 172 | 173 | /* the first - splits start/end of range */ 174 | ap = index(r->name, '-'); /* do we have ports ? 
*/ 175 | if (ap) { 176 | *ap++ = '\0'; 177 | } 178 | /* grab the initial values (mandatory) */ 179 | pp = index(r->name, ':'); 180 | if (pp) { 181 | *pp++ = '\0'; 182 | r->port0 = r->port1 = strtol(pp, NULL, 0); 183 | }; 184 | inet_aton(r->name, &a); 185 | r->start = r->end = ntohl(a.s_addr); 186 | if (ap) { 187 | pp = index(ap, ':'); 188 | if (pp) { 189 | *pp++ = '\0'; 190 | if (*pp) 191 | r->port1 = strtol(pp, NULL, 0); 192 | } 193 | if (*ap) { 194 | inet_aton(ap, &a); 195 | r->end = ntohl(a.s_addr); 196 | } 197 | } 198 | if (r->port0 > r->port1) { 199 | uint16_t tmp = r->port0; 200 | r->port0 = r->port1; 201 | r->port1 = tmp; 202 | } 203 | if (r->start > r->end) { 204 | uint32_t tmp = r->start; 205 | r->start = r->end; 206 | r->end = tmp; 207 | } 208 | { 209 | struct in_addr a; 210 | char buf1[16]; // one ip address 211 | 212 | a.s_addr = htonl(r->end); 213 | strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 214 | a.s_addr = htonl(r->start); 215 | D("range is %s:%d to %s:%d", 216 | inet_ntoa(a), r->port0, buf1, r->port1); 217 | } 218 | } 219 | 220 | static void 221 | extract_mac_range(struct mac_range *r) 222 | { 223 | D("extract MAC range from %s", r->name); 224 | bcopy(ether_aton(r->name), &r->start, 6); 225 | bcopy(ether_aton(r->name), &r->end, 6); 226 | #if 0 227 | bcopy(targ->src_mac, eh->ether_shost, 6); 228 | p = index(targ->g->src_mac, '-'); 229 | if (p) 230 | targ->src_mac_range = atoi(p+1); 231 | 232 | bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 233 | bcopy(targ->dst_mac, eh->ether_dhost, 6); 234 | p = index(targ->g->dst_mac, '-'); 235 | if (p) 236 | targ->dst_mac_range = atoi(p+1); 237 | #endif 238 | D("%s starts at %s", r->name, ether_ntoa(&r->start)); 239 | } 240 | 241 | static struct targ *targs; 242 | static int global_nthreads; 243 | 244 | /* control-C handler */ 245 | static void 246 | sigint_h(int sig) 247 | { 248 | int i; 249 | 250 | (void)sig; /* UNUSED */ 251 | for (i = 0; i < global_nthreads; i++) { 252 | targs[i].cancel = 1; 253 | } 
254 | signal(SIGINT, SIG_DFL); 255 | } 256 | 257 | /* sysctl wrapper to return the number of active CPUs (hw.ncpu on FreeBSD; 1 elsewhere or if sysctl fails) */ 258 | static int 259 | system_ncpus(void) 260 | { 261 | #ifdef __FreeBSD__ 262 | int mib[2], ncpus; 263 | size_t len; 264 | 265 | mib[0] = CTL_HW; 266 | mib[1] = HW_NCPU; 267 | len = sizeof(ncpus); /* size of the output buffer, not of the mib array */ 268 | if (sysctl(mib, 2, &ncpus, &len, NULL, 0) == -1) 269 | ncpus = 1; /* same fallback as the non-FreeBSD branch */ 270 | return (ncpus); 271 | #else 272 | return 1; 273 | #endif /* !__FreeBSD__ */ 274 | } 275 | 276 | #ifdef __linux__ 277 | #define sockaddr_dl sockaddr_ll 278 | #define sdl_family sll_family 279 | #define AF_LINK AF_PACKET 280 | #define LLADDR(s) ((s)->sll_addr) 281 | #include 282 | #define TAP_CLONEDEV "/dev/net/tun" 283 | #endif /* __linux__ */ 284 | 285 | #ifdef __FreeBSD__ 286 | #include 287 | #define TAP_CLONEDEV "/dev/tap" 288 | #endif /* __FreeBSD */ 289 | 290 | #ifdef __APPLE__ 291 | // #warning TAP not supported on apple ? 292 | #include 293 | #define TAP_CLONEDEV "/dev/tap" 294 | #endif /* __APPLE__ */ 295 | 296 | 297 | /* 298 | * parse the vale configuration in conf and put it in nmr. 299 | * The configuration may consist of 0 to 4 numbers separated 300 | * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 301 | * Missing numbers or zeroes stand for default values. 302 | * As an additional convenience, if exactly one number 303 | * is specified, then this is assigned to both #tx-slots and #rx-slots. 304 | * If there is no 4th number, then the 3rd is assigned to both #tx-rings 305 | * and #rx-rings. 306 | */ 307 | void parse_nmr_config(const char* conf, struct nmreq *nmr) 308 | { 309 | char *w, *tok; 310 | int i, v; 311 | 312 | nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 313 | nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 314 | if (conf == NULL || ! 
*conf) 315 | return; 316 | w = strdup(conf); 317 | for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 318 | v = atoi(tok); 319 | switch (i) { 320 | case 0: 321 | nmr->nr_tx_slots = nmr->nr_rx_slots = v; 322 | break; 323 | case 1: 324 | nmr->nr_rx_slots = v; 325 | break; 326 | case 2: 327 | nmr->nr_tx_rings = nmr->nr_rx_rings = v; 328 | break; 329 | case 3: 330 | nmr->nr_rx_rings = v; 331 | break; 332 | default: 333 | D("ignored config: %s", tok); 334 | break; 335 | } 336 | } 337 | D("txr %d txd %d rxr %d rxd %d", 338 | nmr->nr_tx_rings, nmr->nr_tx_slots, 339 | nmr->nr_rx_rings, nmr->nr_rx_slots); 340 | free(w); 341 | } 342 | void extract_dst_range(struct dst_range *dst) 343 | { 344 | char *w, *tok; 345 | int i, v; 346 | 347 | if (!dst->name) 348 | return; 349 | dst->start = dst->end = dst->now = 0; 350 | w = strdup(dst->name); 351 | for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 352 | v = atoi(tok); 353 | switch (i) { 354 | case 0: 355 | dst->start = dst->now = v; 356 | break; 357 | case 1: 358 | dst->end = v; 359 | break; 360 | default: 361 | D("ignored config: %s", tok); 362 | break; 363 | } 364 | } 365 | D("start %d end %d now %d", dst->start, dst->end, dst->now); 366 | free(w); 367 | } 368 | 369 | 370 | /* 371 | * locate the src mac address for our interface, put it 372 | * into the user-supplied buffer. return 0 if ok, -1 on error. 
373 | */ 374 | static int 375 | source_hwaddr(const char *ifname, char *buf) 376 | { 377 | struct ifaddrs *ifaphead, *ifap; 378 | int l = sizeof(ifap->ifa_name); 379 | 380 | if (getifaddrs(&ifaphead) != 0) { 381 | D("getifaddrs %s failed", ifname); 382 | return (-1); 383 | } 384 | 385 | for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 386 | struct sockaddr_dl *sdl = 387 | (struct sockaddr_dl *)ifap->ifa_addr; 388 | uint8_t *mac; 389 | 390 | if (!sdl || sdl->sdl_family != AF_LINK) 391 | continue; 392 | if (strncmp(ifap->ifa_name, ifname, l) != 0) 393 | continue; 394 | mac = (uint8_t *)LLADDR(sdl); 395 | sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 396 | mac[0], mac[1], mac[2], 397 | mac[3], mac[4], mac[5]); 398 | if (verbose) 399 | D("source hwaddr %s", buf); 400 | break; 401 | } 402 | freeifaddrs(ifaphead); 403 | return ifap ? 0 : 1; 404 | } 405 | 406 | 407 | /* set the thread affinity. */ 408 | static int 409 | setaffinity(pthread_t me, int i) 410 | { 411 | #ifdef __FreeBSD__ 412 | cpuset_t cpumask; 413 | 414 | if (i == -1) 415 | return 0; 416 | 417 | /* Set thread affinity affinity.*/ 418 | CPU_ZERO(&cpumask); 419 | CPU_SET(i, &cpumask); 420 | 421 | if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 422 | D("Unable to set affinity"); 423 | return 1; 424 | } 425 | #else 426 | (void)me; /* suppress 'unused' warnings */ 427 | (void)i; 428 | #endif /* __FreeBSD__ */ 429 | return 0; 430 | } 431 | 432 | /* Compute the checksum of the given ip header. */ 433 | static uint16_t 434 | checksum(const void *data, uint16_t len, uint32_t sum) 435 | { 436 | const uint8_t *addr = data; 437 | uint32_t i; 438 | 439 | /* Checksum all the pairs of bytes first... */ 440 | for (i = 0; i < (len & ~1U); i += 2) { 441 | sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 442 | if (sum > 0xFFFF) 443 | sum -= 0xFFFF; 444 | } 445 | /* 446 | * If there's a single byte left over, checksum it, too. 
447 | * Network byte order is big-endian, so the remaining byte is 448 | * the high byte. 449 | */ 450 | if (i < len) { 451 | sum += addr[i] << 8; 452 | if (sum > 0xFFFF) 453 | sum -= 0xFFFF; 454 | } 455 | return sum; 456 | } 457 | 458 | static u_int16_t 459 | wrapsum(u_int32_t sum) 460 | { 461 | sum = ~sum & 0xFFFF; 462 | return (htons(sum)); 463 | } 464 | 465 | /* Check the payload of the packet for errors (use it for debug). 466 | * Look for consecutive ascii representations of the size of the packet. 467 | */ 468 | static void 469 | dump_payload(char *p, int len, struct netmap_ring *ring, int cur) 470 | { 471 | char buf[128]; 472 | int i, j, i0; 473 | 474 | /* get the length in ASCII of the length of the packet. */ 475 | 476 | printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 477 | ring, cur, ring->slot[cur].buf_idx, 478 | ring->slot[cur].flags, len); 479 | /* hexdump routine */ 480 | for (i = 0; i < len; ) { 481 | memset(buf, sizeof(buf), ' '); 482 | sprintf(buf, "%5d: ", i); 483 | i0 = i; 484 | for (j=0; j < 16 && i < len; i++, j++) 485 | sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 486 | i = i0; 487 | for (j=0; j < 16 && i < len; i++, j++) 488 | sprintf(buf+7+j + 48, "%c", 489 | isprint(p[i]) ? p[i] : '.'); 490 | printf("%s\n", buf); 491 | } 492 | } 493 | 494 | /* 495 | * Fill a packet with some payload. 496 | * We create a UDP packet so the payload starts at 497 | * 14+20+8 = 42 bytes. 498 | */ 499 | #ifdef __linux__ 500 | #define uh_sport source 501 | #define uh_dport dest 502 | #define uh_ulen len 503 | #define uh_sum check 504 | #endif /* linux */ 505 | 506 | /* 507 | * increment the addressed in the packet, 508 | * starting from the least significant field. 
509 | * DST_IP DST_PORT SRC_IP SRC_PORT 510 | */ 511 | static void 512 | update_addresses(struct pkt *pkt, struct glob_arg *g) 513 | { 514 | uint32_t a; 515 | uint16_t p; 516 | struct ip *ip = &pkt->ip; 517 | struct udphdr *udp = &pkt->udp; 518 | 519 | p = ntohs(udp->uh_sport); 520 | if (p < g->src_ip.port1) { /* just inc, no wrap */ 521 | udp->uh_sport = htons(p + 1); 522 | return; 523 | } 524 | udp->uh_sport = htons(g->src_ip.port0); 525 | 526 | a = ntohl(ip->ip_src.s_addr); 527 | if (a < g->src_ip.end) { /* just inc, no wrap */ 528 | ip->ip_src.s_addr = htonl(a + 1); 529 | return; 530 | } 531 | ip->ip_src.s_addr = htonl(g->src_ip.start); 532 | 533 | udp->uh_sport = htons(g->src_ip.port0); 534 | p = ntohs(udp->uh_dport); 535 | if (p < g->dst_ip.port1) { /* just inc, no wrap */ 536 | udp->uh_dport = htons(p + 1); 537 | return; 538 | } 539 | udp->uh_dport = htons(g->dst_ip.port0); 540 | 541 | a = ntohl(ip->ip_dst.s_addr); 542 | if (a < g->dst_ip.end) { /* just inc, no wrap */ 543 | ip->ip_dst.s_addr = htonl(a + 1); 544 | return; 545 | } 546 | ip->ip_dst.s_addr = htonl(g->dst_ip.start); 547 | 548 | } 549 | 550 | /* 551 | * initialize one packet and prepare for the next one. 552 | * The copy could be done better instead of repeating it each time. 553 | */ 554 | static void 555 | initialize_packet(struct targ *targ) 556 | { 557 | struct pkt *pkt = &targ->pkt; 558 | struct ether_header *eh; 559 | struct ip *ip; 560 | struct udphdr *udp; 561 | uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 562 | const char *payload = targ->g->options & OPT_INDIRECT ? 
563 | indirect_payload : default_payload; 564 | int i, l, l0 = strlen(payload); 565 | 566 | /* create a nice NUL-terminated string */ 567 | for (i = 0; i < paylen;) { 568 | l = min(l0, paylen - i); 569 | bcopy(payload, pkt->body + i, l); 570 | i += l; 571 | } 572 | pkt->body[i-1] = '\0'; 573 | ip = &pkt->ip; 574 | 575 | /* prepare the headers */ 576 | ip->ip_v = IPVERSION; 577 | ip->ip_hl = 5; 578 | ip->ip_id = 0; 579 | ip->ip_tos = IPTOS_LOWDELAY; 580 | ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 581 | ip->ip_id = 0; 582 | ip->ip_off = htons(IP_DF); /* Don't fragment */ 583 | ip->ip_ttl = IPDEFTTL; 584 | ip->ip_p = IPPROTO_UDP; 585 | ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 586 | ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 587 | ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 588 | 589 | 590 | udp = &pkt->udp; 591 | udp->uh_sport = htons(targ->g->src_ip.port0); 592 | udp->uh_dport = htons(targ->g->dst_ip.port0); 593 | udp->uh_ulen = htons(paylen); 594 | /* Magic: taken from sbin/dhclient/packet.c */ 595 | udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 596 | checksum(pkt->body, 597 | paylen - sizeof(*udp), 598 | checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 599 | IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 600 | ) 601 | ) 602 | )); 603 | 604 | eh = &pkt->eh; 605 | bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 606 | bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 607 | eh->ether_type = htons(ETHERTYPE_IP); 608 | // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 609 | } 610 | 611 | 612 | 613 | /* 614 | * create and enqueue a batch of packets on a ring. 615 | * On the last one set NS_REPORT to tell the driver to generate 616 | * an interrupt when done. 
617 | */ 618 | static int 619 | send_packets(struct netmap_ring *ring, struct pkt *pkt, 620 | struct glob_arg *g, u_int count, int options, u_int nfrags, 621 | struct dst_range *dst) 622 | { 623 | u_int sent, cur = ring->cur; 624 | int fcnt; 625 | int size = g->pkt_size; 626 | 627 | if (ring->avail < count) 628 | count = ring->avail; 629 | if (count < nfrags) { 630 | D("truncating packet, no room for frags %d %d", 631 | count, nfrags); 632 | } 633 | #if 0 634 | if (options & (OPT_COPY | OPT_PREFETCH) ) { 635 | for (sent = 0; sent < count; sent++) { 636 | struct netmap_slot *slot = &ring->slot[cur]; 637 | char *p = NETMAP_BUF(ring, slot->buf_idx); 638 | 639 | prefetch(p); 640 | cur = NETMAP_RING_NEXT(ring, cur); 641 | } 642 | cur = ring->cur; 643 | } 644 | #endif 645 | for (fcnt = nfrags, sent = 0; sent < count; sent++) { 646 | struct netmap_slot *slot = &ring->slot[cur]; 647 | char *p = NETMAP_BUF(ring, slot->buf_idx); 648 | 649 | slot->flags = 0; 650 | if (options & OPT_INDIRECT) { 651 | slot->flags |= NS_INDIRECT; 652 | slot->ptr = (uint64_t)pkt; 653 | } else if (options & OPT_COPY) { 654 | pkt_copy(pkt, p, size); 655 | if (fcnt == 1) 656 | update_addresses(pkt, g); 657 | } else if (options & OPT_MEMCPY) { 658 | memcpy(p, pkt, size); 659 | if (fcnt == 1) 660 | update_addresses(pkt, g); 661 | } else if (options & OPT_PREFETCH) { 662 | prefetch(p); 663 | } 664 | if (options & OPT_DUMP) 665 | dump_payload(p, size, ring, cur); 666 | slot->len = size; 667 | if (dst) { 668 | slot->flags |= NS_DST_PORT | dst->now << 8; 669 | dst->now = dst->nowend ? dst->now+1 : dst->start; 670 | } 671 | if (--fcnt > 0) 672 | slot->flags |= NS_MOREFRAG; 673 | else 674 | fcnt = nfrags; 675 | if (sent == count - 1) { 676 | slot->flags &= ~NS_MOREFRAG; 677 | slot->flags |= NS_REPORT; 678 | } 679 | cur = NETMAP_RING_NEXT(ring, cur); 680 | } 681 | ring->avail -= sent; 682 | ring->cur = cur; 683 | 684 | return (sent); 685 | } 686 | 687 | /* 688 | * Send a packet, and wait for a response. 
689 | * The payload (after UDP header, ofs 42) has a 4-byte sequence 690 | * followed by a struct timeval (or bintime?) 691 | */ 692 | #define PAY_OFS 42 /* where in the pkt... */ 693 | 694 | static void * 695 | pinger_body(void *data) 696 | { 697 | struct targ *targ = (struct targ *) data; 698 | struct pollfd fds[1]; 699 | struct netmap_if *nifp = targ->nifp; 700 | int i, rx = 0, n = targ->g->npackets; 701 | 702 | fds[0].fd = targ->fd; 703 | fds[0].events = (POLLIN); 704 | static uint32_t sent; 705 | struct timespec ts, now, last_print; 706 | uint32_t count = 0, min = 1000000000, av = 0; 707 | 708 | if (targ->g->nthreads > 1) { 709 | D("can only ping with 1 thread"); 710 | return NULL; 711 | } 712 | 713 | clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 714 | while (!targ->cancel && (n == 0 || (int)sent < n)) { 715 | struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 716 | struct netmap_slot *slot; 717 | char *p; 718 | for (i = 0; i < 1; i++) { 719 | slot = &ring->slot[ring->cur]; 720 | slot->len = targ->g->pkt_size; 721 | p = NETMAP_BUF(ring, slot->buf_idx); 722 | 723 | if (ring->avail == 0) { 724 | D("-- ouch, cannot send"); 725 | } else { 726 | struct tstamp *tp; 727 | pkt_copy(&targ->pkt, p, targ->g->pkt_size); 728 | clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 729 | bcopy(&sent, p+42, sizeof(sent)); 730 | tp = (struct tstamp *)(p+46); 731 | tp->sec = (uint32_t)ts.tv_sec; 732 | tp->nsec = (uint32_t)ts.tv_nsec; 733 | sent++; 734 | ring->cur = NETMAP_RING_NEXT(ring, ring->cur); 735 | ring->avail--; 736 | } 737 | } 738 | /* should use a parameter to decide how often to send */ 739 | if (poll(fds, 1, 3000) <= 0) { 740 | D("poll error/timeout on queue %d", targ->me); 741 | continue; 742 | } 743 | /* see what we got back */ 744 | for (i = targ->qfirst; i < targ->qlast; i++) { 745 | ring = NETMAP_RXRING(nifp, i); 746 | while (ring->avail > 0) { 747 | uint32_t seq; 748 | struct tstamp *tp; 749 | slot = &ring->slot[ring->cur]; 750 | p = NETMAP_BUF(ring, 
slot->buf_idx); 751 | 752 | clock_gettime(CLOCK_REALTIME_PRECISE, &now); 753 | bcopy(p+42, &seq, sizeof(seq)); 754 | tp = (struct tstamp *)(p+46); 755 | ts.tv_sec = (time_t)tp->sec; 756 | ts.tv_nsec = (long)tp->nsec; 757 | 758 | ts.tv_sec = now.tv_sec - ts.tv_sec; 759 | ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 760 | if (ts.tv_nsec < 0) { 761 | ts.tv_nsec += 1000000000; 762 | ts.tv_sec--; 763 | } 764 | if (1) D("seq %d/%d delta %d.%09d", seq, sent, 765 | (int)ts.tv_sec, (int)ts.tv_nsec); 766 | if (ts.tv_nsec < (int)min) 767 | min = ts.tv_nsec; 768 | count ++; 769 | av += ts.tv_nsec; 770 | ring->avail--; 771 | ring->cur = NETMAP_RING_NEXT(ring, ring->cur); 772 | rx++; 773 | } 774 | } 775 | //D("tx %d rx %d", sent, rx); 776 | //usleep(100000); 777 | ts.tv_sec = now.tv_sec - last_print.tv_sec; 778 | ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 779 | if (ts.tv_nsec < 0) { 780 | ts.tv_nsec += 1000000000; 781 | ts.tv_sec--; 782 | } 783 | if (ts.tv_sec >= 1) { 784 | D("count %d min %d av %d", 785 | count, min, av/count); 786 | count = 0; 787 | av = 0; 788 | min = 100000000; 789 | last_print = now; 790 | } 791 | } 792 | targ->used = 0; 793 | return NULL; 794 | } 795 | 796 | 797 | /* 798 | * reply to ping requests 799 | */ 800 | static void * 801 | ponger_body(void *data) 802 | { 803 | struct targ *targ = (struct targ *) data; 804 | struct pollfd fds[1]; 805 | struct netmap_if *nifp = targ->nifp; 806 | struct netmap_ring *txring, *rxring; 807 | int i, rx = 0, sent = 0, n = targ->g->npackets; 808 | fds[0].fd = targ->fd; 809 | fds[0].events = (POLLIN); 810 | 811 | if (targ->g->nthreads > 1) { 812 | D("can only reply ping with 1 thread"); 813 | return NULL; 814 | } 815 | D("understood ponger %d but don't know how to do it", n); 816 | while (!targ->cancel && (n == 0 || sent < n)) { 817 | uint32_t txcur, txavail; 818 | //#define BUSYWAIT 819 | #ifdef BUSYWAIT 820 | ioctl(fds[0].fd, NIOCRXSYNC, NULL); 821 | #else 822 | if (poll(fds, 1, 1000) <= 0) { 823 | D("poll error/timeout on 
queue %d", targ->me); 824 | continue; 825 | } 826 | #endif 827 | txring = NETMAP_TXRING(nifp, 0); 828 | txcur = txring->cur; 829 | txavail = txring->avail; 830 | /* see what we got back */ 831 | for (i = targ->qfirst; i < targ->qlast; i++) { 832 | rxring = NETMAP_RXRING(nifp, i); 833 | while (rxring->avail > 0) { 834 | uint16_t *spkt, *dpkt; 835 | uint32_t cur = rxring->cur; 836 | struct netmap_slot *slot = &rxring->slot[cur]; 837 | char *src, *dst; 838 | src = NETMAP_BUF(rxring, slot->buf_idx); 839 | //D("got pkt %p of size %d", src, slot->len); 840 | rxring->avail--; 841 | rxring->cur = NETMAP_RING_NEXT(rxring, cur); 842 | rx++; 843 | if (!memcmp(src + 42, default_payload, 4)) 844 | continue; 845 | if (txavail == 0) 846 | continue; 847 | dst = NETMAP_BUF(txring, 848 | txring->slot[txcur].buf_idx); 849 | /* copy... */ 850 | dpkt = (uint16_t *)dst; 851 | spkt = (uint16_t *)src; 852 | pkt_copy(src, dst, slot->len); 853 | dpkt[0] = spkt[3]; 854 | dpkt[1] = spkt[4]; 855 | dpkt[2] = spkt[5]; 856 | dpkt[3] = spkt[0]; 857 | dpkt[4] = spkt[1]; 858 | dpkt[5] = spkt[2]; 859 | txring->slot[txcur].len = slot->len; 860 | /* XXX swap src dst mac */ 861 | txcur = NETMAP_RING_NEXT(txring, txcur); 862 | txavail--; 863 | sent++; 864 | } 865 | } 866 | txring->cur = txcur; 867 | txring->avail = txavail; 868 | targ->count = sent; 869 | #ifdef BUSYWAIT 870 | ioctl(fds[0].fd, NIOCTXSYNC, NULL); 871 | #endif 872 | //D("tx %d rx %d", sent, rx); 873 | } 874 | targ->used = 0; 875 | return NULL; 876 | } 877 | 878 | static __inline int 879 | timespec_ge(const struct timespec *a, const struct timespec *b) 880 | { 881 | 882 | if (a->tv_sec > b->tv_sec) 883 | return (1); 884 | if (a->tv_sec < b->tv_sec) 885 | return (0); 886 | if (a->tv_nsec >= b->tv_nsec) 887 | return (1); 888 | return (0); 889 | } 890 | 891 | static __inline struct timespec 892 | timeval2spec(const struct timeval *a) 893 | { 894 | struct timespec ts = { 895 | .tv_sec = a->tv_sec, 896 | .tv_nsec = a->tv_usec * 1000 897 | }; 898 
| return ts; 899 | } 900 | 901 | static __inline struct timeval 902 | timespec2val(const struct timespec *a) 903 | { 904 | struct timeval tv = { 905 | .tv_sec = a->tv_sec, 906 | .tv_usec = a->tv_nsec / 1000 907 | }; 908 | return tv; 909 | } 910 | 911 | 912 | static int 913 | wait_time(struct timespec ts, struct timespec *wakeup_ts, long long *waited) 914 | { 915 | struct timespec curtime; 916 | 917 | curtime.tv_sec = 0; 918 | curtime.tv_nsec = 0; 919 | 920 | if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) { 921 | D("clock_gettime: %s", strerror(errno)); 922 | return (-1); 923 | } 924 | while (timespec_ge(&ts, &curtime)) { 925 | if (waited != NULL) 926 | (*waited)++; 927 | if (clock_gettime(CLOCK_REALTIME_PRECISE, &curtime) == -1) { 928 | D("clock_gettime"); 929 | return (-1); 930 | } 931 | } 932 | if (wakeup_ts != NULL) 933 | *wakeup_ts = curtime; 934 | return (0); 935 | } 936 | 937 | static __inline void 938 | timespec_add(struct timespec *tsa, struct timespec *tsb) 939 | { 940 | tsa->tv_sec += tsb->tv_sec; 941 | tsa->tv_nsec += tsb->tv_nsec; 942 | if (tsa->tv_nsec >= 1000000000) { 943 | tsa->tv_sec++; 944 | tsa->tv_nsec -= 1000000000; 945 | } 946 | } 947 | 948 | 949 | static void * 950 | sender_body(void *data) 951 | { 952 | struct targ *targ = (struct targ *) data; 953 | 954 | struct pollfd fds[1]; 955 | struct netmap_if *nifp = targ->nifp; 956 | struct netmap_ring *txring; 957 | int i, n = targ->g->npackets / targ->g->nthreads, sent = 0; 958 | int options = targ->g->options | OPT_COPY; 959 | struct timespec tmptime, nexttime = { 0, 0}; // XXX silence compiler 960 | int rate_limit = targ->g->tx_rate; 961 | long long waited = 0; 962 | 963 | D("start"); 964 | if (setaffinity(targ->thread, targ->affinity)) 965 | goto quit; 966 | /* setup poll(2) mechanism. 
*/ 967 | memset(fds, 0, sizeof(fds)); 968 | fds[0].fd = targ->fd; 969 | fds[0].events = (POLLOUT); 970 | 971 | /* main loop.*/ 972 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 973 | if (rate_limit) { 974 | tmptime.tv_sec = 2; 975 | tmptime.tv_nsec = 0; 976 | timespec_add(&targ->tic, &tmptime); 977 | targ->tic.tv_nsec = 0; 978 | if (wait_time(targ->tic, NULL, NULL) == -1) { 979 | D("wait_time: %s", strerror(errno)); 980 | goto quit; 981 | } 982 | nexttime = targ->tic; 983 | } 984 | if (targ->g->dev_type == DEV_PCAP) { 985 | int size = targ->g->pkt_size; 986 | void *pkt = &targ->pkt; 987 | pcap_t *p = targ->g->p; 988 | 989 | for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 990 | if (pcap_inject(p, pkt, size) != -1) 991 | sent++; 992 | update_addresses(pkt, targ->g); 993 | if (i > 10000) { 994 | targ->count = sent; 995 | i = 0; 996 | } 997 | } 998 | } else if (targ->g->dev_type == DEV_TAP) { /* tap */ 999 | int size = targ->g->pkt_size; 1000 | void *pkt = &targ->pkt; 1001 | D("writing to file desc %d", targ->g->main_fd); 1002 | 1003 | for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1004 | if (write(targ->g->main_fd, pkt, size) != -1) 1005 | sent++; 1006 | update_addresses(pkt, targ->g); 1007 | if (i > 10000) { 1008 | targ->count = sent; 1009 | i = 0; 1010 | } 1011 | } 1012 | } else { 1013 | int tosend = 0; 1014 | int frags = targ->g->frags; 1015 | struct dst_range *dst; 1016 | 1017 | dst = targ->g->dst.name ? 
&targ->g->dst : NULL; 1018 | while (!targ->cancel && (n == 0 || sent < n)) { 1019 | 1020 | if (rate_limit && tosend <= 0) { 1021 | tosend = targ->g->burst; 1022 | timespec_add(&nexttime, &targ->g->tx_period); 1023 | if (wait_time(nexttime, &tmptime, &waited) == -1) { 1024 | D("wait_time"); 1025 | goto quit; 1026 | } 1027 | } 1028 | 1029 | /* 1030 | * wait for available room in the send queue(s) 1031 | */ 1032 | if (poll(fds, 1, 2000) <= 0) { 1033 | if (targ->cancel) 1034 | break; 1035 | D("poll error/timeout on queue %d", targ->me); 1036 | goto quit; 1037 | } 1038 | /* 1039 | * scan our queues and send on those with room 1040 | */ 1041 | if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) { 1042 | D("drop copy"); 1043 | options &= ~OPT_COPY; 1044 | } 1045 | for (i = targ->qfirst; i < targ->qlast; i++) { 1046 | int m, limit = rate_limit ? tosend : targ->g->burst; 1047 | if (n > 0 && n - sent < limit) 1048 | limit = n - sent; 1049 | txring = NETMAP_TXRING(nifp, i); 1050 | if (txring->avail == 0) 1051 | continue; 1052 | if (frags > 1) 1053 | limit = ((limit + frags - 1) / frags) * frags; 1054 | 1055 | m = send_packets(txring, &targ->pkt, targ->g, 1056 | limit, options, frags, dst); 1057 | ND("limit %d avail %d frags %d m %d", 1058 | limit, txring->avail, frags, m); 1059 | sent += m; 1060 | targ->count = sent; 1061 | if (rate_limit) { 1062 | tosend -= m; 1063 | if (tosend <= 0) 1064 | break; 1065 | } 1066 | } 1067 | } 1068 | /* flush any remaining packets */ 1069 | ioctl(fds[0].fd, NIOCTXSYNC, NULL); 1070 | 1071 | /* final part: wait all the TX queues to be empty. 
*/ 1072 | for (i = targ->qfirst; i < targ->qlast; i++) { 1073 | txring = NETMAP_TXRING(nifp, i); 1074 | while (!NETMAP_TX_RING_EMPTY(txring)) { 1075 | ioctl(fds[0].fd, NIOCTXSYNC, NULL); 1076 | usleep(1); /* wait 1 tick */ 1077 | } 1078 | } 1079 | } 1080 | 1081 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1082 | targ->completed = 1; 1083 | targ->count = sent; 1084 | 1085 | quit: 1086 | /* reset the ``used`` flag. */ 1087 | targ->used = 0; 1088 | 1089 | return (NULL); 1090 | } 1091 | 1092 | 1093 | static void 1094 | receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1095 | const u_char * bytes) 1096 | { 1097 | int *count = (int *)user; 1098 | (void)h; /* UNUSED */ 1099 | (void)bytes; /* UNUSED */ 1100 | (*count)++; 1101 | } 1102 | 1103 | static int 1104 | receive_packets(struct netmap_ring *ring, u_int limit, int dump) 1105 | { 1106 | u_int cur, rx; 1107 | 1108 | cur = ring->cur; 1109 | if (ring->avail < limit) 1110 | limit = ring->avail; 1111 | for (rx = 0; rx < limit; rx++) { 1112 | struct netmap_slot *slot = &ring->slot[cur]; 1113 | char *p = NETMAP_BUF(ring, slot->buf_idx); 1114 | 1115 | if (dump) 1116 | dump_payload(p, slot->len, ring, cur); 1117 | 1118 | cur = NETMAP_RING_NEXT(ring, cur); 1119 | } 1120 | ring->avail -= rx; 1121 | ring->cur = cur; 1122 | 1123 | return (rx); 1124 | } 1125 | 1126 | static void * 1127 | receiver_body(void *data) 1128 | { 1129 | struct targ *targ = (struct targ *) data; 1130 | struct pollfd fds[1]; 1131 | struct netmap_if *nifp = targ->nifp; 1132 | struct netmap_ring *rxring; 1133 | int i; 1134 | uint64_t received = 0; 1135 | 1136 | if (setaffinity(targ->thread, targ->affinity)) 1137 | goto quit; 1138 | 1139 | /* setup poll(2) mechanism. */ 1140 | memset(fds, 0, sizeof(fds)); 1141 | fds[0].fd = targ->fd; 1142 | fds[0].events = (POLLIN); 1143 | 1144 | /* unbounded wait for the first packet. 
*/ 1145 | for (;!targ->cancel;) { 1146 | i = poll(fds, 1, 1000); 1147 | if (i > 0 && !(fds[0].revents & POLLERR)) 1148 | break; 1149 | D("waiting for initial packets, poll returns %d %d", i, fds[0].revents); 1150 | } 1151 | 1152 | /* main loop, exit after 1s silence */ 1153 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1154 | if (targ->g->dev_type == DEV_PCAP) { 1155 | while (!targ->cancel) { 1156 | /* XXX should we poll ? */ 1157 | pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); 1158 | } 1159 | } else if (targ->g->dev_type == DEV_TAP) { 1160 | D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd); 1161 | while (!targ->cancel) { 1162 | char buf[2048]; 1163 | /* XXX should we poll ? */ 1164 | if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) 1165 | targ->count++; 1166 | } 1167 | } else { 1168 | int dump = targ->g->options & OPT_DUMP; 1169 | while (!targ->cancel) { 1170 | /* Once we started to receive packets, wait at most 1 seconds 1171 | before quitting. */ 1172 | if (poll(fds, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1173 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1174 | targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1175 | break; 1176 | } 1177 | 1178 | for (i = targ->qfirst; i < targ->qlast; i++) { 1179 | int m; 1180 | 1181 | rxring = NETMAP_RXRING(nifp, i); 1182 | if (rxring->avail == 0) 1183 | continue; 1184 | 1185 | m = receive_packets(rxring, targ->g->burst, dump); 1186 | received += m; 1187 | } 1188 | targ->count = received; 1189 | 1190 | // tell the card we have read the data 1191 | //ioctl(fds[0].fd, NIOCRXSYNC, NULL); 1192 | } 1193 | } 1194 | 1195 | targ->completed = 1; 1196 | targ->count = received; 1197 | 1198 | quit: 1199 | /* reset the ``used`` flag. */ 1200 | targ->used = 0; 1201 | 1202 | return (NULL); 1203 | } 1204 | 1205 | /* very crude code to print a number in normalized form. 1206 | * Caller has to make sure that the buffer is large enough. 
1207 | */ 1208 | static const char * 1209 | norm(char *buf, double val) 1210 | { 1211 | char *units[] = { "", "K", "M", "G" }; 1212 | u_int i; 1213 | 1214 | for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *); i++) 1215 | val /= 1000; 1216 | sprintf(buf, "%.2f %s", val, units[i]); 1217 | return buf; 1218 | } 1219 | 1220 | static void 1221 | tx_output(uint64_t sent, int size, double delta) 1222 | { 1223 | double bw, raw_bw, pps; 1224 | char b1[40], b2[80], b3[80]; 1225 | 1226 | printf("Sent %" PRIu64 " packets, %d bytes each, in %.2f seconds.\n", 1227 | sent, size, delta); 1228 | if (delta == 0) 1229 | delta = 1e-6; 1230 | if (size < 60) /* correct for min packet size */ 1231 | size = 60; 1232 | pps = sent / delta; 1233 | bw = (8.0 * size * sent) / delta; 1234 | /* raw packets have4 bytes crc + 20 bytes framing */ 1235 | raw_bw = (8.0 * (size + 24) * sent) / delta; 1236 | 1237 | printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", 1238 | norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); 1239 | } 1240 | 1241 | 1242 | static void 1243 | rx_output(uint64_t received, double delta) 1244 | { 1245 | double pps; 1246 | char b1[40]; 1247 | 1248 | printf("Received %" PRIu64 " packets, in %.2f seconds.\n", received, delta); 1249 | 1250 | if (delta == 0) 1251 | delta = 1e-6; 1252 | pps = received / delta; 1253 | printf("Speed: %spps\n", norm(b1, pps)); 1254 | } 1255 | 1256 | static void 1257 | usage(void) 1258 | { 1259 | const char *cmd = "pkt-gen"; 1260 | fprintf(stderr, 1261 | "Usage:\n" 1262 | "%s arguments\n" 1263 | "\t-i interface interface name\n" 1264 | "\t-f function tx rx ping pong\n" 1265 | "\t-n count number of iterations (can be 0)\n" 1266 | "\t-t pkts_to_send also forces tx mode\n" 1267 | "\t-r pkts_to_receive also forces rx mode\n" 1268 | "\t-l pkt_size in bytes excluding CRC\n" 1269 | "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" 1270 | "\t-s src_ip[:port[-src_ip:port]] single or range\n" 1271 | "\t-D dst-mac\n" 1272 | "\t-S src-mac\n" 1273 | "\t-a 
cpu_id use setaffinity\n" 1274 | "\t-b burst size testing, mostly\n" 1275 | "\t-c cores cores to use\n" 1276 | "\t-p threads processes/threads to use\n" 1277 | "\t-T report_ms milliseconds between reports\n" 1278 | "\t-P use libpcap instead of netmap\n" 1279 | "\t-w wait_for_link_time in seconds\n" 1280 | "\t-R rate in packets per second\n" 1281 | "\t-X dump payload\n" 1282 | "", 1283 | cmd); 1284 | 1285 | exit(0); 1286 | } 1287 | 1288 | static void 1289 | start_threads(struct glob_arg *g) 1290 | { 1291 | int i; 1292 | 1293 | targs = calloc(g->nthreads, sizeof(*targs)); 1294 | /* 1295 | * Now create the desired number of threads, each one 1296 | * using a single descriptor. 1297 | */ 1298 | for (i = 0; i < g->nthreads; i++) { 1299 | bzero(&targs[i], sizeof(targs[i])); 1300 | targs[i].fd = -1; /* default, with pcap */ 1301 | targs[i].g = g; 1302 | 1303 | if (g->dev_type == DEV_NETMAP) { 1304 | struct nmreq tifreq; 1305 | int tfd; 1306 | int q = (g->qfirst & ~NETMAP_HW_RING) + i; 1307 | 1308 | /* register interface. */ 1309 | tfd = open("/dev/netmap", O_RDWR); 1310 | if (tfd == -1) { 1311 | D("Unable to open /dev/netmap"); 1312 | continue; 1313 | } 1314 | targs[i].fd = tfd; 1315 | 1316 | bzero(&tifreq, sizeof(tifreq)); 1317 | strncpy(tifreq.nr_name, g->ifname, sizeof(tifreq.nr_name)); 1318 | tifreq.nr_version = NETMAP_API; 1319 | tifreq.nr_ringid = (g->nthreads > 1 || 1320 | g->qfirst & NETMAP_HW_RING) ? q | NETMAP_HW_RING : 0; 1321 | parse_nmr_config(g->nmr_config, &tifreq); 1322 | 1323 | /* 1324 | * if we are acting as a receiver only, do not touch the transmit ring. 1325 | * This is not the default because many apps may use the interface 1326 | * in both directions, but a pure receiver does not. 
1327 | */ 1328 | if (g->td_body == receiver_body) { 1329 | tifreq.nr_ringid |= NETMAP_NO_TX_POLL; 1330 | } 1331 | 1332 | if ((ioctl(tfd, NIOCREGIF, &tifreq)) == -1) { 1333 | D("Unable to register %s", g->ifname); 1334 | continue; 1335 | } 1336 | targs[i].nmr = tifreq; 1337 | targs[i].nifp = NETMAP_IF(g->mmap_addr, tifreq.nr_offset); 1338 | /* start threads. */ 1339 | targs[i].qfirst = (g->nthreads > 1 || 1340 | g->qfirst & NETMAP_HW_RING) ? q : 0; 1341 | targs[i].qlast = (g->nthreads > 1 || 1342 | g->qfirst & NETMAP_HW_RING) ? q+1 : 1343 | (g->td_body == receiver_body ? tifreq.nr_rx_rings : tifreq.nr_tx_rings); 1344 | } else { 1345 | targs[i].fd = g->main_fd; 1346 | } 1347 | targs[i].used = 1; 1348 | targs[i].me = i; 1349 | if (g->affinity2 >= 0) 1350 | targs[i].affinity = (g->affinity2 + i) % g->cpus; 1351 | else if (g->affinity >= 0) { 1352 | if (g->affinity < g->cpus) 1353 | targs[i].affinity = g->affinity; 1354 | else 1355 | targs[i].affinity = i % g->cpus; 1356 | } else 1357 | targs[i].affinity = -1; 1358 | /* default, init packets */ 1359 | initialize_packet(&targs[i]); 1360 | 1361 | if (pthread_create(&targs[i].thread, NULL, g->td_body, 1362 | &targs[i]) == -1) { 1363 | D("Unable to create thread %d", i); 1364 | targs[i].used = 0; 1365 | } 1366 | } 1367 | } 1368 | 1369 | static void 1370 | main_thread(struct glob_arg *g) 1371 | { 1372 | int i; 1373 | 1374 | uint64_t prev = 0; 1375 | uint64_t count = 0; 1376 | double delta_t; 1377 | struct timeval tic, toc; 1378 | 1379 | gettimeofday(&toc, NULL); 1380 | for (;;) { 1381 | struct timeval now, delta; 1382 | uint64_t pps, usec, my_count, npkts; 1383 | int done = 0; 1384 | 1385 | delta.tv_sec = g->report_interval/1000; 1386 | delta.tv_usec = (g->report_interval%1000)*1000; 1387 | select(0, NULL, NULL, NULL, &delta); 1388 | gettimeofday(&now, NULL); 1389 | time_second = now.tv_sec; 1390 | timersub(&now, &toc, &toc); 1391 | my_count = 0; 1392 | for (i = 0; i < g->nthreads; i++) { 1393 | my_count += targs[i].count; 
1394 | if (targs[i].used == 0) 1395 | done++; 1396 | } 1397 | usec = toc.tv_sec* 1000000 + toc.tv_usec; 1398 | if (usec < 10000) 1399 | continue; 1400 | npkts = my_count - prev; 1401 | pps = (npkts*1000000 + usec/2) / usec; 1402 | D("%" PRIu64 " pps (%" PRIu64 " pkts in %" PRIu64 " usec)", 1403 | pps, npkts, usec); 1404 | prev = my_count; 1405 | toc = now; 1406 | if (done == g->nthreads) 1407 | break; 1408 | } 1409 | 1410 | timerclear(&tic); 1411 | timerclear(&toc); 1412 | for (i = 0; i < g->nthreads; i++) { 1413 | struct timespec t_tic, t_toc; 1414 | /* 1415 | * Join active threads, unregister interfaces and close 1416 | * file descriptors. 1417 | */ 1418 | if (targs[i].used) 1419 | pthread_join(targs[i].thread, NULL); 1420 | close(targs[i].fd); 1421 | 1422 | if (targs[i].completed == 0) 1423 | D("ouch, thread %d exited with error", i); 1424 | 1425 | /* 1426 | * Collect threads output and extract information about 1427 | * how long it took to send all the packets. 1428 | */ 1429 | count += targs[i].count; 1430 | t_tic = timeval2spec(&tic); 1431 | t_toc = timeval2spec(&toc); 1432 | if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic)) 1433 | tic = timespec2val(&targs[i].tic); 1434 | if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc)) 1435 | toc = timespec2val(&targs[i].toc); 1436 | } 1437 | 1438 | /* print output. 
*/ 1439 | timersub(&toc, &tic, &toc); 1440 | delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 1441 | if (g->td_body == sender_body) 1442 | tx_output(count, g->pkt_size, delta_t); 1443 | else 1444 | rx_output(count, delta_t); 1445 | 1446 | #ifdef MULTISTACK 1447 | g->msr.mr_cmd = MSTACK_UNBIND; 1448 | if (ioctl(g->so, NIOCCONFIG, &g->msr)) 1449 | perror("ioctl"); 1450 | close(g->so); 1451 | #endif 1452 | if (g->dev_type == DEV_NETMAP) { 1453 | munmap(g->mmap_addr, g->mmap_size); 1454 | close(g->main_fd); 1455 | } 1456 | } 1457 | 1458 | 1459 | struct sf { 1460 | char *key; 1461 | void *f; 1462 | }; 1463 | 1464 | static struct sf func[] = { 1465 | { "tx", sender_body }, 1466 | { "rx", receiver_body }, 1467 | { "ping", pinger_body }, 1468 | { "pong", ponger_body }, 1469 | { NULL, NULL } 1470 | }; 1471 | 1472 | static int 1473 | tap_alloc(char *dev) 1474 | { 1475 | struct ifreq ifr; 1476 | int fd, err; 1477 | char *clonedev = TAP_CLONEDEV; 1478 | 1479 | (void)err; 1480 | (void)dev; 1481 | /* Arguments taken by the function: 1482 | * 1483 | * char *dev: the name of an interface (or '\0'). MUST have enough 1484 | * space to hold the interface name if '\0' is passed 1485 | * int flags: interface flags (eg, IFF_TUN etc.) 
1486 | */ 1487 | 1488 | #ifdef __FreeBSD__ 1489 | if (dev[3]) { /* tapSomething */ 1490 | static char buf[128]; 1491 | snprintf(buf, sizeof(buf), "/dev/%s", dev); 1492 | clonedev = buf; 1493 | } 1494 | #endif 1495 | /* open the device */ 1496 | if( (fd = open(clonedev, O_RDWR)) < 0 ) { 1497 | return fd; 1498 | } 1499 | D("%s open successful", clonedev); 1500 | 1501 | /* preparation of the struct ifr, of type "struct ifreq" */ 1502 | memset(&ifr, 0, sizeof(ifr)); 1503 | 1504 | #ifdef linux 1505 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1506 | 1507 | if (*dev) { 1508 | /* if a device name was specified, put it in the structure; otherwise, 1509 | * the kernel will try to allocate the "next" device of the 1510 | * specified type */ 1511 | strncpy(ifr.ifr_name, dev, IFNAMSIZ); 1512 | } 1513 | 1514 | /* try to create the device */ 1515 | if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 1516 | D("failed to to a TUNSETIFF"); 1517 | close(fd); 1518 | return err; 1519 | } 1520 | 1521 | /* if the operation was successful, write back the name of the 1522 | * interface to the variable "dev", so the caller can know 1523 | * it. 
Note that the caller MUST reserve space in *dev (see calling 1524 | * code below) */ 1525 | strcpy(dev, ifr.ifr_name); 1526 | D("new name is %s", dev); 1527 | #endif /* linux */ 1528 | 1529 | /* this is the special file descriptor that the caller will use to talk 1530 | * with the virtual interface */ 1531 | return fd; 1532 | } 1533 | 1534 | int 1535 | main(int arc, char **argv) 1536 | { 1537 | int i; 1538 | 1539 | struct glob_arg g; 1540 | 1541 | struct nmreq nmr; 1542 | int ch; 1543 | int wait_link = 2; 1544 | int devqueues = 1; /* how many device queues */ 1545 | 1546 | bzero(&g, sizeof(g)); 1547 | 1548 | g.main_fd = -1; 1549 | g.td_body = receiver_body; 1550 | g.report_interval = 1000; /* report interval */ 1551 | g.affinity = g.affinity2 = -1; 1552 | /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 1553 | g.src_ip.name = "10.0.0.1"; 1554 | g.dst_ip.name = "10.1.0.1"; 1555 | g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 1556 | g.src_mac.name = NULL; 1557 | g.pkt_size = 60; 1558 | g.burst = 512; // default 1559 | g.nthreads = 1; 1560 | g.cpus = 1; 1561 | g.forever = 1; 1562 | g.tx_rate = 0; 1563 | g.frags = 1; 1564 | g.nmr_config = ""; 1565 | g.qfirst = 0; 1566 | g.dst.name = NULL; 1567 | 1568 | while ( (ch = getopt(arc, argv, 1569 | "a:f:F:n:i:It:r:l:d:s:D:S:b:c:o:p:PT:w:WvR:XC:m:q:A:")) != -1) { 1570 | struct sf *fn; 1571 | 1572 | switch(ch) { 1573 | default: 1574 | D("bad option %c %s", ch, optarg); 1575 | usage(); 1576 | break; 1577 | 1578 | case 'n': 1579 | g.npackets = atoi(optarg); 1580 | break; 1581 | 1582 | case 'F': 1583 | i = atoi(optarg); 1584 | if (i < 1 || i > 63) { 1585 | D("invalid frags %d [1..63], ignore", i); 1586 | break; 1587 | } 1588 | g.frags = i; 1589 | break; 1590 | 1591 | case 'f': 1592 | for (fn = func; fn->key; fn++) { 1593 | if (!strcmp(fn->key, optarg)) 1594 | break; 1595 | } 1596 | if (fn->key) 1597 | g.td_body = fn->f; 1598 | else 1599 | D("unrecognised function %s", optarg); 1600 | break; 1601 | 1602 | case 'o': /* data generation 
options */ 1603 | g.options = atoi(optarg); 1604 | break; 1605 | 1606 | case 'a': /* force affinity */ 1607 | g.affinity = atoi(optarg); 1608 | break; 1609 | 1610 | case 'A': /* another affinity */ 1611 | g.affinity2 = atoi(optarg); 1612 | break; 1613 | 1614 | case 'i': /* interface */ 1615 | g.ifname = optarg; 1616 | if (!strncmp(optarg, "tap", 3)) 1617 | g.dev_type = DEV_TAP; 1618 | else 1619 | g.dev_type = DEV_NETMAP; 1620 | if (!strcmp(g.ifname, "null")) 1621 | g.dummy_send = 1; 1622 | break; 1623 | 1624 | case 'I': 1625 | g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 1626 | break; 1627 | 1628 | case 't': /* send, deprecated */ 1629 | D("-t deprecated, please use -f tx -n %s", optarg); 1630 | g.td_body = sender_body; 1631 | g.npackets = atoi(optarg); 1632 | break; 1633 | 1634 | case 'r': /* receive */ 1635 | D("-r deprecated, please use -f rx -n %s", optarg); 1636 | g.td_body = receiver_body; 1637 | g.npackets = atoi(optarg); 1638 | break; 1639 | 1640 | case 'l': /* pkt_size */ 1641 | g.pkt_size = atoi(optarg); 1642 | break; 1643 | 1644 | case 'd': 1645 | g.dst_ip.name = optarg; 1646 | break; 1647 | 1648 | case 's': 1649 | g.src_ip.name = optarg; 1650 | break; 1651 | 1652 | case 'T': /* report interval */ 1653 | g.report_interval = atoi(optarg); 1654 | break; 1655 | 1656 | case 'w': 1657 | wait_link = atoi(optarg); 1658 | break; 1659 | 1660 | case 'W': /* XXX changed default */ 1661 | g.forever = 0; /* do not exit rx even with no traffic */ 1662 | break; 1663 | 1664 | case 'b': /* burst */ 1665 | g.burst = atoi(optarg); 1666 | break; 1667 | case 'c': 1668 | g.cpus = atoi(optarg); 1669 | break; 1670 | case 'p': 1671 | g.nthreads = atoi(optarg); 1672 | break; 1673 | 1674 | case 'P': 1675 | g.dev_type = DEV_PCAP; 1676 | break; 1677 | 1678 | case 'D': /* destination mac */ 1679 | g.dst_mac.name = optarg; 1680 | break; 1681 | 1682 | case 'S': /* source mac */ 1683 | g.src_mac.name = optarg; 1684 | break; 1685 | case 'v': 1686 | verbose++; 1687 | break; 
1688 | case 'R': 1689 | g.tx_rate = atoi(optarg); 1690 | break; 1691 | case 'X': 1692 | g.options |= OPT_DUMP; 1693 | break; 1694 | case 'C': 1695 | g.nmr_config = strdup(optarg); 1696 | break; 1697 | case 'm': 1698 | g.dst.name = strdup(optarg); 1699 | break; 1700 | case 'q': 1701 | g.qfirst = atoi(optarg) | NETMAP_HW_RING; 1702 | } 1703 | } 1704 | 1705 | if (g.ifname == NULL) { 1706 | D("missing ifname"); 1707 | usage(); 1708 | } 1709 | 1710 | i = system_ncpus(); 1711 | if (g.cpus < 0 || g.cpus > i) { 1712 | D("%d cpus is too high, have only %d cpus", g.cpus, i); 1713 | usage(); 1714 | } 1715 | if (g.cpus == 0) 1716 | g.cpus = i; 1717 | 1718 | if (g.pkt_size < 16 || g.pkt_size > 2048 /* default slot size */) { 1719 | D("bad pktsize %d\n", g.pkt_size); 1720 | usage(); 1721 | } 1722 | 1723 | if (g.src_mac.name == NULL) { 1724 | static char mybuf[20] = "00:00:00:00:00:00"; 1725 | int no; 1726 | /* retrieve source mac address. */ 1727 | no = source_hwaddr(g.ifname, mybuf); 1728 | if (no == -1) { 1729 | D("Unable to retrieve source mac"); 1730 | // continue, fail later 1731 | } else if (no && !strncmp(g.ifname, "vale", strlen("vale"))) { 1732 | /* retry for persistent virtual interface */ 1733 | char *p = strchr(g.ifname, ':'); 1734 | p = !p ? 
g.ifname + strlen("vale") : p + 1; 1735 | if (source_hwaddr(p, mybuf) == -1) { 1736 | D("Unable to retrieve source mac"); 1737 | // continue, fail later 1738 | } 1739 | } 1740 | g.src_mac.name = mybuf; 1741 | } 1742 | /* extract address ranges */ 1743 | extract_ip_range(&g.src_ip); 1744 | extract_ip_range(&g.dst_ip); 1745 | extract_mac_range(&g.src_mac); 1746 | extract_mac_range(&g.dst_mac); 1747 | extract_dst_range(&g.dst); 1748 | 1749 | if (g.dev_type == DEV_TAP) { 1750 | D("want to use tap %s", g.ifname); 1751 | g.main_fd = tap_alloc(g.ifname); 1752 | if (g.main_fd < 0) { 1753 | D("cannot open tap %s", g.ifname); 1754 | usage(); 1755 | } 1756 | } else if (g.dev_type > DEV_NETMAP) { 1757 | char pcap_errbuf[PCAP_ERRBUF_SIZE]; 1758 | 1759 | D("using pcap on %s", g.ifname); 1760 | pcap_errbuf[0] = '\0'; // init the buffer 1761 | g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf); 1762 | if (g.p == NULL) { 1763 | D("cannot open pcap on %s", g.ifname); 1764 | usage(); 1765 | } 1766 | } else if (g.dummy_send) { 1767 | D("using a dummy send routine"); 1768 | } else { 1769 | bzero(&nmr, sizeof(nmr)); 1770 | nmr.nr_version = NETMAP_API; 1771 | /* 1772 | * Open the netmap device to fetch the number of queues of our 1773 | * interface. 1774 | * 1775 | * The first NIOCREGIF also detaches the card from the 1776 | * protocol stack and may cause a reset of the card, 1777 | * which in turn may take some time for the PHY to 1778 | * reconfigure. 1779 | */ 1780 | g.main_fd = open("/dev/netmap", O_RDWR); 1781 | if (g.main_fd == -1) { 1782 | D("Unable to open /dev/netmap"); 1783 | // fail later 1784 | } 1785 | /* 1786 | * Register the interface on the netmap device: from now on, 1787 | * we can operate on the network interface without any 1788 | * interference from the legacy network stack. 1789 | * 1790 | * We decide to put the first interface registration here to 1791 | * give time to cards that take a long time to reset the PHY. 
1792 | */ 1793 | bzero(&nmr, sizeof(nmr)); 1794 | nmr.nr_version = NETMAP_API; 1795 | strncpy(nmr.nr_name, g.ifname, sizeof(nmr.nr_name)); 1796 | nmr.nr_version = NETMAP_API; 1797 | parse_nmr_config(g.nmr_config, &nmr); 1798 | if (ioctl(g.main_fd, NIOCREGIF, &nmr) == -1) { 1799 | D("Unable to register interface %s", g.ifname); 1800 | //continue, fail later 1801 | } 1802 | ND("%s: txr %d txd %d rxr %d rxd %d", g.ifname, 1803 | nmr.nr_tx_rings, nmr.nr_tx_slots, 1804 | nmr.nr_rx_rings, nmr.nr_rx_slots); 1805 | //if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) { 1806 | // D("Unable to get if info without name"); 1807 | //} else { 1808 | // D("map size is %d Kb", nmr.nr_memsize >> 10); 1809 | //} 1810 | if ((ioctl(g.main_fd, NIOCGINFO, &nmr)) == -1) { 1811 | D("Unable to get if info for %s", g.ifname); 1812 | } 1813 | devqueues = nmr.nr_rx_rings; 1814 | 1815 | /* validate provided nthreads. */ 1816 | if (g.nthreads < 1 || g.nthreads > devqueues) { 1817 | D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 1818 | // continue, fail later 1819 | } 1820 | /* validate provided ring */ 1821 | if ((g.qfirst & ~NETMAP_HW_RING) >= (u_int)devqueues) { 1822 | D("bad qfirst(%d)", g.qfirst & ~NETMAP_HW_RING); 1823 | // continue, fail later 1824 | } 1825 | 1826 | /* 1827 | * Map the netmap shared memory: instead of issuing mmap() 1828 | * inside the body of the threads, we prefer to keep this 1829 | * operation here to simplify the thread logic. 1830 | */ 1831 | D("mapping %d Kbytes", nmr.nr_memsize>>10); 1832 | g.mmap_size = nmr.nr_memsize; 1833 | g.mmap_addr = (struct netmap_d *) mmap(0, nmr.nr_memsize, 1834 | PROT_WRITE | PROT_READ, 1835 | MAP_SHARED, g.main_fd, 0); 1836 | if (g.mmap_addr == MAP_FAILED) { 1837 | D("Unable to mmap %d KB", nmr.nr_memsize >> 10); 1838 | // continue, fail later 1839 | } 1840 | 1841 | 1842 | 1843 | /* Print some debug information. */ 1844 | fprintf(stdout, 1845 | "%s %s: %d queues, %d threads and %d cpus.\n", 1846 | (g.td_body == sender_body) ? 
"Sending on" : "Receiving from", 1847 | g.ifname, 1848 | devqueues, 1849 | g.nthreads, 1850 | g.cpus); 1851 | if (g.td_body == sender_body) { 1852 | fprintf(stdout, "%s -> %s (%s -> %s)\n", 1853 | g.src_ip.name, g.dst_ip.name, 1854 | g.src_mac.name, g.dst_mac.name); 1855 | } 1856 | 1857 | /* Exit if something went wrong. */ 1858 | if (g.main_fd < 0) { 1859 | D("aborting"); 1860 | usage(); 1861 | } 1862 | } 1863 | 1864 | 1865 | if (g.options) { 1866 | D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", 1867 | g.options & OPT_PREFETCH ? " prefetch" : "", 1868 | g.options & OPT_ACCESS ? " access" : "", 1869 | g.options & OPT_MEMCPY ? " memcpy" : "", 1870 | g.options & OPT_INDIRECT ? " indirect" : "", 1871 | g.options & OPT_COPY ? " copy" : ""); 1872 | } 1873 | 1874 | g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 1875 | if (g.tx_rate > 0) { 1876 | /* try to have at least something every second, 1877 | * reducing the burst size to 0.5s worth of data 1878 | * (but no less than one full set of fragments) 1879 | */ 1880 | if (g.burst > g.tx_rate/2) 1881 | g.burst = g.tx_rate/2; 1882 | if (g.burst < g.frags) 1883 | g.burst = g.frags; 1884 | g.tx_period.tv_nsec = (1e9 / g.tx_rate) * g.burst; 1885 | g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 1886 | g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 1887 | } 1888 | if (g.td_body == sender_body) 1889 | D("Sending %d packets every %ld.%09ld s", 1890 | g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 1891 | /* Wait for PHY reset. */ 1892 | D("Wait %d secs for phy reset", wait_link); 1893 | sleep(wait_link); 1894 | D("Ready..."); 1895 | 1896 | /* Install ^C handler. 
#!/bin/sh
#
# Attach (start) or detach (finish) an interface to/from the "valem"
# VALE switch and load/unload the multistack kernel module.
#
# Usage: setup.sh ifname [start|finish]

VALE_CTL=../../netmap-release/examples/vale-ctl

if [ "$#" -ne 2 ]; then
	echo "Usage: setup.sh ifname [start|finish]" >&2
	exit 1
fi

if [ ! -f "$VALE_CTL" ]; then
	echo "Usage: place $VALE_CTL" >&2
	exit 1
fi

ifname=$1
os=$(uname)

case "$2" in
start)
	"$VALE_CTL" -a "valem:$ifname"
	if [ "$os" = "FreeBSD" ]; then
		kldload ../sys/contrib/multistack/multistack.ko
	elif [ "$os" = "Linux" ]; then
		insmod ../LINUX/multistack_lin.ko
	fi
	;;
finish)
	if [ "$os" = "FreeBSD" ]; then
		kldunload multistack
	elif [ "$os" = "Linux" ]; then
		# the module name follows the .ko file inserted by "start"
		rmmod multistack_lin
	fi
	"$VALE_CTL" -d "valem:$ifname"
	;;
*)
	echo "Usage: setup.sh ifname [start|finish]" >&2
	exit 1
	;;
esac
# Build the multistack example programs:
#   pkt-gen   - packet generator, compiled with -DMULTISTACK
#   test_bind - bind/unbind test program
PROGS := pkt-gen test_bind
CLEANFILES := $(PROGS) *.o

CFLAGS = -O2 -pipe
CFLAGS += -Werror -Wall -Wextra
# Preprocessor settings live in CPPFLAGS so that `make CFLAGS=...` does not
# silently drop the include path or the MULTISTACK define.
CPPFLAGS += -I ../sys
CPPFLAGS += -DMULTISTACK
# Libraries belong in LDLIBS (placed after the objects on the link line);
# LDFLAGS stays available for -L and other linker options.
LDLIBS = -lpcap -lrt -lpthread

.PHONY: all clean

all: $(PROGS)

# pkt-gen.o itself is produced by make's built-in %.o: %.c rule, which
# uses $(CPPFLAGS) and $(CFLAGS).
pkt-gen: pkt-gen.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LDLIBS)

test_bind: test_bind.c
	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -o $@ $<

clean:
	$(RM) -r $(CLEANFILES)
31 | */ 32 | 33 | #ifndef _NM_UTIL_H 34 | #define _NM_UTIL_H 35 | 36 | #define _GNU_SOURCE /* for CPU_SET() */ 37 | 38 | #include /* fprintf */ 39 | #include /* POLLIN */ 40 | #include /* PRI* macros */ 41 | #include /* u_char */ 42 | 43 | #include /* ntohs */ 44 | #include /* sysctl */ 45 | #include /* getifaddrs */ 46 | #include /* ETHERTYPE_IP */ 47 | #include /* IPPROTO_* */ 48 | #include /* struct ip */ 49 | #include /* struct udp */ 50 | 51 | 52 | #define NETMAP_WITH_LIBS 53 | #include 54 | 55 | #include /* pthread_* */ 56 | 57 | #ifdef linux 58 | 59 | #define cpuset_t cpu_set_t 60 | 61 | #define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 62 | #define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 63 | #include 64 | #include 65 | 66 | #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 67 | #include /* ether_aton */ 68 | #include /* sockaddr_ll */ 69 | #endif /* linux */ 70 | 71 | #ifdef __FreeBSD__ 72 | #include /* le64toh */ 73 | #include 74 | 75 | #include /* pthread w/ affinity */ 76 | #include /* cpu_set */ 77 | #include /* LLADDR */ 78 | #endif /* __FreeBSD__ */ 79 | 80 | #ifdef __APPLE__ 81 | 82 | #define cpuset_t uint64_t // XXX 83 | static inline void CPU_ZERO(cpuset_t *p) 84 | { 85 | *p = 0; 86 | } 87 | 88 | static inline void CPU_SET(uint32_t i, cpuset_t *p) 89 | { 90 | *p |= 1<< (i & 0x3f); 91 | } 92 | 93 | #define pthread_setaffinity_np(a, b, c) ((void)a, 0) 94 | 95 | #define ifr_flagshigh ifr_flags // XXX 96 | #define IFF_PPROMISC IFF_PROMISC 97 | #include /* LLADDR */ 98 | #define clock_gettime(a,b) \ 99 | do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 100 | #endif /* __APPLE__ */ 101 | 102 | static inline int min(int a, int b) { return a < b ? a : b; } 103 | extern int time_second; 104 | 105 | /* debug support */ 106 | #define ND(format, ...) do {} while(0) 107 | #define D(format, ...) 
\ 108 | fprintf(stderr, "%s [%d] " format "\n", \ 109 | __FUNCTION__, __LINE__, ##__VA_ARGS__) 110 | 111 | #define RD(lps, format, ...) \ 112 | do { \ 113 | static int t0, cnt; \ 114 | if (t0 != time_second) { \ 115 | t0 = time_second; \ 116 | cnt = 0; \ 117 | } \ 118 | if (cnt++ < lps) \ 119 | D(format, ##__VA_ARGS__); \ 120 | } while (0) 121 | 122 | 123 | 124 | struct nm_desc * netmap_open(const char *name, int ringid, int promisc); 125 | int nm_do_ioctl(struct nm_desc *me, u_long what, int subcmd); 126 | int pkt_queued(struct nm_desc *d, int tx); 127 | #endif /* _NM_UTIL_H */ 128 | -------------------------------------------------------------------------------- /examples/pkt-gen.c.orig: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. 3 | * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 | * SUCH DAMAGE. 25 | */ 26 | 27 | /* 28 | * $FreeBSD: head/tools/tools/netmap/pkt-gen.c 231198 2012-02-08 11:43:29Z luigi $ 29 | * $Id: pkt-gen.c 12346 2013-06-12 17:36:25Z luigi $ 30 | * 31 | * Example program to show how to build a multithreaded packet 32 | * source/sink using the netmap device. 33 | * 34 | * In this example we create a programmable number of threads 35 | * to take care of all the queues of the interface used to 36 | * send or receive traffic. 
37 | * 38 | */ 39 | 40 | #define _GNU_SOURCE /* for CPU_SET() */ 41 | #include 42 | #define NETMAP_WITH_LIBS 43 | #include 44 | 45 | 46 | #include // isprint() 47 | #include // sysconf() 48 | #include 49 | #include /* ntohs */ 50 | #include /* sysctl */ 51 | #include /* getifaddrs */ 52 | #include 53 | #include 54 | #include 55 | #include 56 | 57 | #include 58 | 59 | #ifndef NO_PCAP 60 | #include 61 | #endif 62 | 63 | #ifdef linux 64 | 65 | #define cpuset_t cpu_set_t 66 | 67 | #define ifr_flagshigh ifr_flags /* only the low 16 bits here */ 68 | #define IFF_PPROMISC IFF_PROMISC /* IFF_PPROMISC does not exist */ 69 | #include 70 | #include 71 | 72 | #define CLOCK_REALTIME_PRECISE CLOCK_REALTIME 73 | #include /* ether_aton */ 74 | #include /* sockaddr_ll */ 75 | #endif /* linux */ 76 | 77 | #ifdef __FreeBSD__ 78 | #include /* le64toh */ 79 | #include 80 | 81 | #include /* pthread w/ affinity */ 82 | #include /* cpu_set */ 83 | #include /* LLADDR */ 84 | #endif /* __FreeBSD__ */ 85 | 86 | #ifdef __APPLE__ 87 | 88 | #define cpuset_t uint64_t // XXX 89 | static inline void CPU_ZERO(cpuset_t *p) 90 | { 91 | *p = 0; 92 | } 93 | 94 | static inline void CPU_SET(uint32_t i, cpuset_t *p) 95 | { 96 | *p |= 1<< (i & 0x3f); 97 | } 98 | 99 | #define pthread_setaffinity_np(a, b, c) ((void)a, 0) 100 | 101 | #define ifr_flagshigh ifr_flags // XXX 102 | #define IFF_PPROMISC IFF_PROMISC 103 | #include /* LLADDR */ 104 | #define clock_gettime(a,b) \ 105 | do {struct timespec t0 = {0,0}; *(b) = t0; } while (0) 106 | #endif /* __APPLE__ */ 107 | 108 | const char *default_payload="netmap pkt-gen DIRECT payload\n" 109 | "http://info.iet.unipi.it/~luigi/netmap/ "; 110 | 111 | const char *indirect_payload="netmap pkt-gen indirect payload\n" 112 | "http://info.iet.unipi.it/~luigi/netmap/ "; 113 | 114 | int verbose = 0; 115 | 116 | #define SKIP_PAYLOAD 1 /* do not check payload. 
XXX unused */ 117 | 118 | 119 | #define VIRT_HDR_1 10 /* length of a base vnet-hdr */ 120 | #define VIRT_HDR_2 12 /* length of the extenede vnet-hdr */ 121 | #define VIRT_HDR_MAX VIRT_HDR_2 122 | struct virt_header { 123 | uint8_t fields[VIRT_HDR_MAX]; 124 | }; 125 | 126 | struct pkt { 127 | struct virt_header vh; 128 | struct ether_header eh; 129 | struct ip ip; 130 | struct udphdr udp; 131 | uint8_t body[2048]; // XXX hardwired 132 | } __attribute__((__packed__)); 133 | 134 | struct ip_range { 135 | char *name; 136 | uint32_t start, end; /* same as struct in_addr */ 137 | uint16_t port0, port1; 138 | }; 139 | 140 | struct mac_range { 141 | char *name; 142 | struct ether_addr start, end; 143 | }; 144 | 145 | /* ifname can be netmap:foo-xxxx */ 146 | #define MAX_IFNAMELEN 64 /* our buffer for ifname */ 147 | /* 148 | * global arguments for all threads 149 | */ 150 | 151 | struct glob_arg { 152 | struct ip_range src_ip; 153 | struct ip_range dst_ip; 154 | struct mac_range dst_mac; 155 | struct mac_range src_mac; 156 | int pkt_size; 157 | int burst; 158 | int forever; 159 | int npackets; /* total packets to send */ 160 | int frags; /* fragments per packet */ 161 | int nthreads; 162 | int cpus; 163 | int options; /* testing */ 164 | #define OPT_PREFETCH 1 165 | #define OPT_ACCESS 2 166 | #define OPT_COPY 4 167 | #define OPT_MEMCPY 8 168 | #define OPT_TS 16 /* add a timestamp */ 169 | #define OPT_INDIRECT 32 /* use indirect buffers, tx only */ 170 | #define OPT_DUMP 64 /* dump rx/tx traffic */ 171 | #define OPT_MONITOR_TX 128 172 | #define OPT_MONITOR_RX 256 173 | int dev_type; 174 | #ifndef NO_PCAP 175 | pcap_t *p; 176 | #endif 177 | 178 | int tx_rate; 179 | struct timespec tx_period; 180 | 181 | int affinity; 182 | int main_fd; 183 | struct nm_desc *nmd; 184 | uint64_t nmd_flags; 185 | int report_interval; /* milliseconds between prints */ 186 | void *(*td_body)(void *); 187 | void *mmap_addr; 188 | char ifname[MAX_IFNAMELEN]; 189 | char *nmr_config; 190 | int 
dummy_send; 191 | int virt_header; /* send also the virt_header */ 192 | int extra_bufs; /* goes in nr_arg3 */ 193 | }; 194 | enum dev_type { DEV_NONE, DEV_NETMAP, DEV_PCAP, DEV_TAP }; 195 | 196 | 197 | /* 198 | * Arguments for a new thread. The same structure is used by 199 | * the source and the sink 200 | */ 201 | struct targ { 202 | struct glob_arg *g; 203 | int used; 204 | int completed; 205 | int cancel; 206 | int fd; 207 | struct nm_desc *nmd; 208 | volatile uint64_t count; 209 | struct timespec tic, toc; 210 | int me; 211 | pthread_t thread; 212 | int affinity; 213 | 214 | struct pkt pkt; 215 | }; 216 | 217 | 218 | /* 219 | * extract the extremes from a range of ipv4 addresses. 220 | * addr_lo[-addr_hi][:port_lo[-port_hi]] 221 | */ 222 | static void 223 | extract_ip_range(struct ip_range *r) 224 | { 225 | char *ap, *pp; 226 | struct in_addr a; 227 | 228 | if (verbose) 229 | D("extract IP range from %s", r->name); 230 | r->port0 = r->port1 = 0; 231 | r->start = r->end = 0; 232 | 233 | /* the first - splits start/end of range */ 234 | ap = index(r->name, '-'); /* do we have ports ? 
*/ 235 | if (ap) { 236 | *ap++ = '\0'; 237 | } 238 | /* grab the initial values (mandatory) */ 239 | pp = index(r->name, ':'); 240 | if (pp) { 241 | *pp++ = '\0'; 242 | r->port0 = r->port1 = strtol(pp, NULL, 0); 243 | }; 244 | inet_aton(r->name, &a); 245 | r->start = r->end = ntohl(a.s_addr); 246 | if (ap) { 247 | pp = index(ap, ':'); 248 | if (pp) { 249 | *pp++ = '\0'; 250 | if (*pp) 251 | r->port1 = strtol(pp, NULL, 0); 252 | } 253 | if (*ap) { 254 | inet_aton(ap, &a); 255 | r->end = ntohl(a.s_addr); 256 | } 257 | } 258 | if (r->port0 > r->port1) { 259 | uint16_t tmp = r->port0; 260 | r->port0 = r->port1; 261 | r->port1 = tmp; 262 | } 263 | if (r->start > r->end) { 264 | uint32_t tmp = r->start; 265 | r->start = r->end; 266 | r->end = tmp; 267 | } 268 | { 269 | struct in_addr a; 270 | char buf1[16]; // one ip address 271 | 272 | a.s_addr = htonl(r->end); 273 | strncpy(buf1, inet_ntoa(a), sizeof(buf1)); 274 | a.s_addr = htonl(r->start); 275 | if (1) 276 | D("range is %s:%d to %s:%d", 277 | inet_ntoa(a), r->port0, buf1, r->port1); 278 | } 279 | } 280 | 281 | static void 282 | extract_mac_range(struct mac_range *r) 283 | { 284 | if (verbose) 285 | D("extract MAC range from %s", r->name); 286 | bcopy(ether_aton(r->name), &r->start, 6); 287 | bcopy(ether_aton(r->name), &r->end, 6); 288 | #if 0 289 | bcopy(targ->src_mac, eh->ether_shost, 6); 290 | p = index(targ->g->src_mac, '-'); 291 | if (p) 292 | targ->src_mac_range = atoi(p+1); 293 | 294 | bcopy(ether_aton(targ->g->dst_mac), targ->dst_mac, 6); 295 | bcopy(targ->dst_mac, eh->ether_dhost, 6); 296 | p = index(targ->g->dst_mac, '-'); 297 | if (p) 298 | targ->dst_mac_range = atoi(p+1); 299 | #endif 300 | if (verbose) 301 | D("%s starts at %s", r->name, ether_ntoa(&r->start)); 302 | } 303 | 304 | static struct targ *targs; 305 | static int global_nthreads; 306 | 307 | /* control-C handler */ 308 | static void 309 | sigint_h(int sig) 310 | { 311 | int i; 312 | 313 | (void)sig; /* UNUSED */ 314 | for (i = 0; i < 
global_nthreads; i++) { 315 | targs[i].cancel = 1; 316 | } 317 | signal(SIGINT, SIG_DFL); 318 | } 319 | 320 | /* sysctl wrapper to return the number of active CPUs */ 321 | static int 322 | system_ncpus(void) 323 | { 324 | int ncpus; 325 | #if defined (__FreeBSD__) 326 | int mib[2] = { CTL_HW, HW_NCPU }; 327 | size_t len = sizeof(mib); 328 | sysctl(mib, 2, &ncpus, &len, NULL, 0); 329 | #elif defined(linux) 330 | ncpus = sysconf(_SC_NPROCESSORS_ONLN); 331 | #else /* others */ 332 | ncpus = 1; 333 | #endif /* others */ 334 | return (ncpus); 335 | } 336 | 337 | #ifdef __linux__ 338 | #define sockaddr_dl sockaddr_ll 339 | #define sdl_family sll_family 340 | #define AF_LINK AF_PACKET 341 | #define LLADDR(s) s->sll_addr; 342 | #include 343 | #define TAP_CLONEDEV "/dev/net/tun" 344 | #endif /* __linux__ */ 345 | 346 | #ifdef __FreeBSD__ 347 | #include 348 | #define TAP_CLONEDEV "/dev/tap" 349 | #endif /* __FreeBSD */ 350 | 351 | #ifdef __APPLE__ 352 | // #warning TAP not supported on apple ? 353 | #include 354 | #define TAP_CLONEDEV "/dev/tap" 355 | #endif /* __APPLE__ */ 356 | 357 | 358 | /* 359 | * parse the vale configuration in conf and put it in nmr. 360 | * Return the flag set if necessary. 361 | * The configuration may consist of 0 to 4 numbers separated 362 | * by commas: #tx-slots,#rx-slots,#tx-rings,#rx-rings. 363 | * Missing numbers or zeroes stand for default values. 364 | * As an additional convenience, if exactly one number 365 | * is specified, then this is assigned to both #tx-slots and #rx-slots. 366 | * If there is no 4th number, then the 3rd is assigned to both #tx-rings 367 | * and #rx-rings. 368 | */ 369 | int 370 | parse_nmr_config(const char* conf, struct nmreq *nmr) 371 | { 372 | char *w, *tok; 373 | int i, v; 374 | 375 | nmr->nr_tx_rings = nmr->nr_rx_rings = 0; 376 | nmr->nr_tx_slots = nmr->nr_rx_slots = 0; 377 | if (conf == NULL || ! 
*conf) 378 | return 0; 379 | w = strdup(conf); 380 | for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) { 381 | v = atoi(tok); 382 | switch (i) { 383 | case 0: 384 | nmr->nr_tx_slots = nmr->nr_rx_slots = v; 385 | break; 386 | case 1: 387 | nmr->nr_rx_slots = v; 388 | break; 389 | case 2: 390 | nmr->nr_tx_rings = nmr->nr_rx_rings = v; 391 | break; 392 | case 3: 393 | nmr->nr_rx_rings = v; 394 | break; 395 | default: 396 | D("ignored config: %s", tok); 397 | break; 398 | } 399 | } 400 | D("txr %d txd %d rxr %d rxd %d", 401 | nmr->nr_tx_rings, nmr->nr_tx_slots, 402 | nmr->nr_rx_rings, nmr->nr_rx_slots); 403 | free(w); 404 | return (nmr->nr_tx_rings || nmr->nr_tx_slots || 405 | nmr->nr_rx_rings || nmr->nr_rx_slots) ? 406 | NM_OPEN_RING_CFG : 0; 407 | } 408 | 409 | 410 | /* 411 | * locate the src mac address for our interface, put it 412 | * into the user-supplied buffer. return 0 if ok, -1 on error. 413 | */ 414 | static int 415 | source_hwaddr(const char *ifname, char *buf) 416 | { 417 | struct ifaddrs *ifaphead, *ifap; 418 | int l = sizeof(ifap->ifa_name); 419 | 420 | if (getifaddrs(&ifaphead) != 0) { 421 | D("getifaddrs %s failed", ifname); 422 | return (-1); 423 | } 424 | 425 | for (ifap = ifaphead; ifap; ifap = ifap->ifa_next) { 426 | struct sockaddr_dl *sdl = 427 | (struct sockaddr_dl *)ifap->ifa_addr; 428 | uint8_t *mac; 429 | 430 | if (!sdl || sdl->sdl_family != AF_LINK) 431 | continue; 432 | if (strncmp(ifap->ifa_name, ifname, l) != 0) 433 | continue; 434 | mac = (uint8_t *)LLADDR(sdl); 435 | sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", 436 | mac[0], mac[1], mac[2], 437 | mac[3], mac[4], mac[5]); 438 | if (verbose) 439 | D("source hwaddr %s", buf); 440 | break; 441 | } 442 | freeifaddrs(ifaphead); 443 | return ifap ? 0 : 1; 444 | } 445 | 446 | 447 | /* set the thread affinity. 
*/ 448 | static int 449 | setaffinity(pthread_t me, int i) 450 | { 451 | cpuset_t cpumask; 452 | 453 | if (i == -1) 454 | return 0; 455 | 456 | /* Set thread affinity affinity.*/ 457 | CPU_ZERO(&cpumask); 458 | CPU_SET(i, &cpumask); 459 | 460 | if (pthread_setaffinity_np(me, sizeof(cpuset_t), &cpumask) != 0) { 461 | D("Unable to set affinity: %s", strerror(errno)); 462 | return 1; 463 | } 464 | return 0; 465 | } 466 | 467 | /* Compute the checksum of the given ip header. */ 468 | static uint16_t 469 | checksum(const void *data, uint16_t len, uint32_t sum) 470 | { 471 | const uint8_t *addr = data; 472 | uint32_t i; 473 | 474 | /* Checksum all the pairs of bytes first... */ 475 | for (i = 0; i < (len & ~1U); i += 2) { 476 | sum += (u_int16_t)ntohs(*((u_int16_t *)(addr + i))); 477 | if (sum > 0xFFFF) 478 | sum -= 0xFFFF; 479 | } 480 | /* 481 | * If there's a single byte left over, checksum it, too. 482 | * Network byte order is big-endian, so the remaining byte is 483 | * the high byte. 484 | */ 485 | if (i < len) { 486 | sum += addr[i] << 8; 487 | if (sum > 0xFFFF) 488 | sum -= 0xFFFF; 489 | } 490 | return sum; 491 | } 492 | 493 | static u_int16_t 494 | wrapsum(u_int32_t sum) 495 | { 496 | sum = ~sum & 0xFFFF; 497 | return (htons(sum)); 498 | } 499 | 500 | /* Check the payload of the packet for errors (use it for debug). 501 | * Look for consecutive ascii representations of the size of the packet. 502 | */ 503 | static void 504 | dump_payload(char *p, int len, struct netmap_ring *ring, int cur) 505 | { 506 | char buf[128]; 507 | int i, j, i0; 508 | 509 | /* get the length in ASCII of the length of the packet. 
*/ 510 | 511 | printf("ring %p cur %5d [buf %6d flags 0x%04x len %5d]\n", 512 | ring, cur, ring->slot[cur].buf_idx, 513 | ring->slot[cur].flags, len); 514 | /* hexdump routine */ 515 | for (i = 0; i < len; ) { 516 | memset(buf, sizeof(buf), ' '); 517 | sprintf(buf, "%5d: ", i); 518 | i0 = i; 519 | for (j=0; j < 16 && i < len; i++, j++) 520 | sprintf(buf+7+j*3, "%02x ", (uint8_t)(p[i])); 521 | i = i0; 522 | for (j=0; j < 16 && i < len; i++, j++) 523 | sprintf(buf+7+j + 48, "%c", 524 | isprint(p[i]) ? p[i] : '.'); 525 | printf("%s\n", buf); 526 | } 527 | } 528 | 529 | /* 530 | * Fill a packet with some payload. 531 | * We create a UDP packet so the payload starts at 532 | * 14+20+8 = 42 bytes. 533 | */ 534 | #ifdef __linux__ 535 | #define uh_sport source 536 | #define uh_dport dest 537 | #define uh_ulen len 538 | #define uh_sum check 539 | #endif /* linux */ 540 | 541 | /* 542 | * increment the addressed in the packet, 543 | * starting from the least significant field. 544 | * DST_IP DST_PORT SRC_IP SRC_PORT 545 | */ 546 | static void 547 | update_addresses(struct pkt *pkt, struct glob_arg *g) 548 | { 549 | uint32_t a; 550 | uint16_t p; 551 | struct ip *ip = &pkt->ip; 552 | struct udphdr *udp = &pkt->udp; 553 | 554 | do { 555 | p = ntohs(udp->uh_sport); 556 | if (p < g->src_ip.port1) { /* just inc, no wrap */ 557 | udp->uh_sport = htons(p + 1); 558 | break; 559 | } 560 | udp->uh_sport = htons(g->src_ip.port0); 561 | 562 | a = ntohl(ip->ip_src.s_addr); 563 | if (a < g->src_ip.end) { /* just inc, no wrap */ 564 | ip->ip_src.s_addr = htonl(a + 1); 565 | break; 566 | } 567 | ip->ip_src.s_addr = htonl(g->src_ip.start); 568 | 569 | udp->uh_sport = htons(g->src_ip.port0); 570 | p = ntohs(udp->uh_dport); 571 | if (p < g->dst_ip.port1) { /* just inc, no wrap */ 572 | udp->uh_dport = htons(p + 1); 573 | break; 574 | } 575 | udp->uh_dport = htons(g->dst_ip.port0); 576 | 577 | a = ntohl(ip->ip_dst.s_addr); 578 | if (a < g->dst_ip.end) { /* just inc, no wrap */ 579 | 
ip->ip_dst.s_addr = htonl(a + 1); 580 | break; 581 | } 582 | ip->ip_dst.s_addr = htonl(g->dst_ip.start); 583 | } while (0); 584 | // update checksum 585 | } 586 | 587 | /* 588 | * initialize one packet and prepare for the next one. 589 | * The copy could be done better instead of repeating it each time. 590 | */ 591 | static void 592 | initialize_packet(struct targ *targ) 593 | { 594 | struct pkt *pkt = &targ->pkt; 595 | struct ether_header *eh; 596 | struct ip *ip; 597 | struct udphdr *udp; 598 | uint16_t paylen = targ->g->pkt_size - sizeof(*eh) - sizeof(struct ip); 599 | const char *payload = targ->g->options & OPT_INDIRECT ? 600 | indirect_payload : default_payload; 601 | int i, l0 = strlen(payload); 602 | 603 | /* create a nice NUL-terminated string */ 604 | for (i = 0; i < paylen; i += l0) { 605 | if (l0 > paylen - i) 606 | l0 = paylen - i; // last round 607 | bcopy(payload, pkt->body + i, l0); 608 | } 609 | pkt->body[i-1] = '\0'; 610 | ip = &pkt->ip; 611 | 612 | /* prepare the headers */ 613 | ip->ip_v = IPVERSION; 614 | ip->ip_hl = 5; 615 | ip->ip_id = 0; 616 | ip->ip_tos = IPTOS_LOWDELAY; 617 | ip->ip_len = ntohs(targ->g->pkt_size - sizeof(*eh)); 618 | ip->ip_id = 0; 619 | ip->ip_off = htons(IP_DF); /* Don't fragment */ 620 | ip->ip_ttl = IPDEFTTL; 621 | ip->ip_p = IPPROTO_UDP; 622 | ip->ip_dst.s_addr = htonl(targ->g->dst_ip.start); 623 | ip->ip_src.s_addr = htonl(targ->g->src_ip.start); 624 | ip->ip_sum = wrapsum(checksum(ip, sizeof(*ip), 0)); 625 | 626 | 627 | udp = &pkt->udp; 628 | udp->uh_sport = htons(targ->g->src_ip.port0); 629 | udp->uh_dport = htons(targ->g->dst_ip.port0); 630 | udp->uh_ulen = htons(paylen); 631 | /* Magic: taken from sbin/dhclient/packet.c */ 632 | udp->uh_sum = wrapsum(checksum(udp, sizeof(*udp), 633 | checksum(pkt->body, 634 | paylen - sizeof(*udp), 635 | checksum(&ip->ip_src, 2 * sizeof(ip->ip_src), 636 | IPPROTO_UDP + (u_int32_t)ntohs(udp->uh_ulen) 637 | ) 638 | ) 639 | )); 640 | 641 | eh = &pkt->eh; 642 | 
bcopy(&targ->g->src_mac.start, eh->ether_shost, 6); 643 | bcopy(&targ->g->dst_mac.start, eh->ether_dhost, 6); 644 | eh->ether_type = htons(ETHERTYPE_IP); 645 | 646 | bzero(&pkt->vh, sizeof(pkt->vh)); 647 | // dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0); 648 | } 649 | 650 | 651 | 652 | /* 653 | * create and enqueue a batch of packets on a ring. 654 | * On the last one set NS_REPORT to tell the driver to generate 655 | * an interrupt when done. 656 | */ 657 | static int 658 | send_packets(struct netmap_ring *ring, struct pkt *pkt, void *frame, 659 | int size, struct glob_arg *g, u_int count, int options, 660 | u_int nfrags) 661 | { 662 | u_int n, sent, cur = ring->cur; 663 | u_int fcnt; 664 | 665 | n = nm_ring_space(ring); 666 | if (n < count) 667 | count = n; 668 | if (count < nfrags) { 669 | D("truncating packet, no room for frags %d %d", 670 | count, nfrags); 671 | } 672 | #if 0 673 | if (options & (OPT_COPY | OPT_PREFETCH) ) { 674 | for (sent = 0; sent < count; sent++) { 675 | struct netmap_slot *slot = &ring->slot[cur]; 676 | char *p = NETMAP_BUF(ring, slot->buf_idx); 677 | 678 | __builtin_prefetch(p); 679 | cur = nm_ring_next(ring, cur); 680 | } 681 | cur = ring->cur; 682 | } 683 | #endif 684 | for (fcnt = nfrags, sent = 0; sent < count; sent++) { 685 | struct netmap_slot *slot = &ring->slot[cur]; 686 | char *p = NETMAP_BUF(ring, slot->buf_idx); 687 | 688 | slot->flags = 0; 689 | if (options & OPT_INDIRECT) { 690 | slot->flags |= NS_INDIRECT; 691 | slot->ptr = (uint64_t)frame; 692 | } else if (options & OPT_COPY) { 693 | nm_pkt_copy(frame, p, size); 694 | if (fcnt == nfrags) 695 | update_addresses(pkt, g); 696 | } else if (options & OPT_MEMCPY) { 697 | memcpy(p, frame, size); 698 | if (fcnt == nfrags) 699 | update_addresses(pkt, g); 700 | } else if (options & OPT_PREFETCH) { 701 | __builtin_prefetch(p); 702 | } 703 | if (options & OPT_DUMP) 704 | dump_payload(p, size, ring, cur); 705 | slot->len = size; 706 | if (--fcnt > 0) 707 | slot->flags |= 
NS_MOREFRAG; 708 | else 709 | fcnt = nfrags; 710 | if (sent == count - 1) { 711 | slot->flags &= ~NS_MOREFRAG; 712 | slot->flags |= NS_REPORT; 713 | } 714 | cur = nm_ring_next(ring, cur); 715 | } 716 | ring->head = ring->cur = cur; 717 | 718 | return (sent); 719 | } 720 | 721 | /* 722 | * Send a packet, and wait for a response. 723 | * The payload (after UDP header, ofs 42) has a 4-byte sequence 724 | * followed by a struct timeval (or bintime?) 725 | */ 726 | #define PAY_OFS 42 /* where in the pkt... */ 727 | 728 | static void * 729 | pinger_body(void *data) 730 | { 731 | struct targ *targ = (struct targ *) data; 732 | struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 733 | struct netmap_if *nifp = targ->nmd->nifp; 734 | int i, rx = 0, n = targ->g->npackets; 735 | void *frame; 736 | int size; 737 | uint32_t sent = 0; 738 | struct timespec ts, now, last_print; 739 | uint32_t count = 0, min = 1000000000, av = 0; 740 | 741 | frame = &targ->pkt; 742 | frame += sizeof(targ->pkt.vh) - targ->g->virt_header; 743 | size = targ->g->pkt_size + targ->g->virt_header; 744 | 745 | 746 | if (targ->g->nthreads > 1) { 747 | D("can only ping with 1 thread"); 748 | return NULL; 749 | } 750 | 751 | clock_gettime(CLOCK_REALTIME_PRECISE, &last_print); 752 | now = last_print; 753 | while (n == 0 || (int)sent < n) { 754 | struct netmap_ring *ring = NETMAP_TXRING(nifp, 0); 755 | struct netmap_slot *slot; 756 | char *p; 757 | for (i = 0; i < 1; i++) { /* XXX why the loop for 1 pkt ? 
*/ 758 | slot = &ring->slot[ring->cur]; 759 | slot->len = size; 760 | p = NETMAP_BUF(ring, slot->buf_idx); 761 | 762 | if (nm_ring_empty(ring)) { 763 | D("-- ouch, cannot send"); 764 | } else { 765 | nm_pkt_copy(frame, p, size); 766 | clock_gettime(CLOCK_REALTIME_PRECISE, &ts); 767 | bcopy(&sent, p+42, sizeof(sent)); 768 | bcopy(&ts, p+46, sizeof(ts)); 769 | sent++; 770 | ring->head = ring->cur = nm_ring_next(ring, ring->cur); 771 | } 772 | } 773 | /* should use a parameter to decide how often to send */ 774 | if (poll(&pfd, 1, 3000) <= 0) { 775 | D("poll error/timeout on queue %d: %s", targ->me, 776 | strerror(errno)); 777 | continue; 778 | } 779 | /* see what we got back */ 780 | for (i = targ->nmd->first_tx_ring; 781 | i <= targ->nmd->last_tx_ring; i++) { 782 | ring = NETMAP_RXRING(nifp, i); 783 | while (!nm_ring_empty(ring)) { 784 | uint32_t seq; 785 | slot = &ring->slot[ring->cur]; 786 | p = NETMAP_BUF(ring, slot->buf_idx); 787 | 788 | clock_gettime(CLOCK_REALTIME_PRECISE, &now); 789 | bcopy(p+42, &seq, sizeof(seq)); 790 | bcopy(p+46, &ts, sizeof(ts)); 791 | ts.tv_sec = now.tv_sec - ts.tv_sec; 792 | ts.tv_nsec = now.tv_nsec - ts.tv_nsec; 793 | if (ts.tv_nsec < 0) { 794 | ts.tv_nsec += 1000000000; 795 | ts.tv_sec--; 796 | } 797 | if (1) D("seq %d/%d delta %d.%09d", seq, sent, 798 | (int)ts.tv_sec, (int)ts.tv_nsec); 799 | if (ts.tv_nsec < (int)min) 800 | min = ts.tv_nsec; 801 | count ++; 802 | av += ts.tv_nsec; 803 | ring->head = ring->cur = nm_ring_next(ring, ring->cur); 804 | rx++; 805 | } 806 | } 807 | //D("tx %d rx %d", sent, rx); 808 | //usleep(100000); 809 | ts.tv_sec = now.tv_sec - last_print.tv_sec; 810 | ts.tv_nsec = now.tv_nsec - last_print.tv_nsec; 811 | if (ts.tv_nsec < 0) { 812 | ts.tv_nsec += 1000000000; 813 | ts.tv_sec--; 814 | } 815 | if (ts.tv_sec >= 1) { 816 | D("count %d min %d av %d", 817 | count, min, av/count); 818 | count = 0; 819 | av = 0; 820 | min = 100000000; 821 | last_print = now; 822 | } 823 | } 824 | return NULL; 825 | } 826 | 827 
| 828 | /* 829 | * reply to ping requests 830 | */ 831 | static void * 832 | ponger_body(void *data) 833 | { 834 | struct targ *targ = (struct targ *) data; 835 | struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 836 | struct netmap_if *nifp = targ->nmd->nifp; 837 | struct netmap_ring *txring, *rxring; 838 | int i, rx = 0, sent = 0, n = targ->g->npackets; 839 | 840 | if (targ->g->nthreads > 1) { 841 | D("can only reply ping with 1 thread"); 842 | return NULL; 843 | } 844 | D("understood ponger %d but don't know how to do it", n); 845 | while (n == 0 || sent < n) { 846 | uint32_t txcur, txavail; 847 | //#define BUSYWAIT 848 | #ifdef BUSYWAIT 849 | ioctl(pfd.fd, NIOCRXSYNC, NULL); 850 | #else 851 | if (poll(&pfd, 1, 1000) <= 0) { 852 | D("poll error/timeout on queue %d: %s", targ->me, 853 | strerror(errno)); 854 | continue; 855 | } 856 | #endif 857 | txring = NETMAP_TXRING(nifp, 0); 858 | txcur = txring->cur; 859 | txavail = nm_ring_space(txring); 860 | /* see what we got back */ 861 | for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 862 | rxring = NETMAP_RXRING(nifp, i); 863 | while (!nm_ring_empty(rxring)) { 864 | uint16_t *spkt, *dpkt; 865 | uint32_t cur = rxring->cur; 866 | struct netmap_slot *slot = &rxring->slot[cur]; 867 | char *src, *dst; 868 | src = NETMAP_BUF(rxring, slot->buf_idx); 869 | //D("got pkt %p of size %d", src, slot->len); 870 | rxring->head = rxring->cur = nm_ring_next(rxring, cur); 871 | rx++; 872 | if (txavail == 0) 873 | continue; 874 | dst = NETMAP_BUF(txring, 875 | txring->slot[txcur].buf_idx); 876 | /* copy... 
*/ 877 | dpkt = (uint16_t *)dst; 878 | spkt = (uint16_t *)src; 879 | nm_pkt_copy(src, dst, slot->len); 880 | dpkt[0] = spkt[3]; 881 | dpkt[1] = spkt[4]; 882 | dpkt[2] = spkt[5]; 883 | dpkt[3] = spkt[0]; 884 | dpkt[4] = spkt[1]; 885 | dpkt[5] = spkt[2]; 886 | txring->slot[txcur].len = slot->len; 887 | /* XXX swap src dst mac */ 888 | txcur = nm_ring_next(txring, txcur); 889 | txavail--; 890 | sent++; 891 | } 892 | } 893 | txring->head = txring->cur = txcur; 894 | targ->count = sent; 895 | #ifdef BUSYWAIT 896 | ioctl(pfd.fd, NIOCTXSYNC, NULL); 897 | #endif 898 | //D("tx %d rx %d", sent, rx); 899 | } 900 | return NULL; 901 | } 902 | 903 | static __inline int 904 | timespec_ge(const struct timespec *a, const struct timespec *b) 905 | { 906 | 907 | if (a->tv_sec > b->tv_sec) 908 | return (1); 909 | if (a->tv_sec < b->tv_sec) 910 | return (0); 911 | if (a->tv_nsec >= b->tv_nsec) 912 | return (1); 913 | return (0); 914 | } 915 | 916 | static __inline struct timespec 917 | timeval2spec(const struct timeval *a) 918 | { 919 | struct timespec ts = { 920 | .tv_sec = a->tv_sec, 921 | .tv_nsec = a->tv_usec * 1000 922 | }; 923 | return ts; 924 | } 925 | 926 | static __inline struct timeval 927 | timespec2val(const struct timespec *a) 928 | { 929 | struct timeval tv = { 930 | .tv_sec = a->tv_sec, 931 | .tv_usec = a->tv_nsec / 1000 932 | }; 933 | return tv; 934 | } 935 | 936 | 937 | static __inline struct timespec 938 | timespec_add(struct timespec a, struct timespec b) 939 | { 940 | struct timespec ret = { a.tv_sec + b.tv_sec, a.tv_nsec + b.tv_nsec }; 941 | if (ret.tv_nsec >= 1000000000) { 942 | ret.tv_sec++; 943 | ret.tv_nsec -= 1000000000; 944 | } 945 | return ret; 946 | } 947 | 948 | static __inline struct timespec 949 | timespec_sub(struct timespec a, struct timespec b) 950 | { 951 | struct timespec ret = { a.tv_sec - b.tv_sec, a.tv_nsec - b.tv_nsec }; 952 | if (ret.tv_nsec < 0) { 953 | ret.tv_sec--; 954 | ret.tv_nsec += 1000000000; 955 | } 956 | return ret; 957 | } 958 | 959 
| 960 | /* 961 | * wait until ts, either busy or sleeping if more than 1ms. 962 | * Return wakeup time. 963 | */ 964 | static struct timespec 965 | wait_time(struct timespec ts) 966 | { 967 | for (;;) { 968 | struct timespec w, cur; 969 | clock_gettime(CLOCK_REALTIME_PRECISE, &cur); 970 | w = timespec_sub(ts, cur); 971 | if (w.tv_sec < 0) 972 | return cur; 973 | else if (w.tv_sec > 0 || w.tv_nsec > 1000000) 974 | poll(NULL, 0, 1); 975 | } 976 | } 977 | 978 | static void * 979 | sender_body(void *data) 980 | { 981 | struct targ *targ = (struct targ *) data; 982 | struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT }; 983 | struct netmap_if *nifp; 984 | struct netmap_ring *txring; 985 | int i, n = targ->g->npackets / targ->g->nthreads; 986 | int64_t sent = 0; 987 | int options = targ->g->options | OPT_COPY; 988 | struct timespec nexttime = { 0, 0}; // XXX silence compiler 989 | int rate_limit = targ->g->tx_rate; 990 | struct pkt *pkt = &targ->pkt; 991 | void *frame; 992 | int size; 993 | 994 | frame = pkt; 995 | frame += sizeof(pkt->vh) - targ->g->virt_header; 996 | size = targ->g->pkt_size + targ->g->virt_header; 997 | 998 | D("start"); 999 | if (setaffinity(targ->thread, targ->affinity)) 1000 | goto quit; 1001 | 1002 | /* main loop.*/ 1003 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1004 | if (rate_limit) { 1005 | targ->tic = timespec_add(targ->tic, (struct timespec){2,0}); 1006 | targ->tic.tv_nsec = 0; 1007 | wait_time(targ->tic); 1008 | nexttime = targ->tic; 1009 | } 1010 | if (targ->g->dev_type == DEV_TAP) { 1011 | D("writing to file desc %d", targ->g->main_fd); 1012 | 1013 | for (i = 0; !targ->cancel && (n == 0 || sent < n); i++) { 1014 | if (write(targ->g->main_fd, frame, size) != -1) 1015 | sent++; 1016 | update_addresses(pkt, targ->g); 1017 | if (i > 10000) { 1018 | targ->count = sent; 1019 | i = 0; 1020 | } 1021 | } 1022 | #ifndef NO_PCAP 1023 | } else if (targ->g->dev_type == DEV_PCAP) { 1024 | pcap_t *p = targ->g->p; 1025 | 1026 | for (i = 0; 
!targ->cancel && (n == 0 || sent < n); i++) {
		if (pcap_inject(p, frame, size) != -1)
			sent++;
		update_addresses(pkt, targ->g);
		/* publish the counter only now and then */
		if (i > 10000) {
			targ->count = sent;
			i = 0;
		}
	    }
#endif /* NO_PCAP */
    } else {
	/* netmap: 'tosend' is what is left of the current rate-limited burst */
	int tosend = 0;
	int frags = targ->g->frags;

	nifp = targ->nmd->nifp;
	while (!targ->cancel && (n == 0 || sent < n)) {

		/* burst exhausted: wait for the start of the next period */
		if (rate_limit && tosend <= 0) {
			tosend = targ->g->burst;
			nexttime = timespec_add(nexttime, targ->g->tx_period);
			wait_time(nexttime);
		}

		/*
		 * wait for available room in the send queue(s)
		 */
		if (poll(&pfd, 1, 2000) <= 0) {
			if (targ->cancel)
				break;
			D("poll error/timeout on queue %d: %s", targ->me,
				strerror(errno));
			// goto quit;
		}
		if (pfd.revents & POLLERR) {
			D("poll error");
			goto quit;
		}
		/*
		 * scan our queues and send on those with room
		 */
		/*
		 * OPT_COPY is forced on at startup; unless the user asked
		 * for it, it is dropped after the first 100000 packets.
		 */
		if (options & OPT_COPY && sent > 100000 && !(targ->g->options & OPT_COPY) ) {
			D("drop copy");
			options &= ~OPT_COPY;
		}
		for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
			int m, limit = rate_limit ?
tosend : targ->g->burst;
			/* never send more than what is left of the total */
			if (n > 0 && n - sent < limit)
				limit = n - sent;
			txring = NETMAP_TXRING(nifp, i);
			if (nm_ring_empty(txring))
				continue;
			/* round up to a whole number of multi-slot packets */
			if (frags > 1)
				limit = ((limit + frags - 1) / frags) * frags;

			m = send_packets(txring, pkt, frame, size, targ->g,
					 limit, options, frags);
			ND("limit %d tail %d frags %d m %d",
				limit, txring->tail, frags, m);
			sent += m;
			targ->count = sent;
			if (rate_limit) {
				tosend -= m;
				if (tosend <= 0)
					break;
			}
		}
	}
	/* flush any remaining packets */
	ioctl(pfd.fd, NIOCTXSYNC, NULL);

	/* final part: wait all the TX queues to be empty. */
	for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
		txring = NETMAP_TXRING(nifp, i);
		while (nm_tx_pending(txring)) {
			ioctl(pfd.fd, NIOCTXSYNC, NULL);
			usleep(1); /* wait 1 tick */
		}
	}
    } /* end DEV_NETMAP */

	/* normal completion: record the end time and the final count */
	clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc);
	targ->completed = 1;
	targ->count = sent;

quit:
	/* reset the ``used`` flag.
*/ 1112 | targ->used = 0; 1113 | 1114 | return (NULL); 1115 | } 1116 | 1117 | 1118 | #ifndef NO_PCAP 1119 | static void 1120 | receive_pcap(u_char *user, const struct pcap_pkthdr * h, 1121 | const u_char * bytes) 1122 | { 1123 | int *count = (int *)user; 1124 | (void)h; /* UNUSED */ 1125 | (void)bytes; /* UNUSED */ 1126 | (*count)++; 1127 | } 1128 | #endif /* !NO_PCAP */ 1129 | 1130 | static int 1131 | receive_packets(struct netmap_ring *ring, u_int limit, int dump) 1132 | { 1133 | u_int cur, rx, n; 1134 | 1135 | cur = ring->cur; 1136 | n = nm_ring_space(ring); 1137 | if (n < limit) 1138 | limit = n; 1139 | for (rx = 0; rx < limit; rx++) { 1140 | struct netmap_slot *slot = &ring->slot[cur]; 1141 | char *p = NETMAP_BUF(ring, slot->buf_idx); 1142 | 1143 | if (dump) 1144 | dump_payload(p, slot->len, ring, cur); 1145 | 1146 | cur = nm_ring_next(ring, cur); 1147 | } 1148 | ring->head = ring->cur = cur; 1149 | 1150 | return (rx); 1151 | } 1152 | 1153 | static void * 1154 | receiver_body(void *data) 1155 | { 1156 | struct targ *targ = (struct targ *) data; 1157 | struct pollfd pfd = { .fd = targ->fd, .events = POLLIN }; 1158 | struct netmap_if *nifp; 1159 | struct netmap_ring *rxring; 1160 | int i; 1161 | uint64_t received = 0; 1162 | 1163 | if (setaffinity(targ->thread, targ->affinity)) 1164 | goto quit; 1165 | 1166 | /* unbounded wait for the first packet. */ 1167 | for (;;) { 1168 | i = poll(&pfd, 1, 1000); 1169 | if (i > 0 && !(pfd.revents & POLLERR)) 1170 | break; 1171 | RD(1, "waiting for initial packets, poll returns %d %d", 1172 | i, pfd.revents); 1173 | } 1174 | 1175 | /* main loop, exit after 1s silence */ 1176 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic); 1177 | if (targ->g->dev_type == DEV_TAP) { 1178 | D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd); 1179 | while (!targ->cancel) { 1180 | char buf[2048]; 1181 | /* XXX should we poll ? 
*/ 1182 | if (read(targ->g->main_fd, buf, sizeof(buf)) > 0) 1183 | targ->count++; 1184 | } 1185 | #ifndef NO_PCAP 1186 | } else if (targ->g->dev_type == DEV_PCAP) { 1187 | while (!targ->cancel) { 1188 | /* XXX should we poll ? */ 1189 | pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL); 1190 | } 1191 | #endif /* !NO_PCAP */ 1192 | } else { 1193 | int dump = targ->g->options & OPT_DUMP; 1194 | 1195 | nifp = targ->nmd->nifp; 1196 | while (!targ->cancel) { 1197 | /* Once we started to receive packets, wait at most 1 seconds 1198 | before quitting. */ 1199 | if (poll(&pfd, 1, 1 * 1000) <= 0 && !targ->g->forever) { 1200 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1201 | targ->toc.tv_sec -= 1; /* Subtract timeout time. */ 1202 | goto out; 1203 | } 1204 | 1205 | if (pfd.revents & POLLERR) { 1206 | D("poll err"); 1207 | goto quit; 1208 | } 1209 | 1210 | for (i = targ->nmd->first_rx_ring; i <= targ->nmd->last_rx_ring; i++) { 1211 | int m; 1212 | 1213 | rxring = NETMAP_RXRING(nifp, i); 1214 | if (nm_ring_empty(rxring)) 1215 | continue; 1216 | 1217 | m = receive_packets(rxring, targ->g->burst, dump); 1218 | received += m; 1219 | } 1220 | targ->count = received; 1221 | } 1222 | } 1223 | 1224 | clock_gettime(CLOCK_REALTIME_PRECISE, &targ->toc); 1225 | 1226 | out: 1227 | targ->completed = 1; 1228 | targ->count = received; 1229 | 1230 | quit: 1231 | /* reset the ``used`` flag. */ 1232 | targ->used = 0; 1233 | 1234 | return (NULL); 1235 | } 1236 | 1237 | /* very crude code to print a number in normalized form. 1238 | * Caller has to make sure that the buffer is large enough. 
1239 | */ 1240 | static const char * 1241 | norm(char *buf, double val) 1242 | { 1243 | char *units[] = { "", "K", "M", "G", "T" }; 1244 | u_int i; 1245 | 1246 | for (i = 0; val >=1000 && i < sizeof(units)/sizeof(char *) - 1; i++) 1247 | val /= 1000; 1248 | sprintf(buf, "%.2f %s", val, units[i]); 1249 | return buf; 1250 | } 1251 | 1252 | static void 1253 | tx_output(uint64_t sent, int size, double delta) 1254 | { 1255 | double bw, raw_bw, pps; 1256 | char b1[40], b2[80], b3[80]; 1257 | 1258 | printf("Sent %llu packets, %d bytes each, in %.2f seconds.\n", 1259 | (unsigned long long)sent, size, delta); 1260 | if (delta == 0) 1261 | delta = 1e-6; 1262 | if (size < 60) /* correct for min packet size */ 1263 | size = 60; 1264 | pps = sent / delta; 1265 | bw = (8.0 * size * sent) / delta; 1266 | /* raw packets have4 bytes crc + 20 bytes framing */ 1267 | raw_bw = (8.0 * (size + 24) * sent) / delta; 1268 | 1269 | printf("Speed: %spps Bandwidth: %sbps (raw %sbps)\n", 1270 | norm(b1, pps), norm(b2, bw), norm(b3, raw_bw) ); 1271 | } 1272 | 1273 | 1274 | static void 1275 | rx_output(uint64_t received, double delta) 1276 | { 1277 | double pps; 1278 | char b1[40]; 1279 | 1280 | printf("Received %llu packets, in %.2f seconds.\n", 1281 | (unsigned long long) received, delta); 1282 | 1283 | if (delta == 0) 1284 | delta = 1e-6; 1285 | pps = received / delta; 1286 | printf("Speed: %spps\n", norm(b1, pps)); 1287 | } 1288 | 1289 | static void 1290 | usage(void) 1291 | { 1292 | const char *cmd = "pkt-gen"; 1293 | fprintf(stderr, 1294 | "Usage:\n" 1295 | "%s arguments\n" 1296 | "\t-i interface interface name\n" 1297 | "\t-f function tx rx ping pong\n" 1298 | "\t-n count number of iterations (can be 0)\n" 1299 | "\t-t pkts_to_send also forces tx mode\n" 1300 | "\t-r pkts_to_receive also forces rx mode\n" 1301 | "\t-l pkt_size in bytes excluding CRC\n" 1302 | "\t-d dst_ip[:port[-dst_ip:port]] single or range\n" 1303 | "\t-s src_ip[:port[-src_ip:port]] single or range\n" 1304 | "\t-D 
dst-mac\n"
		"\t-S src-mac\n"
		"\t-a cpu_id use setaffinity\n"
		"\t-b burst size testing, mostly\n"
		"\t-c cores cores to use\n"
		"\t-p threads processes/threads to use\n"
		"\t-T report_ms milliseconds between reports\n"
		"\t-P use libpcap instead of netmap\n"
		"\t-w wait_for_link_time in seconds\n"
		"\t-R rate in packets per second\n"
		"\t-X dump payload\n"
		"\t-H len add empty virtio-net-header with size 'len'\n"
		"",
		cmd);

	exit(0);
}

/*
 * Allocate targs[] and start g->nthreads worker threads running
 * g->td_body. A thread whose setup fails is skipped and keeps
 * 'used' at 0.
 */
static void
start_threads(struct glob_arg *g)
{
	int i;

	/* NOTE(review): the calloc() result is not checked before use */
	targs = calloc(g->nthreads, sizeof(*targs));
	/*
	 * Now create the desired number of threads, each one
	 * using a single descriptor.
	 */
	for (i = 0; i < g->nthreads; i++) {
		struct targ *t = &targs[i];

		bzero(t, sizeof(*t));
		t->fd = -1; /* default, with pcap */
		t->g = g;

	    if (g->dev_type == DEV_NETMAP) {
		struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */

		/* with several threads each one binds to its own ring pair */
		if (g->nthreads > 1) {
			if (nmd.req.nr_flags != NR_REG_ALL_NIC) {
				D("invalid nthreads mode %d", nmd.req.nr_flags);
				continue;
			}
			nmd.req.nr_flags = NR_REG_ONE_NIC;
			nmd.req.nr_ringid = i;
		}
		/* Only touch one of the rings (rx is already ok) */
		if (g->td_body == receiver_body)
			g->nmd_flags |= NETMAP_NO_TX_POLL;

		/* register interface. Override ifname and ringid etc.
*/ 1355 | if (g->options & OPT_MONITOR_TX) 1356 | g->nmd->req.nr_flags |= NR_MONITOR_TX; 1357 | if (g->options & OPT_MONITOR_RX) 1358 | g->nmd->req.nr_flags |= NR_MONITOR_RX; 1359 | 1360 | t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags | 1361 | NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd); 1362 | if (t->nmd == NULL) { 1363 | D("Unable to open %s: %s", 1364 | t->g->ifname, strerror(errno)); 1365 | continue; 1366 | } 1367 | t->fd = t->nmd->fd; 1368 | 1369 | } else { 1370 | targs[i].fd = g->main_fd; 1371 | } 1372 | t->used = 1; 1373 | t->me = i; 1374 | if (g->affinity >= 0) { 1375 | if (g->affinity < g->cpus) 1376 | t->affinity = g->affinity; 1377 | else 1378 | t->affinity = i % g->cpus; 1379 | } else { 1380 | t->affinity = -1; 1381 | } 1382 | /* default, init packets */ 1383 | initialize_packet(t); 1384 | 1385 | if (pthread_create(&t->thread, NULL, g->td_body, t) == -1) { 1386 | D("Unable to create thread %d: %s", i, strerror(errno)); 1387 | t->used = 0; 1388 | } 1389 | } 1390 | } 1391 | 1392 | static void 1393 | main_thread(struct glob_arg *g) 1394 | { 1395 | int i; 1396 | 1397 | uint64_t prev = 0; 1398 | uint64_t count = 0; 1399 | double delta_t; 1400 | struct timeval tic, toc; 1401 | 1402 | gettimeofday(&toc, NULL); 1403 | for (;;) { 1404 | struct timeval now, delta; 1405 | uint64_t pps, usec, my_count, npkts; 1406 | int done = 0; 1407 | 1408 | delta.tv_sec = g->report_interval/1000; 1409 | delta.tv_usec = (g->report_interval%1000)*1000; 1410 | select(0, NULL, NULL, NULL, &delta); 1411 | gettimeofday(&now, NULL); 1412 | timersub(&now, &toc, &toc); 1413 | my_count = 0; 1414 | for (i = 0; i < g->nthreads; i++) { 1415 | my_count += targs[i].count; 1416 | if (targs[i].used == 0) 1417 | done++; 1418 | } 1419 | usec = toc.tv_sec* 1000000 + toc.tv_usec; 1420 | if (usec < 10000) 1421 | continue; 1422 | npkts = my_count - prev; 1423 | pps = (npkts*1000000 + usec/2) / usec; 1424 | D("%llu pps (%llu pkts in %llu usec)", 1425 | (unsigned long long)pps, 1426 | (unsigned long 
long)npkts,
			(unsigned long long)usec);
		prev = my_count;
		toc = now;
		/* every worker has cleared its 'used' flag: stop reporting */
		if (done == g->nthreads)
			break;
	}

	/* from here on tic/toc hold the start/end times taken from the workers */
	timerclear(&tic);
	timerclear(&toc);
	for (i = 0; i < g->nthreads; i++) {
		struct timespec t_tic, t_toc;
		/*
		 * Join active threads, unregister interfaces and close
		 * file descriptors.
		 */
		if (targs[i].used)
			pthread_join(targs[i].thread, NULL);
		close(targs[i].fd);

		if (targs[i].completed == 0)
			D("ouch, thread %d exited with error", i);

		/*
		 * Collect threads output and extract information about
		 * how long it took to send all the packets.
		 */
		count += targs[i].count;
		t_tic = timeval2spec(&tic);
		t_toc = timeval2spec(&toc);
		/* keep the latest tic and the latest toc seen so far */
		if (!timerisset(&tic) || timespec_ge(&targs[i].tic, &t_tic))
			tic = timespec2val(&targs[i].tic);
		if (!timerisset(&toc) || timespec_ge(&targs[i].toc, &t_toc))
			toc = timespec2val(&targs[i].toc);
	}

	/* print output.
*/ 1463 | timersub(&toc, &tic, &toc); 1464 | delta_t = toc.tv_sec + 1e-6* toc.tv_usec; 1465 | if (g->td_body == sender_body) 1466 | tx_output(count, g->pkt_size, delta_t); 1467 | else 1468 | rx_output(count, delta_t); 1469 | 1470 | if (g->dev_type == DEV_NETMAP) { 1471 | munmap(g->nmd->mem, g->nmd->req.nr_memsize); 1472 | close(g->main_fd); 1473 | } 1474 | } 1475 | 1476 | 1477 | struct sf { 1478 | char *key; 1479 | void *f; 1480 | }; 1481 | 1482 | static struct sf func[] = { 1483 | { "tx", sender_body }, 1484 | { "rx", receiver_body }, 1485 | { "ping", pinger_body }, 1486 | { "pong", ponger_body }, 1487 | { NULL, NULL } 1488 | }; 1489 | 1490 | static int 1491 | tap_alloc(char *dev) 1492 | { 1493 | struct ifreq ifr; 1494 | int fd, err; 1495 | char *clonedev = TAP_CLONEDEV; 1496 | 1497 | (void)err; 1498 | (void)dev; 1499 | /* Arguments taken by the function: 1500 | * 1501 | * char *dev: the name of an interface (or '\0'). MUST have enough 1502 | * space to hold the interface name if '\0' is passed 1503 | * int flags: interface flags (eg, IFF_TUN etc.) 
1504 | */ 1505 | 1506 | #ifdef __FreeBSD__ 1507 | if (dev[3]) { /* tapSomething */ 1508 | static char buf[128]; 1509 | snprintf(buf, sizeof(buf), "/dev/%s", dev); 1510 | clonedev = buf; 1511 | } 1512 | #endif 1513 | /* open the device */ 1514 | if( (fd = open(clonedev, O_RDWR)) < 0 ) { 1515 | return fd; 1516 | } 1517 | D("%s open successful", clonedev); 1518 | 1519 | /* preparation of the struct ifr, of type "struct ifreq" */ 1520 | memset(&ifr, 0, sizeof(ifr)); 1521 | 1522 | #ifdef linux 1523 | ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 1524 | 1525 | if (*dev) { 1526 | /* if a device name was specified, put it in the structure; otherwise, 1527 | * the kernel will try to allocate the "next" device of the 1528 | * specified type */ 1529 | strncpy(ifr.ifr_name, dev, IFNAMSIZ); 1530 | } 1531 | 1532 | /* try to create the device */ 1533 | if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ) { 1534 | D("failed to to a TUNSETIFF: %s", strerror(errno)); 1535 | close(fd); 1536 | return err; 1537 | } 1538 | 1539 | /* if the operation was successful, write back the name of the 1540 | * interface to the variable "dev", so the caller can know 1541 | * it. 
Note that the caller MUST reserve space in *dev (see calling 1542 | * code below) */ 1543 | strcpy(dev, ifr.ifr_name); 1544 | D("new name is %s", dev); 1545 | #endif /* linux */ 1546 | 1547 | /* this is the special file descriptor that the caller will use to talk 1548 | * with the virtual interface */ 1549 | return fd; 1550 | } 1551 | 1552 | int 1553 | main(int arc, char **argv) 1554 | { 1555 | int i; 1556 | 1557 | struct glob_arg g; 1558 | 1559 | int ch; 1560 | int wait_link = 2; 1561 | int devqueues = 1; /* how many device queues */ 1562 | 1563 | bzero(&g, sizeof(g)); 1564 | 1565 | g.main_fd = -1; 1566 | g.td_body = receiver_body; 1567 | g.report_interval = 1000; /* report interval */ 1568 | g.affinity = -1; 1569 | /* ip addresses can also be a range x.x.x.x-x.x.x.y */ 1570 | g.src_ip.name = "10.0.0.1"; 1571 | g.dst_ip.name = "10.1.0.1"; 1572 | g.dst_mac.name = "ff:ff:ff:ff:ff:ff"; 1573 | g.src_mac.name = NULL; 1574 | g.pkt_size = 60; 1575 | g.burst = 512; // default 1576 | g.nthreads = 1; 1577 | g.cpus = 1; 1578 | g.forever = 1; 1579 | g.tx_rate = 0; 1580 | g.frags = 1; 1581 | g.nmr_config = ""; 1582 | g.virt_header = 0; 1583 | 1584 | while ( (ch = getopt(arc, argv, 1585 | "a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:m:")) != -1) { 1586 | struct sf *fn; 1587 | 1588 | switch(ch) { 1589 | default: 1590 | D("bad option %c %s", ch, optarg); 1591 | usage(); 1592 | break; 1593 | 1594 | case 'n': 1595 | g.npackets = atoi(optarg); 1596 | break; 1597 | 1598 | case 'F': 1599 | i = atoi(optarg); 1600 | if (i < 1 || i > 63) { 1601 | D("invalid frags %d [1..63], ignore", i); 1602 | break; 1603 | } 1604 | g.frags = i; 1605 | break; 1606 | 1607 | case 'f': 1608 | for (fn = func; fn->key; fn++) { 1609 | if (!strcmp(fn->key, optarg)) 1610 | break; 1611 | } 1612 | if (fn->key) 1613 | g.td_body = fn->f; 1614 | else 1615 | D("unrecognised function %s", optarg); 1616 | break; 1617 | 1618 | case 'o': /* data generation options */ 1619 | g.options = atoi(optarg); 1620 | break; 1621 | 
1622 | case 'a': /* force affinity */ 1623 | g.affinity = atoi(optarg); 1624 | break; 1625 | 1626 | case 'i': /* interface */ 1627 | /* a prefix of tap: netmap: or pcap: forces the mode. 1628 | * otherwise we guess 1629 | */ 1630 | D("interface is %s", optarg); 1631 | if (strlen(optarg) > MAX_IFNAMELEN - 8) { 1632 | D("ifname too long %s", optarg); 1633 | break; 1634 | } 1635 | strcpy(g.ifname, optarg); 1636 | if (!strcmp(optarg, "null")) { 1637 | g.dev_type = DEV_NETMAP; 1638 | g.dummy_send = 1; 1639 | } else if (!strncmp(optarg, "tap:", 4)) { 1640 | g.dev_type = DEV_TAP; 1641 | strcpy(g.ifname, optarg + 4); 1642 | } else if (!strncmp(optarg, "pcap:", 5)) { 1643 | g.dev_type = DEV_PCAP; 1644 | strcpy(g.ifname, optarg + 5); 1645 | } else if (!strncmp(optarg, "netmap:", 7) || 1646 | !strncmp(optarg, "vale", 4)) { 1647 | g.dev_type = DEV_NETMAP; 1648 | } else if (!strncmp(optarg, "tap", 3)) { 1649 | g.dev_type = DEV_TAP; 1650 | } else { /* prepend netmap: */ 1651 | g.dev_type = DEV_NETMAP; 1652 | sprintf(g.ifname, "netmap:%s", optarg); 1653 | } 1654 | break; 1655 | 1656 | case 'I': 1657 | g.options |= OPT_INDIRECT; /* XXX use indirect buffer */ 1658 | break; 1659 | 1660 | case 'l': /* pkt_size */ 1661 | g.pkt_size = atoi(optarg); 1662 | break; 1663 | 1664 | case 'd': 1665 | g.dst_ip.name = optarg; 1666 | break; 1667 | 1668 | case 's': 1669 | g.src_ip.name = optarg; 1670 | break; 1671 | 1672 | case 'T': /* report interval */ 1673 | g.report_interval = atoi(optarg); 1674 | break; 1675 | 1676 | case 'w': 1677 | wait_link = atoi(optarg); 1678 | break; 1679 | 1680 | case 'W': /* XXX changed default */ 1681 | g.forever = 0; /* do not exit rx even with no traffic */ 1682 | break; 1683 | 1684 | case 'b': /* burst */ 1685 | g.burst = atoi(optarg); 1686 | break; 1687 | case 'c': 1688 | g.cpus = atoi(optarg); 1689 | break; 1690 | case 'p': 1691 | g.nthreads = atoi(optarg); 1692 | break; 1693 | 1694 | case 'D': /* destination mac */ 1695 | g.dst_mac.name = optarg; 1696 | break; 
1697 | 1698 | case 'S': /* source mac */ 1699 | g.src_mac.name = optarg; 1700 | break; 1701 | case 'v': 1702 | verbose++; 1703 | break; 1704 | case 'R': 1705 | g.tx_rate = atoi(optarg); 1706 | break; 1707 | case 'X': 1708 | g.options |= OPT_DUMP; 1709 | break; 1710 | case 'C': 1711 | g.nmr_config = strdup(optarg); 1712 | break; 1713 | case 'H': 1714 | g.virt_header = atoi(optarg); 1715 | break; 1716 | case 'e': /* extra bufs */ 1717 | g.extra_bufs = atoi(optarg); 1718 | break; 1719 | case 'm': 1720 | if (strcmp(optarg, "tx") == 0) { 1721 | g.options |= OPT_MONITOR_TX; 1722 | } else if (strcmp(optarg, "rx") == 0) { 1723 | g.options |= OPT_MONITOR_RX; 1724 | } else { 1725 | D("unrecognized monitor mode %s", optarg); 1726 | } 1727 | break; 1728 | } 1729 | } 1730 | 1731 | if (g.ifname == NULL) { 1732 | D("missing ifname"); 1733 | usage(); 1734 | } 1735 | 1736 | i = system_ncpus(); 1737 | if (g.cpus < 0 || g.cpus > i) { 1738 | D("%d cpus is too high, have only %d cpus", g.cpus, i); 1739 | usage(); 1740 | } 1741 | if (g.cpus == 0) 1742 | g.cpus = i; 1743 | 1744 | if (g.pkt_size < 16 || g.pkt_size > 1536) { 1745 | D("bad pktsize %d\n", g.pkt_size); 1746 | usage(); 1747 | } 1748 | 1749 | if (g.src_mac.name == NULL) { 1750 | static char mybuf[20] = "00:00:00:00:00:00"; 1751 | /* retrieve source mac address. 
*/ 1752 | if (source_hwaddr(g.ifname, mybuf) == -1) { 1753 | D("Unable to retrieve source mac"); 1754 | // continue, fail later 1755 | } 1756 | g.src_mac.name = mybuf; 1757 | } 1758 | /* extract address ranges */ 1759 | extract_ip_range(&g.src_ip); 1760 | extract_ip_range(&g.dst_ip); 1761 | extract_mac_range(&g.src_mac); 1762 | extract_mac_range(&g.dst_mac); 1763 | 1764 | if (g.src_ip.start != g.src_ip.end || 1765 | g.src_ip.port0 != g.src_ip.port1 || 1766 | g.dst_ip.start != g.dst_ip.end || 1767 | g.dst_ip.port0 != g.dst_ip.port1) 1768 | g.options |= OPT_COPY; 1769 | 1770 | if (g.virt_header != 0 && g.virt_header != VIRT_HDR_1 1771 | && g.virt_header != VIRT_HDR_2) { 1772 | D("bad virtio-net-header length"); 1773 | usage(); 1774 | } 1775 | 1776 | if (g.dev_type == DEV_TAP) { 1777 | D("want to use tap %s", g.ifname); 1778 | g.main_fd = tap_alloc(g.ifname); 1779 | if (g.main_fd < 0) { 1780 | D("cannot open tap %s", g.ifname); 1781 | usage(); 1782 | } 1783 | #ifndef NO_PCAP 1784 | } else if (g.dev_type == DEV_PCAP) { 1785 | char pcap_errbuf[PCAP_ERRBUF_SIZE]; 1786 | 1787 | D("using pcap on %s", g.ifname); 1788 | pcap_errbuf[0] = '\0'; // init the buffer 1789 | g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf); 1790 | if (g.p == NULL) { 1791 | D("cannot open pcap on %s", g.ifname); 1792 | usage(); 1793 | } 1794 | #endif /* !NO_PCAP */ 1795 | } else if (g.dummy_send) { /* but DEV_NETMAP */ 1796 | D("using a dummy send routine"); 1797 | } else { 1798 | struct nm_desc base_nmd; 1799 | 1800 | bzero(&base_nmd, sizeof(base_nmd)); 1801 | 1802 | g.nmd_flags = 0; 1803 | g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req); 1804 | if (g.extra_bufs) { 1805 | base_nmd.req.nr_arg3 = g.extra_bufs; 1806 | g.nmd_flags |= NM_OPEN_ARG3; 1807 | } 1808 | 1809 | /* 1810 | * Open the netmap device using nm_open(). 1811 | * 1812 | * protocol stack and may cause a reset of the card, 1813 | * which in turn may take some time for the PHY to 1814 | * reconfigure. 
We do the open here to have time to reset. 1815 | */ 1816 | g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd); 1817 | if (g.nmd == NULL) { 1818 | D("Unable to open %s: %s", g.ifname, strerror(errno)); 1819 | goto out; 1820 | } 1821 | g.main_fd = g.nmd->fd; 1822 | D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem); 1823 | 1824 | devqueues = g.nmd->req.nr_rx_rings; 1825 | 1826 | /* validate provided nthreads. */ 1827 | if (g.nthreads < 1 || g.nthreads > devqueues) { 1828 | D("bad nthreads %d, have %d queues", g.nthreads, devqueues); 1829 | // continue, fail later 1830 | } 1831 | 1832 | if (verbose) { 1833 | struct netmap_if *nifp = g.nmd->nifp; 1834 | struct nmreq *req = &g.nmd->req; 1835 | 1836 | D("nifp at offset %d, %d tx %d rx region %d", 1837 | req->nr_offset, req->nr_tx_rings, req->nr_rx_rings, 1838 | req->nr_arg2); 1839 | for (i = 0; i <= req->nr_tx_rings; i++) { 1840 | D(" TX%d at 0x%lx", i, 1841 | (char *)NETMAP_TXRING(nifp, i) - (char *)nifp); 1842 | } 1843 | for (i = 0; i <= req->nr_rx_rings; i++) { 1844 | D(" RX%d at 0x%lx", i, 1845 | (char *)NETMAP_RXRING(nifp, i) - (char *)nifp); 1846 | } 1847 | } 1848 | 1849 | /* Print some debug information. */ 1850 | fprintf(stdout, 1851 | "%s %s: %d queues, %d threads and %d cpus.\n", 1852 | (g.td_body == sender_body) ? "Sending on" : "Receiving from", 1853 | g.ifname, 1854 | devqueues, 1855 | g.nthreads, 1856 | g.cpus); 1857 | if (g.td_body == sender_body) { 1858 | fprintf(stdout, "%s -> %s (%s -> %s)\n", 1859 | g.src_ip.name, g.dst_ip.name, 1860 | g.src_mac.name, g.dst_mac.name); 1861 | } 1862 | 1863 | out: 1864 | /* Exit if something went wrong. */ 1865 | if (g.main_fd < 0) { 1866 | D("aborting"); 1867 | usage(); 1868 | } 1869 | } 1870 | 1871 | 1872 | if (g.options) { 1873 | D("--- SPECIAL OPTIONS:%s%s%s%s%s\n", 1874 | g.options & OPT_PREFETCH ? " prefetch" : "", 1875 | g.options & OPT_ACCESS ? " access" : "", 1876 | g.options & OPT_MEMCPY ? " memcpy" : "", 1877 | g.options & OPT_INDIRECT ? 
" indirect" : "", 1878 | g.options & OPT_COPY ? " copy" : ""); 1879 | } 1880 | 1881 | g.tx_period.tv_sec = g.tx_period.tv_nsec = 0; 1882 | if (g.tx_rate > 0) { 1883 | /* try to have at least something every second, 1884 | * reducing the burst size to some 0.01s worth of data 1885 | * (but no less than one full set of fragments) 1886 | */ 1887 | uint64_t x; 1888 | int lim = (g.tx_rate)/300; 1889 | if (g.burst > lim) 1890 | g.burst = lim; 1891 | if (g.burst < g.frags) 1892 | g.burst = g.frags; 1893 | x = ((uint64_t)1000000000 * (uint64_t)g.burst) / (uint64_t) g.tx_rate; 1894 | g.tx_period.tv_nsec = x; 1895 | g.tx_period.tv_sec = g.tx_period.tv_nsec / 1000000000; 1896 | g.tx_period.tv_nsec = g.tx_period.tv_nsec % 1000000000; 1897 | } 1898 | if (g.td_body == sender_body) 1899 | D("Sending %d packets every %ld.%09ld s", 1900 | g.burst, g.tx_period.tv_sec, g.tx_period.tv_nsec); 1901 | /* Wait for PHY reset. */ 1902 | D("Wait %d secs for phy reset", wait_link); 1903 | sleep(wait_link); 1904 | D("Ready..."); 1905 | 1906 | /* Install ^C handler. */ 1907 | global_nthreads = g.nthreads; 1908 | signal(SIGINT, sigint_h); 1909 | 1910 | start_threads(&g); 1911 | main_thread(&g); 1912 | return 0; 1913 | } 1914 | 1915 | /* end of file */ 1916 | -------------------------------------------------------------------------------- /examples/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ ! $# = 2 ]; then 4 | echo "Usage: setup.sh ifname [start|finish]" 5 | exit 6 | fi 7 | 8 | if [ ! 
-f ../../netmap-release/examples/vale-ctl ]; then 9 | echo Usage: place ../../netmap-release/examples/vale-ctl 10 | exit 11 | fi 12 | 13 | if [ $2 = start ]; then 14 | ../../netmap-release/examples/vale-ctl -a valem:$1 15 | if [ `uname` = "FreeBSD" ]; then 16 | kldload ../sys/contrib/multistack/multistack.ko 17 | elif [ `uname` = "Linux" ]; then 18 | insmod ../LINUX/multistack_lin.ko 19 | fi 20 | elif [ $2 = finish ]; then 21 | if [ `uname` = "FreeBSD" ]; then 22 | kldunload multistack 23 | elif [ `uname` = "Linux" ]; then 24 | rmmod multistack 25 | fi 26 | ../../netmap-release/examples/vale-ctl -d valem:$1 27 | fi 28 | -------------------------------------------------------------------------------- /examples/test_bind.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #define NETMAP_WITH_LIBS 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #define DEFAULT_VPORT "valem:mp0" 23 | #define TEST_TIME 2 24 | 25 | int 26 | main(int argc, char **argv) 27 | { 28 | int fd; 29 | struct sockaddr_storage ss; 30 | struct sockaddr_in *sin = (struct sockaddr_in *)&ss; 31 | struct msreq msr; 32 | struct nm_desc *nmd; 33 | int i; 34 | uint16_t lport; 35 | 36 | if (argc != 3) { 37 | fprintf(stderr, "Usage: ./test_bind addr port\n"); 38 | return 1; 39 | } 40 | lport = (uint16_t)atoi(argv[2]); 41 | fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 42 | if (!fd) { 43 | perror("socket"); 44 | return 0; 45 | } 46 | sin->sin_family = AF_INET; 47 | sin->sin_port = htons(lport); 48 | if (inet_pton(AF_INET, argv[1], &sin->sin_addr) != 1) { 49 | perror("inet_pton"); 50 | close(fd); 51 | return 0; 52 | } 53 | printf("bind()ing %s %u\n", argv[1], lport); 54 | if (bind(fd, (struct sockaddr *)sin, sizeof(*sin))) { 55 | perror("bind"); 56 | 
close(fd); 57 | return 0; 58 | } 59 | 60 | nmd = nm_open(DEFAULT_VPORT, NULL, 0, NULL); 61 | if (nmd == NULL) { 62 | fprintf(stderr, "Unable to open %s\n", DEFAULT_VPORT); 63 | close(fd); 64 | return -1; 65 | } 66 | 67 | strncpy(msr.mr_name, nmd->req.nr_name, sizeof(msr.mr_name)); 68 | for (i = 0; i < TEST_TIME; i++) { 69 | printf("MultiStack-bind()ing %s %u\n", argv[1], lport + i); 70 | sin->sin_port = htons(lport + i); 71 | msr.mr_cmd = MULTISTACK_BIND; 72 | msr.mr_sin = *sin; 73 | msr.mr_proto = IPPROTO_TCP; 74 | 75 | if (ioctl(nmd->fd, NIOCCONFIG, &msr)) { 76 | perror("ioctl"); 77 | continue; 78 | } 79 | printf("success for port %d\n", lport + i); 80 | 81 | msr.mr_cmd = MULTISTACK_UNBIND; 82 | if (ioctl(nmd->fd, NIOCCONFIG, &msr)) { 83 | perror("ioctl"); 84 | } 85 | } 86 | 87 | munmap(nmd->mem, nmd->req.nr_memsize); 88 | close(nmd->fd); 89 | close(fd); 90 | /* 91 | munmap(mmap_addr, mmap_size); 92 | close(nfd); 93 | close(fd); 94 | */ 95 | return 0; 96 | } 97 | -------------------------------------------------------------------------------- /sys/contrib/multistack/Makefile: -------------------------------------------------------------------------------- 1 | .PATH: . 2 | KMOD= multistack 3 | SRCS= multistack.c 4 | SRCS+= opt_inet.h opt_inet6.h opt_sctp.h opt_compat.h opt_ipsec.h 5 | TARGET!= uname -m 6 | IDENT!= uname -i 7 | CFLAGS+= -I$(KSRC) 8 | CFLAGS+= -I../../../sys/ 9 | CFLAGS+= -I../../ 10 | CFLAGS+= -I$(KSRC)/sys/$(TARGET)/compile/$(IDENT) 11 | CFLAGS+= $(NMVER) 12 | 13 | .include 14 | -------------------------------------------------------------------------------- /sys/contrib/multistack/multistack.c: -------------------------------------------------------------------------------- 1 | /* 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2015 NEC Europe Ltd. All rights reserved. 5 | * All rights reserved. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of NEC Europe Ltd. nor the names of 18 | * its contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #if defined(__FreeBSD__) 35 | #include /* prerequisite */ 36 | 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include /* cdevsw struct */ 42 | #include 43 | #include 44 | 45 | /* to compile netmap_kern.h */ 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include /* XXX _IOWR. Should we use ioccom.h ? 
*/ 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include /* struct in_addr in ip.h */ 57 | #include /* struct inpcb */ 58 | #include /* struct ip */ 59 | #include /* struct ip6 */ 60 | #include /* in6_sprintf */ 61 | #include /* V_tcbinfo */ 62 | /* For debug */ 63 | #include 64 | #include /* struct tcp_hdr */ 65 | 66 | /* For ms_pcb_clash() */ 67 | #include "opt_inet6.h" 68 | #include "opt_sctp.h" 69 | #include 70 | #include 71 | #include 72 | #include 73 | #ifdef SCTP 74 | #include 75 | #endif /* SCTP */ 76 | #ifdef INET6 77 | #include 78 | #include 79 | #endif 80 | extern struct protosw inetsw[]; 81 | 82 | #include 83 | #define MS_RWLOCK_T struct rwlock 84 | #define MS_RWINIT(_lock, _m) rw_init(_lock, _m) 85 | #define MS_WLOCK() rw_wlock(&ms_global.lock) 86 | #define MS_WUNLOCK() rw_wunlock(&ms_global.lock) 87 | #define MS_RLOCK() rw_rlock(&ms_global.lock) 88 | #define MS_RUNLOCK() rw_runlock(&ms_global.lock) 89 | 90 | #define MS_LIST_INIT(_head) LIST_INIT(_head) 91 | #define MS_LIST_ENTRY(_type) LIST_ENTRY(_type) 92 | #define MS_LIST_ADD(_head, _n, _pos) LIST_INSERT_HEAD(_head, _n, _pos) 93 | #define MS_LIST_DEL(_n, _pos) LIST_REMOVE(_n, _pos) 94 | LIST_HEAD(ms_routelist, ms_route); 95 | #define MS_LIST_FOREACH LIST_FOREACH 96 | #define MS_LIST_FOREACH_SAFE LIST_FOREACH_SAFE 97 | #define MS_ROUTE_LIST struct ms_routelist 98 | 99 | #define MS_GET_VAR(lval) (lval) 100 | #define MS_SET_VAR(lval, p) ((lval) = (p)) 101 | 102 | #define MODULE_GLOBAL(__SYMBOL) V_##__SYMBOL 103 | #elif defined (linux) 104 | 105 | #include /* from netmap-release */ 106 | #include 107 | #include 108 | #include 109 | #endif /* linux */ 110 | 111 | /* Common headers */ 112 | #include 113 | #include /* XXX Provide path in Makefile */ 114 | #include 115 | 116 | #define MS_NAME "valem:" 117 | #define MS_ROUTEHASHSIZ 16384 118 | #define MS_F_STACK 0x01 119 | #define MS_F_HOST 0x02 120 | 121 | #ifdef MULTITACK_MBOXFILTER 122 | uint16_t udp_tbl[65536]; 123 | uint16_t 
tcp_tbl[65536]; 124 | #endif /* MULTITACK_MBOXFILTER */ 125 | 126 | /* struct tcphdr of FreeBSD */ 127 | struct ms_tcphdr { 128 | u_short th_sport; /* source port */ 129 | u_short th_dport; /* destination port */ 130 | tcp_seq th_seq; /* sequence number */ 131 | tcp_seq th_ack; /* acknowledgement number */ 132 | #if defined(__LITTLE_ENDIAN_BITFIELD) 133 | u_char th_x2:4, /* (unused) */ 134 | th_off:4; /* data offset */ 135 | #elif defined (__BIG_ENDIAN_BITFIELD) 136 | u_char th_off:4, /* data offset */ 137 | th_x2:4; /* (unused) */ 138 | #endif 139 | u_char th_flags; 140 | #define TH_FIN 0x01 141 | #define TH_SYN 0x02 142 | #define TH_RST 0x04 143 | #define TH_PUSH 0x08 144 | #define TH_ACK 0x10 145 | #define TH_URG 0x20 146 | #define TH_ECE 0x40 147 | #define TH_CWR 0x80 148 | #define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR) 149 | #define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR" 150 | 151 | u_short th_win; /* window */ 152 | u_short th_sum; /* checksum */ 153 | u_short th_urp; /* urgent pointer */ 154 | }; 155 | 156 | static inline void 157 | ip_sprintf(char *buf, const struct in_addr *addr) 158 | { 159 | const uint8_t *p = (const uint8_t *)addr; 160 | sprintf(buf, "%u.%u.%u.%u", p[0], p[1], p[2], p[3]); 161 | } 162 | 163 | static void 164 | ms_addr_sprintf(char *buf, const struct sockaddr *sa) 165 | { 166 | if (sa->sa_family == AF_INET) 167 | ip_sprintf(buf, &((const struct sockaddr_in *)sa)->sin_addr); 168 | else if (sa->sa_family == AF_INET6) 169 | ip6_sprintf(buf, &((const struct sockaddr_in6 *)sa)->sin6_addr); 170 | } 171 | 172 | #if 0 173 | static inline void 174 | eth_sprintf(char *buf, const uint8_t *addr) 175 | { 176 | sprintf(buf, "%02x:%02x:%02x:%02x:%02x:%02x", addr[0], addr[1], 177 | addr[2], addr[3], addr[4], addr[5]); 178 | } 179 | 180 | /* only for debug */ 181 | static void 182 | ms_pkt2str(const uint8_t *buf, char *dst) 183 | { 184 | uint16_t et; 185 | const uint8_t *th; 186 | char 
saddr_str[INET6_ADDRSTRLEN], daddr_str[INET6_ADDRSTRLEN]; 187 | char smac_str[18], dmac_str[18]; 188 | const struct ether_header *eth = (const struct ether_header *)buf; 189 | const struct ms_tcphdr *tcph; 190 | 191 | et = ntohs(eth->ether_type); 192 | eth_sprintf(smac_str, eth->ether_shost); 193 | eth_sprintf(dmac_str, eth->ether_dhost); 194 | 195 | if (et == ETHERTYPE_IP) { 196 | const struct ip *iph = (const struct ip *)(buf + ETHER_HDR_LEN); 197 | 198 | // th = (uint8_t *)iph + (iph->ip_hl << 2); 199 | th = (const uint8_t *)iph; 200 | // th += (iph->ip_hl << 2); 201 | th += 20; 202 | ip_sprintf(saddr_str, &iph->ip_src); 203 | ip_sprintf(daddr_str, &iph->ip_dst); 204 | 205 | sprintf(dst, "%s %s:%u > %s %s:%u %u len %u", 206 | smac_str, saddr_str, ntohs(*(const uint16_t *)th), 207 | dmac_str, daddr_str, ntohs(*( ((const uint16_t *)th)+1)), 208 | iph->ip_p, ntohs(iph->ip_len)); 209 | if (iph->ip_p == IPPROTO_TCP) { 210 | tcph = (const struct ms_tcphdr *)th; 211 | sprintf(dst + strlen(dst), " tcp flags 0x%x seq %u ack %u", tcph->th_flags, tcph->th_seq, tcph->th_ack); 212 | 213 | } 214 | } else if (et == ETHERTYPE_IPV6) { 215 | const struct ip6_hdr *ip6 = (const struct ip6_hdr *)(buf + ETHER_HDR_LEN); 216 | 217 | th = (const uint8_t *)(ip6+1); 218 | ip6_sprintf(saddr_str, &ip6->ip6_src); 219 | ip6_sprintf(daddr_str, &ip6->ip6_src); 220 | sprintf(dst, "%s %s:%u > %s:%s:%u %u len %u", 221 | smac_str, saddr_str, ntohs(*(const uint16_t *)th), 222 | dmac_str, daddr_str, ntohs(*( ((const uint16_t *)th)+1)), 223 | ip6->ip6_nxt, ntohs(ip6->ip6_plen)); 224 | if (ip6->ip6_nxt == IPPROTO_TCP) { 225 | tcph = (const struct ms_tcphdr *)th; 226 | sprintf(dst + strlen(dst), "tcp flags 0x%x seq %u ack %u", tcph->th_flags, tcph->th_seq, tcph->th_ack); 227 | 228 | } 229 | } else if (et == ETHERTYPE_ARP) { 230 | const struct arphdr *ah = (const struct arphdr *)(buf + ETHER_HDR_LEN); 231 | 232 | if (ntohs(ah->ar_op) == ARPOP_REQUEST) { 233 | ip_sprintf(saddr_str, 234 | (const struct 
in_addr *)((const char *)(ah+1) + 6)); 235 | ip_sprintf(daddr_str, 236 | (const struct in_addr *)((const char *)(ah+1) + 16)); 237 | sprintf(dst, "%s %s > %s ARP whohas %s", smac_str, 238 | saddr_str, dmac_str, daddr_str); 239 | } else if (ntohs(ah->ar_op) == ARPOP_REPLY) { 240 | ip_sprintf(saddr_str, 241 | (const struct in_addr *)((const char *)(ah+1) + 6)); 242 | ip_sprintf(daddr_str, 243 | (const struct in_addr *)((const char *)(ah+1) + 16)); 244 | sprintf(dst, "%s %s > %s ARP reply %s", smac_str, 245 | saddr_str, dmac_str, daddr_str); 246 | } else 247 | sprintf(dst, "%s > %s unknown ARP op %u", 248 | saddr_str, daddr_str, ah->ar_op); 249 | } else 250 | sprintf(dst, "unknown protocol"); 251 | } 252 | #endif /* 0 */ 253 | 254 | /* 255 | * container of 3-tuple registered by the app/port. 256 | * The app/port can register multiple 3-tuples, but a unique 3-tuple can 257 | * be registered only by a single app/port. 258 | * A single 3-tuple can be associated with only a single destination. 
259 | * XXX fix alignment 260 | */ 261 | struct ms_route { 262 | MS_LIST_ENTRY(ms_route) next; /* hlist_node in linux */ 263 | struct msaddr addr; 264 | uint8_t bdg_port; 265 | uint8_t bdg_dstport; 266 | }; 267 | 268 | static void 269 | ms_rt2str(const struct ms_route *mrt, char *dst) 270 | { 271 | char tmp[64]; 272 | ms_addr_sprintf(tmp, &mrt->addr.sa); 273 | sprintf(dst, "bdg_port %u->%u %s:%u %u", 274 | mrt->bdg_port, mrt->bdg_dstport, tmp, 275 | ntohs(mrt->addr.sin.sin_port), mrt->addr.protocol); 276 | } 277 | 278 | /* useful pointers to manipulate 3-tuple */ 279 | struct ms_ptrs { 280 | uint32_t *addr; 281 | uint16_t *port; 282 | uint8_t *proto; 283 | uint8_t addrlen; 284 | uint8_t hashoff; 285 | }; 286 | 287 | static __inline int 288 | ms_addr_equal(struct ms_route *m, struct ms_ptrs *p) 289 | { 290 | return !memcmp(p->addr, &m->addr.sin.sin_addr, p->addrlen) && 291 | *p->proto == m->addr.protocol && 292 | *p->port == m->addr.sin.sin_port; 293 | } 294 | 295 | struct ms_portinfo { 296 | uint32_t flags; 297 | }; 298 | 299 | static struct ms_global { 300 | MS_ROUTE_LIST routelist[MS_ROUTEHASHSIZ]; 301 | struct ms_portinfo portinfo[NM_BDG_MAXPORTS]; 302 | MS_RWLOCK_T lock; 303 | int num_routes; 304 | } ms_global; 305 | 306 | /* writer-lock must be owned */ 307 | static void 308 | ms_route_free(struct ms_route *mrt) 309 | { 310 | char buf[64]; 311 | ms_rt2str(mrt, buf); 312 | D("freeing entry %s", buf); 313 | 314 | MS_LIST_DEL(mrt, next); 315 | free(mrt, M_DEVBUF); 316 | --ms_global.num_routes; 317 | } 318 | 319 | /* taken from netmap implementation */ 320 | #define mix(a, b, c) \ 321 | do { \ 322 | a -= b; a -= c; a ^= (c >> 13); \ 323 | b -= c; b -= a; b ^= (a << 8); \ 324 | c -= a; c -= b; c ^= (b >> 13); \ 325 | a -= b; a -= c; a ^= (c >> 12); \ 326 | b -= c; b -= a; b ^= (a << 16); \ 327 | c -= a; c -= b; c ^= (b >> 5); \ 328 | a -= b; a -= c; a ^= (c >> 3); \ 329 | b -= c; b -= a; b ^= (a << 10); \ 330 | c -= a; c -= b; c ^= (b >> 15); \ 331 | } while 
(/*CONSTCOND*/0) 332 | 333 | static inline uint32_t 334 | ms_rthash(struct ms_ptrs *ptrs) 335 | { 336 | uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key 337 | uint8_t *p; 338 | 339 | b += *ptrs->proto; 340 | p = (uint8_t *)ptrs->port; 341 | b += p[1] << 16; 342 | b += p[0] << 8; 343 | p = (uint8_t *)ptrs->addr + ptrs->hashoff; 344 | b += p[3]; 345 | a += p[2] << 24; 346 | a += p[1] << 16; 347 | a += p[0] << 8; 348 | mix(a, b, c); 349 | #define MS_ROUTE_RTHASH_MASK (MS_ROUTEHASHSIZ-1) 350 | return (c & MS_ROUTE_RTHASH_MASK); 351 | } 352 | #undef mix 353 | 354 | #ifndef MULTISTACK_NOIPV4CSUM 355 | /* from tcp_lro.c iph->ip_sum = 0xffff ^ do_csum_data(...) */ 356 | static inline uint16_t 357 | ipv4_csum(uint16_t *raw, int len) 358 | { 359 | uint32_t csum; 360 | csum = 0; 361 | while (len > 0) { 362 | csum += *raw; 363 | raw++; 364 | csum += *raw; 365 | raw++; 366 | len -= 4; 367 | } 368 | csum = (csum >> 16) + (csum & 0xffff); 369 | csum = (csum >> 16) + (csum & 0xffff); 370 | return (uint16_t)csum; 371 | } 372 | #endif /* MULTISTACK_IPV4CSUM */ 373 | 374 | static struct ms_route * 375 | ms_route_pkt(uint8_t *buf, uint8_t **hint, int input) 376 | { 377 | struct ms_route *mrt; 378 | MS_ROUTE_LIST *head; 379 | uint16_t et; 380 | struct ms_ptrs ptrs; 381 | 382 | et = ntohs(*((uint16_t *)(buf + ETHER_ADDR_LEN * 2))); 383 | if (et == ETHERTYPE_IP) { 384 | struct ip *iph = (struct ip *)(buf + ETHER_HDR_LEN); 385 | #ifndef MULTISTACK_NOIPV4CSUM 386 | uint16_t sum; 387 | 388 | sum = iph->ip_sum; 389 | iph->ip_sum = 0; 390 | if (unlikely(sum != 391 | (0xffff ^ ipv4_csum((uint16_t *)iph, sizeof(*iph))))) { 392 | iph->ip_sum = sum; 393 | goto error; 394 | } 395 | iph->ip_sum = sum; 396 | #endif /* MULTISTACK_IPV4CSUM */ 397 | ptrs.proto = (uint8_t *)&iph->ip_p; 398 | if (input) { 399 | ptrs.addr = (uint32_t *)&iph->ip_dst; 400 | ptrs.port = (uint16_t *)((uint8_t *)iph 401 | + (iph->ip_hl<<2)) + 1; 402 | } else { 403 | ptrs.addr = (uint32_t *)&iph->ip_src; 404 | 
ptrs.port = (uint16_t *) ((uint8_t *)iph + 405 | (iph->ip_hl<<2)); 406 | } 407 | ptrs.addrlen = 4; 408 | ptrs.hashoff = 0; 409 | } else if (et == ETHERTYPE_IPV6) { 410 | struct ip6_hdr *ip6 = (struct ip6_hdr *)(buf + ETHER_HDR_LEN); 411 | 412 | ptrs.proto = (uint8_t *)&ip6->ip6_nxt; 413 | if (input) { 414 | ptrs.addr = (uint32_t *)&ip6->ip6_dst; 415 | ptrs.port = (uint16_t *)(&ip6 + 1) + 1; 416 | } else { 417 | ptrs.addr = (uint32_t *)&ip6->ip6_src; 418 | ptrs.port = (uint16_t *)(&ip6 + 1); 419 | } 420 | ptrs.addrlen = 16; 421 | ptrs.hashoff = 3; 422 | } else 423 | goto error; 424 | 425 | MS_RLOCK(); 426 | 427 | /* the least significant 4 bytes for IPv6 */ 428 | head = &ms_global.routelist[ms_rthash(&ptrs)]; 429 | MS_LIST_FOREACH(mrt, head, next) { 430 | if (ms_addr_equal(mrt, &ptrs)) { 431 | MS_RUNLOCK(); 432 | *hint = (uint8_t *) 433 | (input ? (ptrs.port-1) : (ptrs.port+1)); 434 | return mrt; 435 | } 436 | } 437 | MS_RUNLOCK(); 438 | error: 439 | return NULL; 440 | } 441 | 442 | #ifdef MULTITACK_MBOXFILTER 443 | static uint8_t 444 | ms_route_pkt2(uint8_t *buf, uint8_t **hint) 445 | { 446 | uint16_t et = ntohs(*((uint16_t *)(buf + ETHER_ADDR_LEN * 2))); 447 | 448 | if (et == ETHERTYPE_IP) { 449 | struct ip *iph = (struct ip *)(buf + ETHER_HDR_LEN); 450 | uint16_t sport, dport, *p, *tbl = NULL; 451 | #ifndef MULTISTACK_NOIPV4CSUM 452 | uint16_t sum; 453 | 454 | sum = iph->ip_sum; 455 | iph->ip_sum = 0; 456 | if (unlikely(sum != 457 | (0xffff ^ ipv4_csum((uint16_t *)iph, sizeof(*iph))))) { 458 | iph->ip_sum = sum; 459 | return NM_BDG_NOPORT; 460 | } 461 | iph->ip_sum = sum; 462 | #endif /* MULTISTACK_IPV4CSUM */ 463 | if (iph->ip_p == IPPROTO_UDP) 464 | tbl = udp_tbl; 465 | else if (iph->ip_p == IPPROTO_TCP) 466 | tbl = tcp_tbl; 467 | else 468 | return NM_BDG_NOPORT; 469 | 470 | p = (uint16_t *)((uint8_t *)iph + (iph->ip_hl<<2)); 471 | sport = ntohs(*p++); 472 | dport = ntohs(*p); 473 | 474 | if (tbl[dport] < NM_BDG_MAXPORTS) 475 | /* XXX go to default middlebox? 
*/ 476 | return tbl[dport]; 477 | else if (tbl[sport] < NM_BDG_MAXPORTS) 478 | return tbl[sport]; 479 | } 480 | return NM_BDG_NOPORT; 481 | } 482 | #endif /* MULTITACK_MBOXFILTER */ 483 | 484 | /* ephemeral VALE ports do not have non-NULL ifp */ 485 | static inline int 486 | ms_host_na(const struct netmap_vp_adapter *na) 487 | { 488 | return na->up.ifp && NA(na->up.ifp)->na_vp != na; 489 | // return na->up.pdev ? 0 : 1; 490 | } 491 | 492 | /* Lookup function to be registered */ 493 | static u_int 494 | #ifdef NETMAP_API_4 495 | ms_lookup(struct nm_bdg_fwd *ft, uint8_t *ring_nr, const struct netmap_adapter *na) 496 | #else 497 | ms_lookup(struct nm_bdg_fwd *ft, uint8_t *ring_nr, 498 | struct netmap_vp_adapter *na) 499 | #endif 500 | { 501 | struct ms_route *mrt; 502 | uint8_t *hint; 503 | int input; 504 | // char tmp[256]; 505 | 506 | #ifdef MULTITACK_MBOXFILTER 507 | if (ms_global.portinfo[na->bdg_port].flags & MS_F_STACK) { 508 | return NM_BDG_NOPORT; /* XXX */ 509 | } else { 510 | *ring_nr = 0; 511 | return ms_route_pkt2(ft->ft_buf, &hint); 512 | } 513 | #endif /* MULTITACK_MBOXFILTER */ 514 | 515 | /* XXX treat packets from an unrecognized port as input */ 516 | // ms_pkt2str(ft->ft_buf, tmp); 517 | 518 | /* we don't validate packets from host stack */ 519 | if (ms_host_na(na)) { 520 | // return na->up.na_vp->bdg_port; 521 | return NA(na->up.ifp)->na_vp->bdg_port; 522 | } 523 | 524 | input = ms_global.portinfo[na->bdg_port].flags & MS_F_STACK ? 0 : 1; 525 | 526 | mrt = ms_route_pkt(ft->ft_buf, &hint, input); 527 | if (mrt == NULL) 528 | return na->up.na_hostvp->bdg_port; /* going to host stack */ 529 | /* The least significant byte of the opposite port */ 530 | *ring_nr = ntohs(*hint) & 0xF; 531 | return input ? mrt->bdg_port : mrt->bdg_dstport; 532 | } 533 | 534 | /* Callback on destruction of the bridge port (incl. 
process dies) */ 535 | static void 536 | #ifdef NETMAP_API_4 537 | ms_dtor(u_int bdg, u_int port) 538 | #else 539 | ms_dtor(const struct netmap_vp_adapter *vpna) 540 | #endif 541 | { 542 | struct ms_route *mrt, *tmp; 543 | MS_ROUTE_LIST *head; 544 | int i; 545 | #ifndef NETMAP_API_4 546 | u_int port = vpna->bdg_port; 547 | #endif 548 | #ifdef linux 549 | (void)tmp; 550 | #endif 551 | 552 | MS_WLOCK(); 553 | /* XXX should be optimized */ 554 | for (i = 0; i < MS_ROUTEHASHSIZ; i++) { 555 | head = &ms_global.routelist[i]; 556 | MS_LIST_FOREACH_SAFE(mrt, head, next, tmp) { 557 | if (mrt->bdg_port == port) 558 | ms_route_free(mrt); 559 | } 560 | } 561 | bzero(&ms_global.portinfo[port], sizeof(struct ms_portinfo)); 562 | MS_WUNLOCK(); 563 | } 564 | 565 | #ifdef __FreeBSD__ 566 | int 567 | ms_getifname(struct sockaddr *sa, char *ifname) 568 | { 569 | struct ifnet *ifn; 570 | struct ifaddr *ifa; 571 | int retval = 0; 572 | 573 | IFNET_RLOCK(); 574 | TAILQ_FOREACH(ifn, &MODULE_GLOBAL(ifnet), if_list) { 575 | IF_ADDR_RLOCK(ifn); 576 | TAILQ_FOREACH(ifa, &ifn->if_addrlist, ifa_list) { 577 | if (sa->sa_family == AF_INET) { 578 | if (satosin(sa)->sin_addr.s_addr != 579 | satosin(ifa->ifa_addr)->sin_addr.s_addr) 580 | continue; 581 | } 582 | if (sa->sa_family == AF_INET6) { 583 | if (!IN6_ARE_ADDR_EQUAL( 584 | &satosin6(sa)->sin6_addr, 585 | &satosin6(ifa->ifa_addr)->sin6_addr)) 586 | continue; 587 | } 588 | retval = 1; 589 | strncpy(ifname, ifn->if_xname, IFNAMSIZ); 590 | } 591 | IF_ADDR_RUNLOCK(ifn); 592 | if (retval) 593 | break; 594 | } 595 | IFNET_RUNLOCK(); 596 | return retval; 597 | } 598 | 599 | int 600 | ms_pcb_clash(struct sockaddr *sa, uint8_t protocol) 601 | { 602 | uint8_t proto; 603 | struct inpcb *inp; 604 | int error; 605 | struct in_addr faddr = {INADDR_ANY}; 606 | struct sockaddr_in *sin = (struct sockaddr_in *)sa; 607 | u_short fport = 0; 608 | char buf[64]; /* just for debug */ 609 | #ifdef INET6 610 | struct in6_addr faddr6 = IN6ADDR_ANY_INIT; 611 | struct 
sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; 612 | 613 | if (sa->sa_family == AF_INET6) { 614 | proto = inet6sw[ip6_protox[protocol]].pr_protocol; 615 | ip6_sprintf(buf, &sin6->sin6_addr); 616 | } else 617 | #endif /* INET6 */ 618 | { 619 | proto = inetsw[ip_protox[protocol]].pr_protocol; 620 | ip_sprintf(buf, &sin->sin_addr); 621 | } 622 | 623 | if (proto != protocol) /* not registered in protosw */ 624 | return 0; 625 | 626 | if (proto == IPPROTO_UDP || proto == IPPROTO_TCP) { 627 | struct inpcbinfo *ipi; 628 | 629 | ipi = proto == IPPROTO_UDP ? &V_udbinfo : &V_tcbinfo; 630 | INP_INFO_RLOCK(ipi); 631 | #ifdef INET6 632 | if (sa->sa_family == AF_INET6) 633 | inp = in6_pcblookup(ipi, &faddr6, fport, 634 | &sin6->sin6_addr, sin6->sin6_port, 635 | INPLOOKUP_WILDCARD|INPLOOKUP_RLOCKPCB, NULL); 636 | else 637 | #endif /* INET6 */ 638 | inp = in_pcblookup(ipi, faddr, fport, 639 | sin->sin_addr, sin->sin_port, 640 | INPLOOKUP_WILDCARD|INPLOOKUP_RLOCKPCB, NULL); 641 | if (!inp || inp->inp_socket == NULL) { 642 | D("%s:%u is not bound", buf, ntohs(sin->sin_port)); 643 | error = ENOENT;/* I haven't bind this address */ 644 | } else { 645 | error = cr_canseeinpcb(curthread->td_ucred, inp); 646 | if (error) { 647 | /* I'm not the one bind() before */ 648 | D("%s:%u is not mine", buf, ntohs(sin->sin_port)); 649 | } 650 | } 651 | if (inp) 652 | INP_RUNLOCK(inp); 653 | INP_INFO_RUNLOCK(ipi); 654 | } 655 | #ifdef SCTP 656 | else if (proto == IPPROTO_SCTP) { 657 | struct sctp_inpcb *sinp; 658 | 659 | /* XXX not sure how we should do on find_tcp_pool and vrf_id */ 660 | sinp = sctp_pcb_findep(sa, 1, 0, SCTP_DEFAULT_VRFID); 661 | if (sinp == NULL) { 662 | D("%s:%u is not bound", buf, ntohs(sin->sin_port)); 663 | error = ENOENT; 664 | } else { 665 | inp = &sinp->ip_inp.inp; 666 | if (inp->inp_socket == NULL) { 667 | D("%s:%u is not bound", buf, ntohs(sin->sin_port)); 668 | error = ENOENT; 669 | } else { 670 | INP_RLOCK(inp); 671 | error = cr_canseeinpcb(curthread->td_ucred, inp); 
672 | INP_RUNLOCK(inp); 673 | if (error) { 674 | D("%s:%u is not mine", buf, ntohs(sin->sin_port)); 675 | } 676 | } 677 | SCTP_INP_DECR_REF(sinp); 678 | } 679 | } 680 | #endif /* SCTP */ 681 | else /* we don't know how to check, take conservative.. */ 682 | error = ENOENT; 683 | return error; 684 | } 685 | #endif /* __FreeBSD__ */ 686 | 687 | static int 688 | ms_config(struct nm_ifreq *data) 689 | { 690 | struct msreq *msr = (struct msreq *)data; 691 | struct nmreq nmr; 692 | struct ms_route *mrt = NULL, *tmp; 693 | struct ms_ptrs ptrs; 694 | MS_ROUTE_LIST *head; 695 | int error = 0, me; 696 | char dbgbuf[64]; /* just for debug message */ 697 | 698 | if (msr->mr_cmd != MULTISTACK_BIND && msr->mr_cmd != MULTISTACK_UNBIND) 699 | return EINVAL; 700 | 701 | /* the process must have a credential, bind()ing beforehand */ 702 | if (ms_pcb_clash(&msr->mr_sa, msr->mr_proto)) 703 | return ENOENT; 704 | 705 | /* Get my index of bridge and port */ 706 | bzero(&nmr, sizeof(nmr)); 707 | nmr.nr_cmd = NETMAP_BDG_LIST; 708 | nmr.nr_version = NETMAP_API; 709 | strncpy(nmr.nr_name, msr->mr_name, sizeof(nmr.nr_name)); 710 | error = netmap_bdg_ctl(&nmr, NULL); 711 | if (error) { /* invalid request of interface or bridge */ 712 | D("%s is not in the bridge", nmr.nr_name); 713 | return error; 714 | } 715 | me = nmr.nr_arg2; 716 | 717 | /* get pointers to parameters */ 718 | ptrs.proto = &msr->mr_proto; 719 | if (msr->mr_sa.sa_family == AF_INET) { 720 | ptrs.addr = (uint32_t *)&msr->mr_sin.sin_addr.s_addr; 721 | ptrs.port = &msr->mr_sin.sin_port; 722 | ptrs.addrlen = 4; 723 | ptrs.hashoff = 0; 724 | } else if (msr->mr_sa.sa_family == AF_INET6) { 725 | ptrs.addr = (uint32_t *)&msr->mr_sin6.sin6_addr; 726 | ptrs.port = &msr->mr_sin6.sin6_port; 727 | ptrs.addrlen = 16; 728 | ptrs.hashoff = 3; /* use least significant 4 byte */ 729 | } else 730 | return EINVAL; 731 | 732 | MS_WLOCK(); 733 | 734 | /* Find an existing entry */ 735 | head = &ms_global.routelist[ms_rthash(&ptrs)]; 736 | 
MS_LIST_FOREACH(tmp, head, next) { 737 | if (ms_addr_equal(tmp, &ptrs)) { 738 | mrt = tmp; 739 | break; 740 | } 741 | } 742 | if (msr->mr_cmd == MULTISTACK_UNBIND) { 743 | if (!mrt) { 744 | D("UNBIND: not registered"); 745 | error = ENOENT; 746 | goto out_unlock; 747 | } 748 | ms_route_free(mrt); 749 | } else { /* MULTITACK_BIND */ 750 | char name[IFNAMSIZ]; 751 | 752 | if (mrt) { 753 | D("BIND: already registered"); 754 | error = EBUSY; 755 | goto out_unlock; 756 | } 757 | /* check the local address is valid */ 758 | if (!ms_getifname(&msr->mr_sa, name)) { 759 | ms_addr_sprintf(dbgbuf, &msr->mr_sa); 760 | D("%s doesn't exist", dbgbuf); 761 | return EINVAL; 762 | } 763 | 764 | /* Is the interface for this address already in the bridge? */ 765 | bzero(&nmr, sizeof(nmr)); 766 | nmr.nr_cmd = NETMAP_BDG_LIST; 767 | nmr.nr_version = NETMAP_API; 768 | strcpy(nmr.nr_name, MS_NAME); 769 | strcat(nmr.nr_name, name); 770 | error = netmap_bdg_ctl(&nmr, NULL); 771 | if (error) { 772 | D("%s is not in the bridge", nmr.nr_name); 773 | goto out_unlock; 774 | } 775 | 776 | mrt = (struct ms_route *)malloc(sizeof(*mrt), M_DEVBUF, 777 | M_NOWAIT|M_ZERO); 778 | if (!mrt) { 779 | error = ENOMEM; 780 | goto out_unlock; 781 | } 782 | mrt->addr = msr->mr_ifru.mr_addr; 783 | mrt->bdg_port = me; 784 | mrt->bdg_dstport = nmr.nr_arg2; 785 | MS_LIST_ADD(head, mrt, next); 786 | ms_global.portinfo[me].flags |= MS_F_STACK; 787 | ++ms_global.num_routes; 788 | 789 | ms_rt2str(mrt, dbgbuf); 790 | D("%s has been registered", dbgbuf); 791 | } 792 | out_unlock: 793 | MS_WUNLOCK(); 794 | return (error); 795 | } 796 | static struct netmap_bdg_ops ms_ops = {ms_lookup, ms_config, ms_dtor}; 797 | 798 | #ifdef MULTITACK_MBOXFILTER 799 | static void 800 | init_tables(void) 801 | { 802 | int i; 803 | bzero(udp_tbl, sizeof(udp_tbl)); 804 | bzero(tcp_tbl, sizeof(tcp_tbl)); 805 | for (i = 0; i < NM_BDG_MAXPORTS; i++) 806 | udp_tbl[i] = tcp_tbl[i] = i; 807 | } 808 | #endif /* MULTITACK_MBOXFILTER */ 809 | 810 | /* 
we assume a bridge with MS_NAME is already created */ 811 | int 812 | ms_init(void) 813 | { 814 | struct nmreq nmr; 815 | int i; 816 | 817 | bzero(&nmr, sizeof(nmr)); 818 | nmr.nr_version = NETMAP_API; 819 | strncpy(nmr.nr_name, MS_NAME, strlen(MS_NAME)); 820 | nmr.nr_cmd = NETMAP_BDG_REGOPS; 821 | if (netmap_bdg_ctl(&nmr, &ms_ops)) { 822 | D("no bridge named %s", nmr.nr_name); 823 | return ENOENT; 824 | } 825 | 826 | bzero(&ms_global, sizeof(ms_global)); 827 | MS_RWINIT(&ms_global.lock, "multistack lock"); 828 | for (i = 0; i < MS_ROUTEHASHSIZ; i++) 829 | MS_LIST_INIT(&ms_global.routelist[i]); 830 | 831 | #ifdef MULTITACK_MBOXFILTER 832 | init_tables(); 833 | #endif /* MULTITACK_MBOXFILTER */ 834 | printf("MultiStack: loaded module\n"); 835 | return 0; 836 | } 837 | 838 | void 839 | ms_fini(void) 840 | { 841 | struct nmreq nmr; 842 | int error; 843 | struct netmap_bdg_ops tmp = {netmap_bdg_learning, NULL, NULL}; 844 | 845 | bzero(&nmr, sizeof(nmr)); 846 | nmr.nr_version = NETMAP_API; 847 | strncpy(nmr.nr_name, MS_NAME, sizeof(nmr.nr_name)); 848 | nmr.nr_cmd = NETMAP_BDG_REGOPS; 849 | error = netmap_bdg_ctl(&nmr, &tmp); 850 | if (error) 851 | D("failed to release VALE bridge %d", error); 852 | printf("MultiStack: Unloaded module\n"); 853 | } 854 | 855 | #ifdef __FreeBSD__ 856 | static int 857 | ms_loader(module_t mod, int type, void *data) 858 | { 859 | int error = 0; 860 | 861 | switch (type) { 862 | case MOD_LOAD: 863 | error = ms_init(); 864 | break; 865 | case MOD_UNLOAD: 866 | ms_fini(); 867 | break; 868 | default: 869 | error = EINVAL; 870 | break; 871 | } 872 | return error; 873 | } 874 | 875 | DEV_MODULE(multistack, ms_loader, NULL); 876 | #endif /* __FreeBSD__ */ 877 | -------------------------------------------------------------------------------- /sys/contrib/multistack/multistack_kern.h: -------------------------------------------------------------------------------- 1 | /* 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2015 NEC Europe Ltd. 
All rights reserved. 5 | * All rights reserved. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of NEC Europe Ltd. nor the names of 18 | * its contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef _MULTISTACK_KERN_H
#define _MULTISTACK_KERN_H
/* NOTE(review): presumably feature switches read by the netmap headers -- confirm */
#define WITH_VALE
#define WITH_PIPES /* XXX Should get better from netmap side */
/* Register the MultiStack callbacks on the MS_NAME bridge; 0 or ENOENT. */
int ms_init(void);
/* Hand the MS_NAME bridge back to the default learning lookup. */
void ms_fini(void);
/* Copy the name of the interface owning the address into `name`; 1 if found, 0 if not. */
int ms_getifname(struct sockaddr *, char *name);
/* 0 when the caller may claim the address/port for the given protocol, else an errno value. */
int ms_pcb_clash(struct sockaddr *, uint8_t);
#endif
--------------------------------------------------------------------------------
/sys/net/multistack.h:
--------------------------------------------------------------------------------
/*
 * BSD LICENSE
 *
 * Copyright(c) 2015 NEC Europe Ltd. All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of NEC Europe Ltd. nor the names of
 * its contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NOTE(review): the header names that followed the two #include directives
 * below are missing from this copy of the file; restore them from the
 * upstream source (they must provide struct sockaddr_in / sockaddr_in6).
 */
#if defined(linux) && defined(__KERNEL__)
#include
#else
#include
#endif

/* values for msreq.mr_cmd */
#define MULTISTACK_BIND		1
#define MULTISTACK_UNBIND	2

/* A local endpoint: socket address (IPv4 or IPv6) plus transport protocol. */
struct msaddr {
	union {
		struct sockaddr_in sin;
		struct sockaddr_in6 sin6;
		struct sockaddr sa;	/* family-independent view */
	};
	uint8_t protocol;	/* IP protocol number, e.g. IPPROTO_TCP */
};

/* Bind/unbind request passed to the bridge configuration callback. */
struct msreq {
	char mr_name[IFNAMSIZ];	/* bridge port the request is made for */
	union {
		struct msaddr mr_addr;
	} mr_ifru;
	int mr_cmd;		/* MULTISTACK_BIND or MULTISTACK_UNBIND */
};
/* shorthands into msreq.mr_ifru.mr_addr */
#define mr_sin6		mr_ifru.mr_addr.sin6
#define mr_sin		mr_ifru.mr_addr.sin
#define mr_sa		mr_ifru.mr_addr.sa
#define mr_proto	mr_ifru.mr_addr.protocol

--------------------------------------------------------------------------------