├── Kconfig ├── awkit-kbw ├── awkit-klat ├── Makefile ├── getopt.h ├── rdma_create_id.patch ├── krping-compat.patch ├── krping-ofa.patch ├── HOWTO_BUILD ├── install-ofa.sh ├── getopt.c ├── README └── krping.c /Kconfig: -------------------------------------------------------------------------------- 1 | config KRPING 2 | tristate "Kernel RDMA Ping Module" 3 | depends on INFINIBAND 4 | ---help--- 5 | Test module implementing a RDMA ping client/server. 6 | -------------------------------------------------------------------------------- /awkit-kbw: -------------------------------------------------------------------------------- 1 | 2 | { 3 | if ($2 == "delta") { 4 | sec=$4; 5 | usec=$7; 6 | iter=$9; 7 | size=$11; 8 | time=sec+(usec/1000000) 9 | bytes=iter*size 10 | printf("%u %f %f\n", size, bytes/time*8/1000000000, iter/time); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /awkit-klat: -------------------------------------------------------------------------------- 1 | 2 | { 3 | if ($2 == "delta") { 4 | sec=$4; 5 | usec=$7; 6 | iter=$9; 7 | size=$11; 8 | time=sec+(usec/1000000) 9 | bytes=iter*size 10 | printf("%u %f %f\n", size, 1 / (iter / time) / 2 * 1000*1000, iter/time); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | KSRC=/lib/modules/`uname -r`/build 2 | KOBJ=/lib/modules/`uname -r`/build 3 | 4 | 5 | obj-m += rdma_krping.o 6 | rdma_krping-y := getopt.o krping.o 7 | 8 | default: 9 | make -C $(KSRC) M=`pwd` modules 10 | 11 | install: 12 | make -C $(KSRC) M=`pwd` modules_install 13 | depmod -a 14 | 15 | clean: 16 | rm -f *.o 17 | rm -f *.ko 18 | rm -f rdma_krping.mod.c 19 | rm -f Module.symvers 20 | rm -f Module.markers 21 | -------------------------------------------------------------------------------- /getopt.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * lifted from fs/ncpfs/getopt.c 3 | */ 4 | #ifndef _KRPING_GETOPT_H 5 | #define _KRPING_GETOPT_H 6 | 7 | #define OPT_NOPARAM 1 8 | #define OPT_INT 2 9 | #define OPT_STRING 4 10 | struct krping_option { 11 | const char *name; 12 | unsigned int has_arg; 13 | int val; 14 | }; 15 | 16 | extern int krping_getopt(const char *caller, char **options, const struct krping_option *opts, 17 | char **optopt, char **optarg, unsigned long *value); 18 | 19 | #endif /* _KRPING_GETOPT_H */ 20 | -------------------------------------------------------------------------------- /rdma_create_id.patch: -------------------------------------------------------------------------------- 1 | diff --git a/krping.c b/krping.c 2 | index 84b687a..d1aed3e 100644 3 | --- a/drivers/infiniband/hw/cxgb3/krping.c 4 | +++ b/drivers/infiniband/hw/cxgb3/krping.c 5 | @@ -2292,7 +2292,8 @@ int krping_doit(char *cmd) 6 | goto out; 7 | } 8 | 9 | - cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP); 10 | + cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP, 11 | + IB_QPT_RC); 12 | if (IS_ERR(cb->cm_id)) { 13 | ret = PTR_ERR(cb->cm_id); 14 | printk(KERN_ERR PFX "rdma_create_id error %d\n", ret); 15 | -------------------------------------------------------------------------------- /krping-compat.patch: -------------------------------------------------------------------------------- 1 | --- a/drivers/infiniband/hw/cxgb3/Makefile 2007-09-12 05:28:57.000000000 -0500 2 | +++ b/drivers/infiniband/hw/cxgb3/Makefile 2007-09-26 09:59:53.000000000 -0500 3 | @@ -1,8 +1,11 @@ 4 | ccflags-y := -I$(CWD)/drivers/net/ethernet/chelsio/cxgb3 5 | 6 | obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o 7 | +obj-$(CONFIG_INFINIBAND_CXGB3) += rdma_krping.o 8 | 9 | iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ 10 | iwch_provider.o iwch.o cxio_hal.o cxio_resource.o 11 | 12 | +rdma_krping-y := 
krping.o getopt.o 13 | + 14 | ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG 15 | 16 | -------------------------------------------------------------------------------- /krping-ofa.patch: -------------------------------------------------------------------------------- 1 | --- a/drivers/infiniband/hw/cxgb3/Makefile 2007-09-12 05:28:57.000000000 -0500 2 | +++ b/drivers/infiniband/hw/cxgb3/Makefile 2007-09-26 09:59:53.000000000 -0500 3 | @@ -2,10 +2,13 @@ EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/ 4 | -I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core 5 | 6 | obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o 7 | +obj-$(CONFIG_INFINIBAND_CXGB3) += rdma_krping.o 8 | 9 | iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ 10 | iwch_provider.o iwch.o cxio_hal.o cxio_resource.o 11 | 12 | +rdma_krping-y := krping.o getopt.o 13 | + 14 | ifdef CONFIG_INFINIBAND_CXGB3_DEBUG 15 | EXTRA_CFLAGS += -DDEBUG 16 | endif 17 | -------------------------------------------------------------------------------- /HOWTO_BUILD: -------------------------------------------------------------------------------- 1 | 7/2008 2 | 3 | If you're building against a kernel.org kernel or a kernel with the 4 | RDMA drivers included (ie non-ofed), then simply edit the Makefile, 5 | change KSRC accordingly, and do 'make && make install'. 6 | 7 | If you want to build this on a system running the ofed drivers: 8 | 9 | The Makefile included seems to work on some platforms, but the only 10 | way I've seen to get a ofa-dependent module to build and generate the 11 | correct module versions is to build the module inside the ofa kernel tree. 12 | 13 | To that end I've created the install-ofa.sh script. It will add the 14 | rdma_krping module to the chelsio driver build directory and alter its 15 | Makefile to build and install rdma_krping along with the iw_cxgb3 module. 16 | Just run install-ofa.sh to get things going. Make sure you have the 17 | target ofed kernel tree available and configured. 
18 | -------------------------------------------------------------------------------- /install-ofa.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [[ $# -ne 1 ]] ; then 3 | echo "$0 " 4 | echo "eg: $0 /usr/src/ofa_kernel-1.2.5" 5 | exit 1 6 | fi; 7 | 8 | echo "Copying krping files to $1/drivers/infiniband/hw/cxgb3" 9 | cp -f krping.c getopt.[ch] $1/drivers/infiniband/hw/cxgb3 10 | if [[ $? -ne 0 ]] ; then 11 | echo "cp failed!" 12 | exit 1 13 | fi 14 | 15 | echo "Patching $1/drivers/infiniband/hw/cxgb3/Makefile" 16 | is_compat_rdma=`echo $1 | grep -c compat-rdma` 17 | if [ $is_compat_rdma -gt 0 ]; then 18 | patch -d $1 -p1 < krping-compat.patch 19 | patch -d $1 -p1 < rdma_create_id.patch 20 | else 21 | patch -d $1 -p1 < krping-ofa.patch 22 | fi 23 | if [[ $? -ne 0 ]] ; then 24 | echo "path apply failed!" 25 | exit 1 26 | fi 27 | 28 | echo "Done! " 29 | echo "Now configure, build, and install the ofa kernel tree to get the rdma_krping module installed." 30 | 31 | exit 0 32 | -------------------------------------------------------------------------------- /getopt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * lifted from fs/ncpfs/getopt.c 3 | */ 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "getopt.h" 10 | 11 | /** 12 | * krping_getopt - option parser 13 | * @caller: name of the caller, for error messages 14 | * @options: the options string 15 | * @opts: an array of &struct option entries controlling parser operations 16 | * @optopt: output; will contain the current option 17 | * @optarg: output; will contain the value (if one exists) 18 | * @flag: output; may be NULL; should point to a long for or'ing flags 19 | * @value: output; may be NULL; will be overwritten with the integer value 20 | * of the current argument. 21 | * 22 | * Helper to parse options on the format used by mount ("a=b,c=d,e,f"). 
23 | * Returns opts->val if a matching entry in the 'opts' array is found, 24 | * 0 when no more tokens are found, -1 if an error is encountered. 25 | */ 26 | int krping_getopt(const char *caller, char **options, 27 | const struct krping_option *opts, char **optopt, 28 | char **optarg, unsigned long *value) 29 | { 30 | char *token; 31 | char *val; 32 | 33 | do { 34 | if ((token = strsep(options, ",")) == NULL) 35 | return 0; 36 | } while (*token == '\0'); 37 | if (optopt) 38 | *optopt = token; 39 | 40 | if ((val = strchr (token, '=')) != NULL) { 41 | *val++ = 0; 42 | } 43 | *optarg = val; 44 | for (; opts->name; opts++) { 45 | if (!strcmp(opts->name, token)) { 46 | if (!val) { 47 | if (opts->has_arg & OPT_NOPARAM) { 48 | return opts->val; 49 | } 50 | printk(KERN_INFO "%s: the %s option requires " 51 | "an argument\n", caller, token); 52 | return -EINVAL; 53 | } 54 | if (opts->has_arg & OPT_INT) { 55 | char* v; 56 | 57 | *value = simple_strtoul(val, &v, 0); 58 | if (!*v) { 59 | return opts->val; 60 | } 61 | printk(KERN_INFO "%s: invalid numeric value " 62 | "in %s=%s\n", caller, token, val); 63 | return -EDOM; 64 | } 65 | if (opts->has_arg & OPT_STRING) { 66 | return opts->val; 67 | } 68 | printk(KERN_INFO "%s: unexpected argument %s to the " 69 | "%s option\n", caller, val, token); 70 | return -EINVAL; 71 | } 72 | } 73 | printk(KERN_INFO "%s: Unrecognized option %s\n", caller, token); 74 | return -EOPNOTSUPP; 75 | } 76 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Kernel Mode RDMA Ping Module 2 | Steve Wise - 8/2009 3 | 4 | --- 5 | Updated 8/2016 6 | --- 7 | 8 | ============ 9 | Introduction 10 | ============ 11 | 12 | The krping module is a kernel loadable module that utilizes the Open 13 | Fabrics verbs to implement a client/server ping/pong program. 
	Client				Server
	---------------------------------------------------------------------
	SEND(ping source buffer rkey/addr/len)

					RECV Completion with ping source info
					RDMA READ from client source MR
					RDMA Read completion
					SEND "go ahead" to client

	RECV Completion of "go ahead"
	SEND (ping sink buffer rkey/addr/len)

					RECV Completion with ping sink info
					RDMA Write to client sink MR
					RDMA Write completion
					SEND "go ahead" to client

	RECV Completion of "go ahead"
	Validate data in source and sink buffers
For instance, if you want to use the server_inv option, then you must
specify it on both the server and client command lines.
The reg mem_mode uses a reg mr on the client side for the
start_buf and rdma_buf buffers.  Each time the client advertises
one of these buffers, it invalidates the previous registration and fast
registers the new buffer with a new key.  If the server_inv
option is on, then the server will do the invalidation via the "go ahead"
messages using the IB_WR_SEND_WITH_INV opcode.  Otherwise the client
invalidates the mr using the IB_WR_LOCAL_INV work request.
If the krping read_inv option is set then the server will use
IB_WR_RDMA_READ_WITH_INV to do the rdma read and skip the
IB_WR_LOCAL_INV wr before re-registering the buffer for the
subsequent rdma write operation.
Opcode		Operand Type	Description
------------------------------------------------------------------------
wlat		none		Write latency test
rlat		none		Read latency test
poll		none		Enable polling vs blocking for rlat
bw		none		Write throughput test
duplex		none		Valid only with bw, this
				enables bidirectional mode
txdepth		integer		Set the sq depth for bw tests
23 | * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 | * SOFTWARE. 32 | */ 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | 52 | #include 53 | #include 54 | 55 | #include 56 | #include 57 | 58 | #include "getopt.h" 59 | 60 | #define PFX "krping: " 61 | 62 | static int debug = 0; 63 | module_param(debug, int, 0); 64 | MODULE_PARM_DESC(debug, "Debug level (0=none, 1=all)"); 65 | #define DEBUG_LOG if (debug) printk 66 | 67 | MODULE_AUTHOR("Steve Wise"); 68 | MODULE_DESCRIPTION("RDMA ping server"); 69 | MODULE_LICENSE("Dual BSD/GPL"); 70 | 71 | static const struct krping_option krping_opts[] = { 72 | {"count", OPT_INT, 'C'}, 73 | {"size", OPT_INT, 'S'}, 74 | {"addr", OPT_STRING, 'a'}, 75 | {"addr6", OPT_STRING, 'A'}, 76 | {"port", OPT_INT, 'p'}, 77 | {"verbose", OPT_NOPARAM, 'v'}, 78 | {"validate", OPT_NOPARAM, 'V'}, 79 | {"server", OPT_NOPARAM, 's'}, 80 | {"client", OPT_NOPARAM, 'c'}, 81 | {"server_inv", OPT_NOPARAM, 'I'}, 82 | {"wlat", OPT_NOPARAM, 'l'}, 83 | {"rlat", OPT_NOPARAM, 'L'}, 84 | {"bw", OPT_NOPARAM, 'B'}, 85 | {"duplex", OPT_NOPARAM, 'd'}, 86 | {"tos", OPT_INT, 't'}, 87 | {"txdepth", OPT_INT, 'T'}, 88 | {"poll", OPT_NOPARAM, 'P'}, 89 | {"local_dma_lkey", OPT_NOPARAM, 'Z'}, 90 | {"read_inv", OPT_NOPARAM, 'R'}, 91 | {"fr", OPT_NOPARAM, 'f'}, 92 | {NULL, 0, 0} 93 | }; 94 | 95 | struct krping_stats { 96 | 
unsigned long long send_bytes; 97 | unsigned long long send_msgs; 98 | unsigned long long recv_bytes; 99 | unsigned long long recv_msgs; 100 | unsigned long long write_bytes; 101 | unsigned long long write_msgs; 102 | unsigned long long read_bytes; 103 | unsigned long long read_msgs; 104 | }; 105 | 106 | #define htonll(x) cpu_to_be64((x)) 107 | #define ntohll(x) cpu_to_be64((x)) 108 | 109 | static DEFINE_MUTEX(krping_mutex); 110 | 111 | /* 112 | * List of running krping threads. 113 | */ 114 | static LIST_HEAD(krping_cbs); 115 | 116 | static struct proc_dir_entry *krping_proc; 117 | 118 | /* 119 | * Invoke like this, one on each side, using the server's address on 120 | * the RDMA device (iw%d): 121 | * 122 | * /bin/echo server,port=9999,addr=192.168.69.142,validate > /proc/krping 123 | * /bin/echo client,port=9999,addr=192.168.69.142,validate > /proc/krping 124 | * /bin/echo client,port=9999,addr6=2001:db8:0:f101::1,validate > /proc/krping 125 | * 126 | * krping "ping/pong" loop: 127 | * client sends source rkey/addr/len 128 | * server receives source rkey/add/len 129 | * server rdma reads "ping" data from source 130 | * server sends "go ahead" on rdma read completion 131 | * client sends sink rkey/addr/len 132 | * server receives sink rkey/addr/len 133 | * server rdma writes "pong" data to sink 134 | * server sends "go ahead" on rdma write completion 135 | * 136 | */ 137 | 138 | /* 139 | * These states are used to signal events between the completion handler 140 | * and the main client or server thread. 141 | * 142 | * Once CONNECTED, they cycle through RDMA_READ_ADV, RDMA_WRITE_ADV, 143 | * and RDMA_WRITE_COMPLETE for each ping. 
144 | */ 145 | enum test_state { 146 | IDLE = 1, 147 | CONNECT_REQUEST, 148 | ADDR_RESOLVED, 149 | ROUTE_RESOLVED, 150 | CONNECTED, 151 | RDMA_READ_ADV, 152 | RDMA_READ_COMPLETE, 153 | RDMA_WRITE_ADV, 154 | RDMA_WRITE_COMPLETE, 155 | ERROR 156 | }; 157 | 158 | struct krping_rdma_info { 159 | uint64_t buf; 160 | uint32_t rkey; 161 | uint32_t size; 162 | }; 163 | 164 | /* 165 | * Default max buffer size for IO... 166 | */ 167 | #define RPING_BUFSIZE 128*1024 168 | #define RPING_SQ_DEPTH 64 169 | 170 | /* 171 | * Control block struct. 172 | */ 173 | struct krping_cb { 174 | int server; /* 0 iff client */ 175 | struct ib_cq *cq; 176 | struct ib_pd *pd; 177 | struct ib_qp *qp; 178 | 179 | struct ib_mr *dma_mr; 180 | 181 | struct ib_fast_reg_page_list *page_list; 182 | int page_list_len; 183 | struct ib_reg_wr reg_mr_wr; 184 | struct ib_send_wr invalidate_wr; 185 | struct ib_mr *reg_mr; 186 | int server_invalidate; 187 | int read_inv; 188 | u8 key; 189 | 190 | struct ib_recv_wr rq_wr; /* recv work request record */ 191 | struct ib_sge recv_sgl; /* recv single SGE */ 192 | struct krping_rdma_info recv_buf __aligned(16); /* malloc'd buffer */ 193 | u64 recv_dma_addr; 194 | DEFINE_DMA_UNMAP_ADDR(recv_mapping); 195 | 196 | struct ib_send_wr sq_wr; /* send work requrest record */ 197 | struct ib_sge send_sgl; 198 | struct krping_rdma_info send_buf __aligned(16); /* single send buf */ 199 | u64 send_dma_addr; 200 | DEFINE_DMA_UNMAP_ADDR(send_mapping); 201 | 202 | struct ib_rdma_wr rdma_sq_wr; /* rdma work request record */ 203 | struct ib_sge rdma_sgl; /* rdma single SGE */ 204 | char *rdma_buf; /* used as rdma sink */ 205 | u64 rdma_dma_addr; 206 | DEFINE_DMA_UNMAP_ADDR(rdma_mapping); 207 | struct ib_mr *rdma_mr; 208 | 209 | uint32_t remote_rkey; /* remote guys RKEY */ 210 | uint64_t remote_addr; /* remote guys TO */ 211 | uint32_t remote_len; /* remote guys LEN */ 212 | 213 | char *start_buf; /* rdma read src */ 214 | u64 start_dma_addr; 215 | 
DEFINE_DMA_UNMAP_ADDR(start_mapping); 216 | struct ib_mr *start_mr; 217 | 218 | enum test_state state; /* used for cond/signalling */ 219 | wait_queue_head_t sem; 220 | struct krping_stats stats; 221 | 222 | uint16_t port; /* dst port in NBO */ 223 | u8 addr[16]; /* dst addr in NBO */ 224 | char ip6_ndev_name[128]; /* IPv6 netdev name */ 225 | char *addr_str; /* dst addr string */ 226 | uint8_t addr_type; /* ADDR_FAMILY - IPv4/V6 */ 227 | int verbose; /* verbose logging */ 228 | int count; /* ping count */ 229 | int size; /* ping data size */ 230 | int validate; /* validate ping data */ 231 | int wlat; /* run wlat test */ 232 | int rlat; /* run rlat test */ 233 | int bw; /* run bw test */ 234 | int duplex; /* run bw full duplex test */ 235 | int poll; /* poll or block for rlat test */ 236 | int txdepth; /* SQ depth */ 237 | int local_dma_lkey; /* use 0 for lkey */ 238 | int frtest; /* reg test */ 239 | int tos; /* type of service */ 240 | 241 | /* CM stuff */ 242 | struct rdma_cm_id *cm_id; /* connection on client side,*/ 243 | /* listener on server side. */ 244 | struct rdma_cm_id *child_cm_id; /* connection on server side */ 245 | struct list_head list; 246 | }; 247 | 248 | static int krping_cma_event_handler(struct rdma_cm_id *cma_id, 249 | struct rdma_cm_event *event) 250 | { 251 | int ret; 252 | struct krping_cb *cb = cma_id->context; 253 | 254 | DEBUG_LOG("cma_event type %d cma_id %p (%s)\n", event->event, cma_id, 255 | (cma_id == cb->cm_id) ? 
"parent" : "child"); 256 | 257 | switch (event->event) { 258 | case RDMA_CM_EVENT_ADDR_RESOLVED: 259 | cb->state = ADDR_RESOLVED; 260 | ret = rdma_resolve_route(cma_id, 2000); 261 | if (ret) { 262 | printk(KERN_ERR PFX "rdma_resolve_route error %d\n", 263 | ret); 264 | wake_up_interruptible(&cb->sem); 265 | } 266 | break; 267 | 268 | case RDMA_CM_EVENT_ROUTE_RESOLVED: 269 | cb->state = ROUTE_RESOLVED; 270 | wake_up_interruptible(&cb->sem); 271 | break; 272 | 273 | case RDMA_CM_EVENT_CONNECT_REQUEST: 274 | cb->state = CONNECT_REQUEST; 275 | cb->child_cm_id = cma_id; 276 | DEBUG_LOG("child cma %p\n", cb->child_cm_id); 277 | wake_up_interruptible(&cb->sem); 278 | break; 279 | 280 | case RDMA_CM_EVENT_ESTABLISHED: 281 | DEBUG_LOG("ESTABLISHED\n"); 282 | if (!cb->server) { 283 | cb->state = CONNECTED; 284 | } 285 | wake_up_interruptible(&cb->sem); 286 | break; 287 | 288 | case RDMA_CM_EVENT_ADDR_ERROR: 289 | case RDMA_CM_EVENT_ROUTE_ERROR: 290 | case RDMA_CM_EVENT_CONNECT_ERROR: 291 | case RDMA_CM_EVENT_UNREACHABLE: 292 | case RDMA_CM_EVENT_REJECTED: 293 | printk(KERN_ERR PFX "cma event %d, error %d\n", event->event, 294 | event->status); 295 | cb->state = ERROR; 296 | wake_up_interruptible(&cb->sem); 297 | break; 298 | 299 | case RDMA_CM_EVENT_DISCONNECTED: 300 | printk(KERN_ERR PFX "DISCONNECT EVENT...\n"); 301 | cb->state = ERROR; 302 | wake_up_interruptible(&cb->sem); 303 | break; 304 | 305 | case RDMA_CM_EVENT_DEVICE_REMOVAL: 306 | printk(KERN_ERR PFX "cma detected device removal!!!!\n"); 307 | cb->state = ERROR; 308 | wake_up_interruptible(&cb->sem); 309 | break; 310 | 311 | default: 312 | printk(KERN_ERR PFX "oof bad type!\n"); 313 | wake_up_interruptible(&cb->sem); 314 | break; 315 | } 316 | return 0; 317 | } 318 | 319 | static int server_recv(struct krping_cb *cb, struct ib_wc *wc) 320 | { 321 | if (wc->byte_len != sizeof(cb->recv_buf)) { 322 | printk(KERN_ERR PFX "Received bogus data, size %d\n", 323 | wc->byte_len); 324 | return -1; 325 | } 326 | 327 | 
cb->remote_rkey = ntohl(cb->recv_buf.rkey); 328 | cb->remote_addr = ntohll(cb->recv_buf.buf); 329 | cb->remote_len = ntohl(cb->recv_buf.size); 330 | DEBUG_LOG("Received rkey %x addr %llx len %d from peer\n", 331 | cb->remote_rkey, (unsigned long long)cb->remote_addr, 332 | cb->remote_len); 333 | 334 | if (cb->state <= CONNECTED || cb->state == RDMA_WRITE_COMPLETE) 335 | cb->state = RDMA_READ_ADV; 336 | else 337 | cb->state = RDMA_WRITE_ADV; 338 | 339 | return 0; 340 | } 341 | 342 | static int client_recv(struct krping_cb *cb, struct ib_wc *wc) 343 | { 344 | if (wc->byte_len != sizeof(cb->recv_buf)) { 345 | printk(KERN_ERR PFX "Received bogus data, size %d\n", 346 | wc->byte_len); 347 | return -1; 348 | } 349 | 350 | if (cb->state == RDMA_READ_ADV) 351 | cb->state = RDMA_WRITE_ADV; 352 | else 353 | cb->state = RDMA_WRITE_COMPLETE; 354 | 355 | return 0; 356 | } 357 | 358 | static void krping_cq_event_handler(struct ib_cq *cq, void *ctx) 359 | { 360 | struct krping_cb *cb = ctx; 361 | struct ib_wc wc; 362 | const struct ib_recv_wr *bad_wr; 363 | int ret; 364 | 365 | BUG_ON(cb->cq != cq); 366 | if (cb->state == ERROR) { 367 | printk(KERN_ERR PFX "cq completion in ERROR state\n"); 368 | return; 369 | } 370 | if (cb->frtest) { 371 | printk(KERN_ERR PFX "cq completion event in frtest!\n"); 372 | return; 373 | } 374 | if (!cb->wlat && !cb->rlat && !cb->bw) 375 | ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 376 | while ((ret = ib_poll_cq(cb->cq, 1, &wc)) == 1) { 377 | if (wc.status) { 378 | if (wc.status == IB_WC_WR_FLUSH_ERR) { 379 | DEBUG_LOG("cq flushed\n"); 380 | continue; 381 | } else { 382 | printk(KERN_ERR PFX "cq completion failed with " 383 | "wr_id %Lx status %d opcode %d vender_err %x\n", 384 | wc.wr_id, wc.status, wc.opcode, wc.vendor_err); 385 | goto error; 386 | } 387 | } 388 | 389 | switch (wc.opcode) { 390 | case IB_WC_SEND: 391 | DEBUG_LOG("send completion\n"); 392 | cb->stats.send_bytes += cb->send_sgl.length; 393 | cb->stats.send_msgs++; 394 | break; 395 | 
396 | case IB_WC_RDMA_WRITE: 397 | DEBUG_LOG("rdma write completion\n"); 398 | cb->stats.write_bytes += cb->rdma_sq_wr.wr.sg_list->length; 399 | cb->stats.write_msgs++; 400 | cb->state = RDMA_WRITE_COMPLETE; 401 | wake_up_interruptible(&cb->sem); 402 | break; 403 | 404 | case IB_WC_RDMA_READ: 405 | DEBUG_LOG("rdma read completion\n"); 406 | cb->stats.read_bytes += cb->rdma_sq_wr.wr.sg_list->length; 407 | cb->stats.read_msgs++; 408 | cb->state = RDMA_READ_COMPLETE; 409 | wake_up_interruptible(&cb->sem); 410 | break; 411 | 412 | case IB_WC_RECV: 413 | DEBUG_LOG("recv completion\n"); 414 | cb->stats.recv_bytes += sizeof(cb->recv_buf); 415 | cb->stats.recv_msgs++; 416 | if (cb->wlat || cb->rlat || cb->bw) 417 | ret = server_recv(cb, &wc); 418 | else 419 | ret = cb->server ? server_recv(cb, &wc) : 420 | client_recv(cb, &wc); 421 | if (ret) { 422 | printk(KERN_ERR PFX "recv wc error: %d\n", ret); 423 | goto error; 424 | } 425 | 426 | ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 427 | if (ret) { 428 | printk(KERN_ERR PFX "post recv error: %d\n", 429 | ret); 430 | goto error; 431 | } 432 | wake_up_interruptible(&cb->sem); 433 | break; 434 | 435 | default: 436 | printk(KERN_ERR PFX 437 | "%s:%d Unexpected opcode %d, Shutting down\n", 438 | __func__, __LINE__, wc.opcode); 439 | goto error; 440 | } 441 | } 442 | if (ret) { 443 | printk(KERN_ERR PFX "poll error %d\n", ret); 444 | goto error; 445 | } 446 | return; 447 | error: 448 | cb->state = ERROR; 449 | wake_up_interruptible(&cb->sem); 450 | } 451 | 452 | static int krping_accept(struct krping_cb *cb) 453 | { 454 | struct rdma_conn_param conn_param; 455 | int ret; 456 | 457 | DEBUG_LOG("accepting client connection request\n"); 458 | 459 | memset(&conn_param, 0, sizeof conn_param); 460 | conn_param.responder_resources = 1; 461 | conn_param.initiator_depth = 1; 462 | 463 | rdma_lock_handler(cb->child_cm_id); 464 | ret = rdma_accept(cb->child_cm_id, &conn_param); 465 | rdma_unlock_handler(cb->child_cm_id); 466 | if (ret) { 
467 | printk(KERN_ERR PFX "rdma_accept error: %d\n", ret); 468 | return ret; 469 | } 470 | 471 | if (!cb->wlat && !cb->rlat && !cb->bw) { 472 | wait_event_interruptible(cb->sem, cb->state >= CONNECTED); 473 | if (cb->state == ERROR) { 474 | printk(KERN_ERR PFX "wait for CONNECTED state %d\n", 475 | cb->state); 476 | return -1; 477 | } 478 | } 479 | return 0; 480 | } 481 | 482 | static void krping_setup_wr(struct krping_cb *cb) 483 | { 484 | cb->recv_sgl.addr = cb->recv_dma_addr; 485 | cb->recv_sgl.length = sizeof cb->recv_buf; 486 | cb->recv_sgl.lkey = cb->pd->local_dma_lkey; 487 | cb->rq_wr.sg_list = &cb->recv_sgl; 488 | cb->rq_wr.num_sge = 1; 489 | 490 | cb->send_sgl.addr = cb->send_dma_addr; 491 | cb->send_sgl.length = sizeof cb->send_buf; 492 | cb->send_sgl.lkey = cb->pd->local_dma_lkey; 493 | 494 | cb->sq_wr.opcode = IB_WR_SEND; 495 | cb->sq_wr.send_flags = IB_SEND_SIGNALED; 496 | cb->sq_wr.sg_list = &cb->send_sgl; 497 | cb->sq_wr.num_sge = 1; 498 | 499 | if (cb->server || cb->wlat || cb->rlat || cb->bw) { 500 | cb->rdma_sgl.addr = cb->rdma_dma_addr; 501 | cb->rdma_sq_wr.wr.send_flags = IB_SEND_SIGNALED; 502 | cb->rdma_sq_wr.wr.sg_list = &cb->rdma_sgl; 503 | cb->rdma_sq_wr.wr.num_sge = 1; 504 | } 505 | 506 | /* 507 | * A chain of 2 WRs, INVALDATE_MR + REG_MR. 508 | * both unsignaled. The client uses them to reregister 509 | * the rdma buffers with a new key each iteration. 
510 | */ 511 | cb->reg_mr_wr.wr.opcode = IB_WR_REG_MR; 512 | cb->reg_mr_wr.mr = cb->reg_mr; 513 | 514 | cb->invalidate_wr.next = &cb->reg_mr_wr.wr; 515 | cb->invalidate_wr.opcode = IB_WR_LOCAL_INV; 516 | } 517 | 518 | static int krping_setup_buffers(struct krping_cb *cb) 519 | { 520 | int ret; 521 | 522 | DEBUG_LOG(PFX "krping_setup_buffers called on cb %p\n", cb); 523 | 524 | cb->recv_dma_addr = ib_dma_map_single(cb->pd->device, 525 | &cb->recv_buf, 526 | sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); 527 | if (unlikely(ib_dma_mapping_error(cb->pd->device, cb->recv_dma_addr))) { 528 | DEBUG_LOG(PFX "recv_buf DMA map failed\n"); 529 | ret = -EINVAL; 530 | goto bail; 531 | } 532 | dma_unmap_addr_set(cb, recv_mapping, cb->recv_dma_addr); 533 | cb->send_dma_addr = ib_dma_map_single(cb->pd->device, 534 | &cb->send_buf, sizeof(cb->send_buf), 535 | DMA_BIDIRECTIONAL); 536 | if (unlikely(ib_dma_mapping_error(cb->pd->device, cb->send_dma_addr))) { 537 | DEBUG_LOG(PFX "send_buf DMA map failed\n"); 538 | ret = -EINVAL; 539 | goto bail; 540 | } 541 | dma_unmap_addr_set(cb, send_mapping, cb->send_dma_addr); 542 | 543 | cb->rdma_buf = kzalloc(cb->size, GFP_KERNEL); 544 | if (cb->rdma_buf) 545 | cb->rdma_dma_addr = ib_dma_map_single(cb->pd->device, cb->rdma_buf, cb->size, DMA_BIDIRECTIONAL); 546 | if (!cb->rdma_buf || ib_dma_mapping_error(cb->pd->device, cb->rdma_dma_addr)) { 547 | DEBUG_LOG(PFX "rdma_buf allocation failed\n"); 548 | kfree(cb->rdma_buf); 549 | ret = -ENOMEM; 550 | goto bail; 551 | } 552 | dma_unmap_addr_set(cb, rdma_mapping, cb->rdma_dma_addr); 553 | cb->page_list_len = (((cb->size - 1) & PAGE_MASK) + PAGE_SIZE) 554 | >> PAGE_SHIFT; 555 | cb->reg_mr = ib_alloc_mr(cb->pd, IB_MR_TYPE_MEM_REG, 556 | cb->page_list_len); 557 | if (IS_ERR(cb->reg_mr)) { 558 | ret = PTR_ERR(cb->reg_mr); 559 | DEBUG_LOG(PFX "recv_buf reg_mr failed %d\n", ret); 560 | goto bail; 561 | } 562 | DEBUG_LOG(PFX "reg rkey 0x%x page_list_len %u\n", 563 | cb->reg_mr->rkey, cb->page_list_len); 564 | 
565 | if (!cb->server || cb->wlat || cb->rlat || cb->bw) { 566 | cb->start_buf = kzalloc(cb->size, GFP_KERNEL); 567 | if (cb->start_buf) 568 | cb->start_dma_addr = ib_dma_map_single(cb->pd->device, cb->start_buf, cb->size, DMA_BIDIRECTIONAL); 569 | if (!cb->start_buf || ib_dma_mapping_error(cb->pd->device, cb->start_dma_addr)) { 570 | DEBUG_LOG(PFX "start_buf malloc failed\n"); 571 | kfree(cb->start_buf); 572 | ret = -ENOMEM; 573 | goto bail; 574 | } 575 | dma_unmap_addr_set(cb, start_mapping, cb->start_dma_addr); 576 | } 577 | 578 | krping_setup_wr(cb); 579 | DEBUG_LOG(PFX "allocated & registered buffers...\n"); 580 | return 0; 581 | bail: 582 | if (!unlikely(ib_dma_mapping_error(cb->pd->device, cb->recv_dma_addr))) 583 | ib_dma_unmap_single(cb->pd->device, 584 | dma_unmap_addr(cb, recv_mapping), 585 | sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); 586 | if (!unlikely(ib_dma_mapping_error(cb->pd->device, cb->send_dma_addr))) 587 | ib_dma_unmap_single(cb->pd->device, 588 | dma_unmap_addr(cb, send_mapping), 589 | sizeof(cb->send_buf), DMA_BIDIRECTIONAL); 590 | 591 | if (cb->reg_mr && !IS_ERR(cb->reg_mr)) 592 | ib_dereg_mr(cb->reg_mr); 593 | if (cb->rdma_mr && !IS_ERR(cb->rdma_mr)) 594 | ib_dereg_mr(cb->rdma_mr); 595 | if (cb->dma_mr && !IS_ERR(cb->dma_mr)) 596 | ib_dereg_mr(cb->dma_mr); 597 | if (cb->rdma_buf) { 598 | ib_dma_unmap_single(cb->pd->device, cb->rdma_dma_addr, cb->size, 599 | DMA_BIDIRECTIONAL); 600 | kfree(cb->rdma_buf); 601 | } 602 | if (cb->start_buf) { 603 | ib_dma_unmap_single(cb->pd->device, cb->start_dma_addr, cb->size, 604 | DMA_BIDIRECTIONAL); 605 | kfree(cb->start_buf); 606 | } 607 | return ret; 608 | } 609 | 610 | static void krping_free_buffers(struct krping_cb *cb) 611 | { 612 | DEBUG_LOG("krping_free_buffers called on cb %p\n", cb); 613 | 614 | if (cb->dma_mr) 615 | ib_dereg_mr(cb->dma_mr); 616 | if (cb->rdma_mr) 617 | ib_dereg_mr(cb->rdma_mr); 618 | if (cb->start_mr) 619 | ib_dereg_mr(cb->start_mr); 620 | if (cb->reg_mr) 621 | 
ib_dereg_mr(cb->reg_mr); 622 | 623 | ib_dma_unmap_single(cb->pd->device, 624 | dma_unmap_addr(cb, recv_mapping), 625 | sizeof(cb->recv_buf), DMA_BIDIRECTIONAL); 626 | ib_dma_unmap_single(cb->pd->device, 627 | dma_unmap_addr(cb, send_mapping), 628 | sizeof(cb->send_buf), DMA_BIDIRECTIONAL); 629 | 630 | ib_dma_unmap_single(cb->pd->device, dma_unmap_addr(cb, rdma_dma_addr), 631 | cb->size, DMA_BIDIRECTIONAL); 632 | kfree(cb->rdma_buf); 633 | 634 | if (cb->start_buf) { 635 | ib_dma_unmap_single(cb->pd->device, dma_unmap_addr(cb, start_dma_addr), 636 | cb->size, DMA_BIDIRECTIONAL); 637 | kfree(cb->start_buf); 638 | } 639 | } 640 | 641 | static int krping_create_qp(struct krping_cb *cb) 642 | { 643 | struct ib_qp_init_attr init_attr; 644 | int ret; 645 | 646 | memset(&init_attr, 0, sizeof(init_attr)); 647 | init_attr.cap.max_send_wr = cb->txdepth; 648 | init_attr.cap.max_recv_wr = 2; 649 | 650 | /* For flush_qp() */ 651 | init_attr.cap.max_send_wr++; 652 | init_attr.cap.max_recv_wr++; 653 | 654 | init_attr.cap.max_recv_sge = 1; 655 | init_attr.cap.max_send_sge = 1; 656 | init_attr.qp_type = IB_QPT_RC; 657 | init_attr.send_cq = cb->cq; 658 | init_attr.recv_cq = cb->cq; 659 | init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 660 | 661 | if (cb->server) { 662 | ret = rdma_create_qp(cb->child_cm_id, cb->pd, &init_attr); 663 | if (!ret) 664 | cb->qp = cb->child_cm_id->qp; 665 | } else { 666 | ret = rdma_create_qp(cb->cm_id, cb->pd, &init_attr); 667 | if (!ret) 668 | cb->qp = cb->cm_id->qp; 669 | } 670 | 671 | return ret; 672 | } 673 | 674 | static void krping_free_qp(struct krping_cb *cb) 675 | { 676 | ib_destroy_qp(cb->qp); 677 | ib_destroy_cq(cb->cq); 678 | ib_dealloc_pd(cb->pd); 679 | } 680 | 681 | static int krping_setup_qp(struct krping_cb *cb, struct rdma_cm_id *cm_id) 682 | { 683 | int ret; 684 | struct ib_cq_init_attr attr = {0}; 685 | 686 | cb->pd = ib_alloc_pd(cm_id->device, 0); 687 | if (IS_ERR(cb->pd)) { 688 | printk(KERN_ERR PFX "ib_alloc_pd failed\n"); 689 | return 
PTR_ERR(cb->pd); 690 | } 691 | DEBUG_LOG("created pd %p\n", cb->pd); 692 | 693 | attr.cqe = cb->txdepth * 2; 694 | attr.comp_vector = 0; 695 | cb->cq = ib_create_cq(cm_id->device, krping_cq_event_handler, NULL, 696 | cb, &attr); 697 | if (IS_ERR(cb->cq)) { 698 | printk(KERN_ERR PFX "ib_create_cq failed\n"); 699 | ret = PTR_ERR(cb->cq); 700 | goto err1; 701 | } 702 | DEBUG_LOG("created cq %p\n", cb->cq); 703 | 704 | if (!cb->wlat && !cb->rlat && !cb->bw && !cb->frtest) { 705 | ret = ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 706 | if (ret) { 707 | printk(KERN_ERR PFX "ib_create_cq failed\n"); 708 | goto err2; 709 | } 710 | } 711 | 712 | ret = krping_create_qp(cb); 713 | if (ret) { 714 | printk(KERN_ERR PFX "krping_create_qp failed: %d\n", ret); 715 | goto err2; 716 | } 717 | DEBUG_LOG("created qp %p\n", cb->qp); 718 | return 0; 719 | err2: 720 | ib_destroy_cq(cb->cq); 721 | err1: 722 | ib_dealloc_pd(cb->pd); 723 | return ret; 724 | } 725 | 726 | /* 727 | * return the (possibly rebound) rkey for the rdma buffer. 728 | * REG mode: invalidate and rebind via reg wr. 729 | * other modes: just return the mr rkey. 730 | */ 731 | static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv) 732 | { 733 | u32 rkey; 734 | const struct ib_send_wr *bad_wr; 735 | int ret; 736 | struct scatterlist sg = {0}; 737 | sg_init_marker(&sg, 1); 738 | 739 | cb->invalidate_wr.ex.invalidate_rkey = cb->reg_mr->rkey; 740 | 741 | /* 742 | * Update the reg key. 743 | */ 744 | ib_update_fast_reg_key(cb->reg_mr, ++cb->key); 745 | cb->reg_mr_wr.key = cb->reg_mr->rkey; 746 | 747 | /* 748 | * Update the reg WR with new buf info. 
749 | */ 750 | if (buf == (u64)cb->start_dma_addr) 751 | cb->reg_mr_wr.access = IB_ACCESS_REMOTE_READ; 752 | else 753 | cb->reg_mr_wr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE; 754 | sg_dma_address(&sg) = buf; 755 | sg_dma_len(&sg) = cb->size; 756 | 757 | ret = ib_map_mr_sg(cb->reg_mr, &sg, 1, NULL, PAGE_SIZE); 758 | BUG_ON(ret <= 0 || ret > cb->page_list_len); 759 | 760 | DEBUG_LOG(PFX "post_inv = %d, reg_mr new rkey 0x%x pgsz %u len %lu" 761 | " iova_start %llx\n", 762 | post_inv, 763 | cb->reg_mr_wr.key, 764 | cb->reg_mr->page_size, 765 | (unsigned long)cb->reg_mr->length, 766 | (unsigned long long)cb->reg_mr->iova); 767 | 768 | if (post_inv) 769 | ret = ib_post_send(cb->qp, &cb->invalidate_wr, &bad_wr); 770 | else 771 | ret = ib_post_send(cb->qp, &cb->reg_mr_wr.wr, &bad_wr); 772 | if (ret) { 773 | printk(KERN_ERR PFX "post send error %d\n", ret); 774 | cb->state = ERROR; 775 | } 776 | rkey = cb->reg_mr->rkey; 777 | return rkey; 778 | } 779 | 780 | static void krping_format_send(struct krping_cb *cb, u64 buf) 781 | { 782 | struct krping_rdma_info *info = &cb->send_buf; 783 | u32 rkey; 784 | 785 | /* 786 | * Client side will do reg or mw bind before 787 | * advertising the rdma buffer. Server side 788 | * sends have no data. 
789 | */ 790 | if (!cb->server || cb->wlat || cb->rlat || cb->bw) { 791 | rkey = krping_rdma_rkey(cb, buf, !cb->server_invalidate); 792 | info->buf = htonll(buf); 793 | info->rkey = htonl(rkey); 794 | info->size = htonl(cb->size); 795 | DEBUG_LOG("RDMA addr %llx rkey %x len %d\n", 796 | (unsigned long long)buf, rkey, cb->size); 797 | } 798 | } 799 | 800 | static void krping_test_server(struct krping_cb *cb) 801 | { 802 | struct ib_send_wr inv; 803 | const struct ib_send_wr *bad_wr; 804 | int ret; 805 | 806 | while (1) { 807 | /* Wait for client's Start STAG/TO/Len */ 808 | wait_event_interruptible(cb->sem, cb->state >= RDMA_READ_ADV); 809 | if (cb->state != RDMA_READ_ADV) { 810 | printk(KERN_ERR PFX "wait for RDMA_READ_ADV state %d\n", 811 | cb->state); 812 | break; 813 | } 814 | 815 | DEBUG_LOG("server received sink adv\n"); 816 | 817 | cb->rdma_sq_wr.rkey = cb->remote_rkey; 818 | cb->rdma_sq_wr.remote_addr = cb->remote_addr; 819 | cb->rdma_sq_wr.wr.sg_list->length = cb->remote_len; 820 | cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, !cb->read_inv); 821 | cb->rdma_sq_wr.wr.next = NULL; 822 | 823 | /* Issue RDMA Read. */ 824 | if (cb->read_inv) 825 | cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; 826 | else { 827 | 828 | cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ; 829 | /* 830 | * Immediately follow the read with a 831 | * fenced LOCAL_INV. 
832 | */ 833 | cb->rdma_sq_wr.wr.next = &inv; 834 | memset(&inv, 0, sizeof inv); 835 | inv.opcode = IB_WR_LOCAL_INV; 836 | inv.ex.invalidate_rkey = cb->reg_mr->rkey; 837 | inv.send_flags = IB_SEND_FENCE; 838 | } 839 | 840 | ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr); 841 | if (ret) { 842 | printk(KERN_ERR PFX "post send error %d\n", ret); 843 | break; 844 | } 845 | cb->rdma_sq_wr.wr.next = NULL; 846 | 847 | DEBUG_LOG("server posted rdma read req \n"); 848 | 849 | /* Wait for read completion */ 850 | wait_event_interruptible(cb->sem, 851 | cb->state >= RDMA_READ_COMPLETE); 852 | if (cb->state != RDMA_READ_COMPLETE) { 853 | printk(KERN_ERR PFX 854 | "wait for RDMA_READ_COMPLETE state %d\n", 855 | cb->state); 856 | break; 857 | } 858 | DEBUG_LOG("server received read complete\n"); 859 | 860 | /* Display data in recv buf */ 861 | if (cb->verbose) 862 | printk(KERN_INFO PFX 863 | "server ping data (64B max): |%.64s|\n", 864 | cb->rdma_buf); 865 | 866 | /* Tell client to continue */ 867 | if (cb->server && cb->server_invalidate) { 868 | cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; 869 | cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; 870 | DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey); 871 | } 872 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 873 | if (ret) { 874 | printk(KERN_ERR PFX "post send error %d\n", ret); 875 | break; 876 | } 877 | DEBUG_LOG("server posted go ahead\n"); 878 | 879 | /* Wait for client's RDMA STAG/TO/Len */ 880 | wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); 881 | if (cb->state != RDMA_WRITE_ADV) { 882 | printk(KERN_ERR PFX 883 | "wait for RDMA_WRITE_ADV state %d\n", 884 | cb->state); 885 | break; 886 | } 887 | DEBUG_LOG("server received sink adv\n"); 888 | 889 | /* RDMA Write echo data */ 890 | cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE; 891 | cb->rdma_sq_wr.rkey = cb->remote_rkey; 892 | cb->rdma_sq_wr.remote_addr = cb->remote_addr; 893 | cb->rdma_sq_wr.wr.sg_list->length = strlen(cb->rdma_buf) + 1; 894 | if 
(cb->local_dma_lkey) 895 | cb->rdma_sgl.lkey = cb->pd->local_dma_lkey; 896 | else 897 | cb->rdma_sgl.lkey = krping_rdma_rkey(cb, cb->rdma_dma_addr, 0); 898 | 899 | DEBUG_LOG("rdma write from lkey %x laddr %llx len %d\n", 900 | cb->rdma_sq_wr.wr.sg_list->lkey, 901 | (unsigned long long)cb->rdma_sq_wr.wr.sg_list->addr, 902 | cb->rdma_sq_wr.wr.sg_list->length); 903 | 904 | ret = ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr); 905 | if (ret) { 906 | printk(KERN_ERR PFX "post send error %d\n", ret); 907 | break; 908 | } 909 | 910 | /* Wait for completion */ 911 | ret = wait_event_interruptible(cb->sem, cb->state >= 912 | RDMA_WRITE_COMPLETE); 913 | if (cb->state != RDMA_WRITE_COMPLETE) { 914 | printk(KERN_ERR PFX 915 | "wait for RDMA_WRITE_COMPLETE state %d\n", 916 | cb->state); 917 | break; 918 | } 919 | DEBUG_LOG("server rdma write complete \n"); 920 | 921 | cb->state = CONNECTED; 922 | 923 | /* Tell client to begin again */ 924 | if (cb->server && cb->server_invalidate) { 925 | cb->sq_wr.ex.invalidate_rkey = cb->remote_rkey; 926 | cb->sq_wr.opcode = IB_WR_SEND_WITH_INV; 927 | DEBUG_LOG("send-w-inv rkey 0x%x\n", cb->remote_rkey); 928 | } 929 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 930 | if (ret) { 931 | printk(KERN_ERR PFX "post send error %d\n", ret); 932 | break; 933 | } 934 | DEBUG_LOG("server posted go ahead\n"); 935 | } 936 | } 937 | 938 | static void rlat_test(struct krping_cb *cb) 939 | { 940 | int scnt; 941 | int iters = cb->count; 942 | ktime_t start, stop; 943 | int ret; 944 | struct ib_wc wc; 945 | const struct ib_send_wr *bad_wr; 946 | int ne; 947 | 948 | scnt = 0; 949 | cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_READ; 950 | cb->rdma_sq_wr.rkey = cb->remote_rkey; 951 | cb->rdma_sq_wr.remote_addr = cb->remote_addr; 952 | cb->rdma_sq_wr.wr.sg_list->length = cb->size; 953 | 954 | start = ktime_get(); 955 | if (!cb->poll) { 956 | cb->state = RDMA_READ_ADV; 957 | ib_req_notify_cq(cb->cq, IB_CQ_NEXT_COMP); 958 | } 959 | while (scnt < iters) { 960 | 961 
/*
 * RDMA-write latency test loop.
 *
 * Three counters drive the loop until each has reached cb->count:
 *   rcnt - values observed in the first byte of cb->start_buf,
 *   scnt - RDMA writes posted from cb->rdma_sq_wr,
 *   ccnt - completions reaped from cb->cq by polling.
 *
 * Before each post the first byte of cb->rdma_buf is set to scnt + 1. The
 * receive step busy-waits until the first byte of cb->start_buf equals the
 * next expected count. The client skips that wait before its very first
 * post.
 *
 * For the first cycle_iters iterations (at most 1000) cycle stamps are taken
 * around the post and around the CQ poll. On normal completion one summary
 * line with the elapsed time and the cycle sums is printed.
 *
 * Any allocation failure, post failure, poll error, failed completion or
 * ERROR state jumps to "done", which frees the cycle arrays without
 * printing the summary.
 */
static void wlat_test(struct krping_cb *cb)
{
	int ccnt, scnt, rcnt;
	int iters=cb->count;
	/* volatile: this byte is re-read from memory on every spin iteration */
	volatile char *poll_buf = (char *) cb->start_buf;
	char *buf = (char *)cb->rdma_buf;
	ktime_t start, stop;
	cycles_t *post_cycles_start = NULL;
	cycles_t *post_cycles_stop = NULL;
	cycles_t *poll_cycles_start = NULL;
	cycles_t *poll_cycles_stop = NULL;
	cycles_t *last_poll_cycles_start = NULL;
	cycles_t sum_poll = 0, sum_post = 0, sum_last_poll = 0;
	int i;
	int cycle_iters = 1000;

	ccnt = 0;
	scnt = 0;
	rcnt = 0;

	/*
	 * All five arrays are sized for the initial cycle_iters (1000). The
	 * pointers start out NULL so "done" can kfree() them unconditionally.
	 */
	post_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
	if (!post_cycles_start) {
		printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
		goto done;
	}
	post_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
	if (!post_cycles_stop) {
		printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
		goto done;
	}
	poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
	if (!poll_cycles_start) {
		printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
		goto done;
	}
	poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL);
	if (!poll_cycles_stop) {
		printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
		goto done;
	}
	last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t),
		GFP_KERNEL);
	if (!last_poll_cycles_start) {
		printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__);
		goto done;
	}
	/* Every iteration re-posts this same RDMA write of cb->size bytes. */
	cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE;
	cb->rdma_sq_wr.rkey = cb->remote_rkey;
	cb->rdma_sq_wr.remote_addr = cb->remote_addr;
	cb->rdma_sq_wr.wr.sg_list->length = cb->size;

	/* Never index the cycle arrays beyond the number of iterations run. */
	if (cycle_iters > iters)
		cycle_iters = iters;
	start = ktime_get();
	while (scnt < iters || ccnt < iters || rcnt < iters) {

		/* Wait till buffer changes. */
		/* Skipped only by the client before its first post (scnt == 0). */
		if (rcnt < iters && !(scnt < 1 && !cb->server)) {
			++rcnt;
			/* Busy-wait; the only other way out is the ERROR state. */
			while (*poll_buf != (char)rcnt) {
				if (cb->state == ERROR) {
					printk(KERN_ERR PFX
						"state = ERROR, bailing\n");
					goto done;
				}
			}
		}

		if (scnt < iters) {
			const struct ib_send_wr *bad_wr;

			/* Stamp the outgoing buffer with the 1-based send count. */
			*buf = (char)scnt+1;
			if (scnt < cycle_iters)
				post_cycles_start[scnt] = get_cycles();
			if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) {
				printk(KERN_ERR PFX
					"Couldn't post send: scnt=%d\n",
					scnt);
				goto done;
			}
			if (scnt < cycle_iters)
				post_cycles_stop[scnt] = get_cycles();
			scnt++;
		}

		if (ccnt < iters) {
			struct ib_wc wc;
			int ne;

			if (ccnt < cycle_iters)
				poll_cycles_start[ccnt] = get_cycles();
			do {
				/*
				 * Re-stamped on every attempt, so the stored
				 * value is the start of the final poll call.
				 */
				if (ccnt < cycle_iters)
					last_poll_cycles_start[ccnt] =
						get_cycles();
				ne = ib_poll_cq(cb->cq, 1, &wc);
			} while (ne == 0);
			if (ccnt < cycle_iters)
				poll_cycles_stop[ccnt] = get_cycles();
			++ccnt;

			if (ne < 0) {
				printk(KERN_ERR PFX "poll CQ failed %d\n", ne);
				goto done;
			}
			if (wc.status != IB_WC_SUCCESS) {
				printk(KERN_ERR PFX
					"Completion wth error at %s:\n",
					cb->server ? "server" : "client");
				printk(KERN_ERR PFX
					"Failed status %d: wr_id %d\n",
					wc.status, (int) wc.wr_id);
				printk(KERN_ERR PFX
					"scnt=%d, rcnt=%d, ccnt=%d\n",
					scnt, rcnt, ccnt);
				goto done;
			}
		}
	}
	stop = ktime_get();

	/* Sum the per-iteration cycle deltas over the instrumented prefix. */
	for (i=0; i < cycle_iters; i++) {
		sum_post += post_cycles_stop[i] - post_cycles_start[i];
		sum_poll += poll_cycles_stop[i] - poll_cycles_start[i];
		sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i];
	}
	printk(KERN_ERR PFX
		"delta nsec %llu iter %d size %d cycle_iters %d"
		" sum_post %llu sum_poll %llu sum_last_poll %llu\n",
		ktime_sub(stop, start),
		scnt, cb->size, cycle_iters,
		(unsigned long long)sum_post, (unsigned long long)sum_poll,
		(unsigned long long)sum_last_poll);
done:
	kfree(post_cycles_start);
	kfree(post_cycles_stop);
	kfree(poll_cycles_start);
	kfree(poll_cycles_stop);
	kfree(last_poll_cycles_start);
}
1180 | poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1181 | if (!poll_cycles_start) { 1182 | printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); 1183 | goto done; 1184 | } 1185 | poll_cycles_stop = kmalloc(cycle_iters * sizeof(cycles_t), GFP_KERNEL); 1186 | if (!poll_cycles_stop) { 1187 | printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); 1188 | goto done; 1189 | } 1190 | last_poll_cycles_start = kmalloc(cycle_iters * sizeof(cycles_t), 1191 | GFP_KERNEL); 1192 | if (!last_poll_cycles_start) { 1193 | printk(KERN_ERR PFX "%s kmalloc failed\n", __FUNCTION__); 1194 | goto done; 1195 | } 1196 | cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE; 1197 | cb->rdma_sq_wr.rkey = cb->remote_rkey; 1198 | cb->rdma_sq_wr.remote_addr = cb->remote_addr; 1199 | cb->rdma_sq_wr.wr.sg_list->length = cb->size; 1200 | 1201 | if (cycle_iters > iters) 1202 | cycle_iters = iters; 1203 | start = ktime_get(); 1204 | while (scnt < iters || ccnt < iters) { 1205 | 1206 | while (scnt < iters && scnt - ccnt < cb->txdepth) { 1207 | const struct ib_send_wr *bad_wr; 1208 | 1209 | if (scnt < cycle_iters) 1210 | post_cycles_start[scnt] = get_cycles(); 1211 | if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) { 1212 | printk(KERN_ERR PFX 1213 | "Couldn't post send: scnt=%d\n", 1214 | scnt); 1215 | goto done; 1216 | } 1217 | if (scnt < cycle_iters) 1218 | post_cycles_stop[scnt] = get_cycles(); 1219 | ++scnt; 1220 | } 1221 | 1222 | if (ccnt < iters) { 1223 | int ne; 1224 | struct ib_wc wc; 1225 | 1226 | if (ccnt < cycle_iters) 1227 | poll_cycles_start[ccnt] = get_cycles(); 1228 | do { 1229 | if (ccnt < cycle_iters) 1230 | last_poll_cycles_start[ccnt] = 1231 | get_cycles(); 1232 | ne = ib_poll_cq(cb->cq, 1, &wc); 1233 | } while (ne == 0); 1234 | if (ccnt < cycle_iters) 1235 | poll_cycles_stop[ccnt] = get_cycles(); 1236 | ccnt += 1; 1237 | 1238 | if (ne < 0) { 1239 | printk(KERN_ERR PFX "poll CQ failed %d\n", ne); 1240 | goto done; 1241 | } 1242 | if (wc.status != 
IB_WC_SUCCESS) { 1243 | printk(KERN_ERR PFX 1244 | "Completion wth error at %s:\n", 1245 | cb->server ? "server" : "client"); 1246 | printk(KERN_ERR PFX 1247 | "Failed status %d: wr_id %d\n", 1248 | wc.status, (int) wc.wr_id); 1249 | goto done; 1250 | } 1251 | } 1252 | } 1253 | stop = ktime_get(); 1254 | 1255 | for (i=0; i < cycle_iters; i++) { 1256 | sum_post += post_cycles_stop[i] - post_cycles_start[i]; 1257 | sum_poll += poll_cycles_stop[i] - poll_cycles_start[i]; 1258 | sum_last_poll += poll_cycles_stop[i]-last_poll_cycles_start[i]; 1259 | } 1260 | printk(KERN_ERR PFX 1261 | "delta nsec %llu iter %d size %d cycle_iters %d" 1262 | " sum_post %llu sum_poll %llu sum_last_poll %llu\n", 1263 | ktime_sub(stop, start), scnt, cb->size, cycle_iters, 1264 | (unsigned long long)sum_post, (unsigned long long)sum_poll, 1265 | (unsigned long long)sum_last_poll); 1266 | done: 1267 | kfree(post_cycles_start); 1268 | kfree(post_cycles_stop); 1269 | kfree(poll_cycles_start); 1270 | kfree(poll_cycles_stop); 1271 | kfree(last_poll_cycles_start); 1272 | } 1273 | 1274 | static void krping_rlat_test_server(struct krping_cb *cb) 1275 | { 1276 | const struct ib_send_wr *bad_wr; 1277 | struct ib_wc wc; 1278 | int ret; 1279 | 1280 | /* Spin waiting for client's Start STAG/TO/Len */ 1281 | while (cb->state < RDMA_READ_ADV) { 1282 | krping_cq_event_handler(cb->cq, cb); 1283 | } 1284 | 1285 | /* Send STAG/TO/Len to client */ 1286 | krping_format_send(cb, cb->start_dma_addr); 1287 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1288 | if (ret) { 1289 | printk(KERN_ERR PFX "post send error %d\n", ret); 1290 | return; 1291 | } 1292 | 1293 | /* Spin waiting for send completion */ 1294 | while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1295 | if (ret < 0) { 1296 | printk(KERN_ERR PFX "poll error %d\n", ret); 1297 | return; 1298 | } 1299 | if (wc.status) { 1300 | printk(KERN_ERR PFX "send completiong error %d\n", wc.status); 1301 | return; 1302 | } 1303 | 1304 | 
wait_event_interruptible(cb->sem, cb->state == ERROR); 1305 | } 1306 | 1307 | static void krping_wlat_test_server(struct krping_cb *cb) 1308 | { 1309 | const struct ib_send_wr *bad_wr; 1310 | struct ib_wc wc; 1311 | int ret; 1312 | 1313 | /* Spin waiting for client's Start STAG/TO/Len */ 1314 | while (cb->state < RDMA_READ_ADV) { 1315 | krping_cq_event_handler(cb->cq, cb); 1316 | } 1317 | 1318 | /* Send STAG/TO/Len to client */ 1319 | krping_format_send(cb, cb->start_dma_addr); 1320 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1321 | if (ret) { 1322 | printk(KERN_ERR PFX "post send error %d\n", ret); 1323 | return; 1324 | } 1325 | 1326 | /* Spin waiting for send completion */ 1327 | while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1328 | if (ret < 0) { 1329 | printk(KERN_ERR PFX "poll error %d\n", ret); 1330 | return; 1331 | } 1332 | if (wc.status) { 1333 | printk(KERN_ERR PFX "send completiong error %d\n", wc.status); 1334 | return; 1335 | } 1336 | 1337 | wlat_test(cb); 1338 | wait_event_interruptible(cb->sem, cb->state == ERROR); 1339 | } 1340 | 1341 | static void krping_bw_test_server(struct krping_cb *cb) 1342 | { 1343 | const struct ib_send_wr *bad_wr; 1344 | struct ib_wc wc; 1345 | int ret; 1346 | 1347 | /* Spin waiting for client's Start STAG/TO/Len */ 1348 | while (cb->state < RDMA_READ_ADV) { 1349 | krping_cq_event_handler(cb->cq, cb); 1350 | } 1351 | 1352 | /* Send STAG/TO/Len to client */ 1353 | krping_format_send(cb, cb->start_dma_addr); 1354 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1355 | if (ret) { 1356 | printk(KERN_ERR PFX "post send error %d\n", ret); 1357 | return; 1358 | } 1359 | 1360 | /* Spin waiting for send completion */ 1361 | while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1362 | if (ret < 0) { 1363 | printk(KERN_ERR PFX "poll error %d\n", ret); 1364 | return; 1365 | } 1366 | if (wc.status) { 1367 | printk(KERN_ERR PFX "send completiong error %d\n", wc.status); 1368 | return; 1369 | } 1370 | 1371 | if (cb->duplex) 1372 | 
bw_test(cb); 1373 | wait_event_interruptible(cb->sem, cb->state == ERROR); 1374 | } 1375 | 1376 | static int reg_supported(struct ib_device *dev) 1377 | { 1378 | u64 needed_flags = IB_DEVICE_MEM_MGT_EXTENSIONS; 1379 | 1380 | if ((dev->attrs.device_cap_flags & needed_flags) != needed_flags) { 1381 | printk(KERN_ERR PFX 1382 | "Fastreg not supported - device_cap_flags 0x%llx\n", 1383 | (unsigned long long)dev->attrs.device_cap_flags); 1384 | return 0; 1385 | } 1386 | DEBUG_LOG("Fastreg supported - device_cap_flags 0x%llx\n", 1387 | (unsigned long long)dev->attrs.device_cap_flags); 1388 | return 1; 1389 | } 1390 | 1391 | static void fill_sockaddr(struct sockaddr_storage *sin, struct krping_cb *cb) 1392 | { 1393 | memset(sin, 0, sizeof(*sin)); 1394 | 1395 | if (cb->addr_type == AF_INET) { 1396 | struct sockaddr_in *sin4 = (struct sockaddr_in *)sin; 1397 | sin4->sin_family = AF_INET; 1398 | memcpy((void *)&sin4->sin_addr.s_addr, cb->addr, 4); 1399 | sin4->sin_port = cb->port; 1400 | } else if (cb->addr_type == AF_INET6) { 1401 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin; 1402 | sin6->sin6_family = AF_INET6; 1403 | memcpy((void *)&sin6->sin6_addr, cb->addr, 16); 1404 | sin6->sin6_port = cb->port; 1405 | if (cb->ip6_ndev_name[0] != 0) { 1406 | struct net_device *ndev; 1407 | 1408 | ndev = __dev_get_by_name(&init_net, cb->ip6_ndev_name); 1409 | if (ndev != NULL) { 1410 | sin6->sin6_scope_id = ndev->ifindex; 1411 | dev_put(ndev); 1412 | } 1413 | } 1414 | } 1415 | } 1416 | 1417 | static int krping_bind_server(struct krping_cb *cb) 1418 | { 1419 | struct sockaddr_storage sin; 1420 | int ret; 1421 | 1422 | 1423 | fill_sockaddr(&sin, cb); 1424 | 1425 | ret = rdma_bind_addr(cb->cm_id, (struct sockaddr *)&sin); 1426 | if (ret) { 1427 | printk(KERN_ERR PFX "rdma_bind_addr error %d\n", ret); 1428 | return ret; 1429 | } 1430 | DEBUG_LOG("rdma_bind_addr successful\n"); 1431 | 1432 | DEBUG_LOG("rdma_listen\n"); 1433 | ret = rdma_listen(cb->cm_id, 3); 1434 | if (ret) { 
1435 | printk(KERN_ERR PFX "rdma_listen failed: %d\n", ret); 1436 | return ret; 1437 | } 1438 | 1439 | wait_event_interruptible(cb->sem, cb->state >= CONNECT_REQUEST); 1440 | if (cb->state != CONNECT_REQUEST) { 1441 | printk(KERN_ERR PFX "wait for CONNECT_REQUEST state %d\n", 1442 | cb->state); 1443 | return -1; 1444 | } 1445 | 1446 | if (!reg_supported(cb->child_cm_id->device)) 1447 | return -EINVAL; 1448 | 1449 | return 0; 1450 | } 1451 | 1452 | static void krping_run_server(struct krping_cb *cb) 1453 | { 1454 | const struct ib_recv_wr *bad_wr; 1455 | int ret; 1456 | 1457 | ret = krping_bind_server(cb); 1458 | if (ret) 1459 | return; 1460 | 1461 | ret = krping_setup_qp(cb, cb->child_cm_id); 1462 | if (ret) { 1463 | printk(KERN_ERR PFX "setup_qp failed: %d\n", ret); 1464 | goto err0; 1465 | } 1466 | 1467 | ret = krping_setup_buffers(cb); 1468 | if (ret) { 1469 | printk(KERN_ERR PFX "krping_setup_buffers failed: %d\n", ret); 1470 | goto err1; 1471 | } 1472 | 1473 | ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 1474 | if (ret) { 1475 | printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); 1476 | goto err2; 1477 | } 1478 | 1479 | ret = krping_accept(cb); 1480 | if (ret) { 1481 | printk(KERN_ERR PFX "connect error %d\n", ret); 1482 | goto err2; 1483 | } 1484 | 1485 | if (cb->wlat) 1486 | krping_wlat_test_server(cb); 1487 | else if (cb->rlat) 1488 | krping_rlat_test_server(cb); 1489 | else if (cb->bw) 1490 | krping_bw_test_server(cb); 1491 | else 1492 | krping_test_server(cb); 1493 | rdma_disconnect(cb->child_cm_id); 1494 | err2: 1495 | krping_free_buffers(cb); 1496 | err1: 1497 | krping_free_qp(cb); 1498 | err0: 1499 | rdma_destroy_id(cb->child_cm_id); 1500 | } 1501 | 1502 | static void krping_test_client(struct krping_cb *cb) 1503 | { 1504 | int ping, start, cc, i, ret; 1505 | const struct ib_send_wr *bad_wr; 1506 | unsigned char c; 1507 | 1508 | start = 65; 1509 | for (ping = 0; !cb->count || ping < cb->count; ping++) { 1510 | cb->state = RDMA_READ_ADV; 1511 | 
1512 | /* Put some ascii text in the buffer. */ 1513 | cc = sprintf(cb->start_buf, "rdma-ping-%d: ", ping); 1514 | for (i = cc, c = start; i < cb->size; i++) { 1515 | cb->start_buf[i] = c; 1516 | c++; 1517 | if (c > 122) 1518 | c = 65; 1519 | } 1520 | start++; 1521 | if (start > 122) 1522 | start = 65; 1523 | cb->start_buf[cb->size - 1] = 0; 1524 | 1525 | krping_format_send(cb, cb->start_dma_addr); 1526 | if (cb->state == ERROR) { 1527 | printk(KERN_ERR PFX "krping_format_send failed\n"); 1528 | break; 1529 | } 1530 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1531 | if (ret) { 1532 | printk(KERN_ERR PFX "post send error %d\n", ret); 1533 | break; 1534 | } 1535 | 1536 | /* Wait for server to ACK */ 1537 | wait_event_interruptible(cb->sem, cb->state >= RDMA_WRITE_ADV); 1538 | if (cb->state != RDMA_WRITE_ADV) { 1539 | printk(KERN_ERR PFX 1540 | "wait for RDMA_WRITE_ADV state %d\n", 1541 | cb->state); 1542 | break; 1543 | } 1544 | 1545 | krping_format_send(cb, cb->rdma_dma_addr); 1546 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1547 | if (ret) { 1548 | printk(KERN_ERR PFX "post send error %d\n", ret); 1549 | break; 1550 | } 1551 | 1552 | /* Wait for the server to say the RDMA Write is complete. 
*/ 1553 | wait_event_interruptible(cb->sem, 1554 | cb->state >= RDMA_WRITE_COMPLETE); 1555 | if (cb->state != RDMA_WRITE_COMPLETE) { 1556 | printk(KERN_ERR PFX 1557 | "wait for RDMA_WRITE_COMPLETE state %d\n", 1558 | cb->state); 1559 | break; 1560 | } 1561 | 1562 | if (cb->validate) 1563 | if (memcmp(cb->start_buf, cb->rdma_buf, cb->size)) { 1564 | printk(KERN_ERR PFX "data mismatch!\n"); 1565 | break; 1566 | } 1567 | 1568 | if (cb->verbose) 1569 | printk(KERN_INFO PFX "ping data (64B max): |%.64s|\n", 1570 | cb->rdma_buf); 1571 | #ifdef SLOW_KRPING 1572 | wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); 1573 | #endif 1574 | } 1575 | } 1576 | 1577 | static void krping_rlat_test_client(struct krping_cb *cb) 1578 | { 1579 | const struct ib_send_wr *bad_wr; 1580 | struct ib_wc wc; 1581 | int ret; 1582 | 1583 | cb->state = RDMA_READ_ADV; 1584 | 1585 | /* Send STAG/TO/Len to client */ 1586 | krping_format_send(cb, cb->start_dma_addr); 1587 | if (cb->state == ERROR) { 1588 | printk(KERN_ERR PFX "krping_format_send failed\n"); 1589 | return; 1590 | } 1591 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1592 | if (ret) { 1593 | printk(KERN_ERR PFX "post send error %d\n", ret); 1594 | return; 1595 | } 1596 | 1597 | /* Spin waiting for send completion */ 1598 | while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1599 | if (ret < 0) { 1600 | printk(KERN_ERR PFX "poll error %d\n", ret); 1601 | return; 1602 | } 1603 | if (wc.status) { 1604 | printk(KERN_ERR PFX "send completion error %d\n", wc.status); 1605 | return; 1606 | } 1607 | 1608 | /* Spin waiting for server's Start STAG/TO/Len */ 1609 | while (cb->state < RDMA_WRITE_ADV) { 1610 | krping_cq_event_handler(cb->cq, cb); 1611 | } 1612 | 1613 | #if 0 1614 | { 1615 | int i; 1616 | ktime_t start, stop; 1617 | time_t sec; 1618 | suseconds_t usec; 1619 | unsigned long long elapsed; 1620 | struct ib_wc wc; 1621 | struct ib_send_wr *bad_wr; 1622 | int ne; 1623 | 1624 | cb->rdma_sq_wr.wr.opcode = IB_WR_RDMA_WRITE; 
1625 | cb->rdma_sq_wr.rkey = cb->remote_rkey; 1626 | cb->rdma_sq_wr.remote_addr = cb->remote_addr; 1627 | cb->rdma_sq_wr.wr.sg_list->length = 0; 1628 | cb->rdma_sq_wr.wr.num_sge = 0; 1629 | 1630 | start = ktime_get(); 1631 | for (i=0; i < 100000; i++) { 1632 | if (ib_post_send(cb->qp, &cb->rdma_sq_wr.wr, &bad_wr)) { 1633 | printk(KERN_ERR PFX "Couldn't post send\n"); 1634 | return; 1635 | } 1636 | do { 1637 | ne = ib_poll_cq(cb->cq, 1, &wc); 1638 | } while (ne == 0); 1639 | if (ne < 0) { 1640 | printk(KERN_ERR PFX "poll CQ failed %d\n", ne); 1641 | return; 1642 | } 1643 | if (wc.status != IB_WC_SUCCESS) { 1644 | printk(KERN_ERR PFX "Completion wth error at %s:\n", 1645 | cb->server ? "server" : "client"); 1646 | printk(KERN_ERR PFX "Failed status %d: wr_id %d\n", 1647 | wc.status, (int) wc.wr_id); 1648 | return; 1649 | } 1650 | } 1651 | stop = ktime_get(); 1652 | 1653 | if (stop.tv_usec < start.tv_usec) { 1654 | stop.tv_usec += 1000000; 1655 | stop.tv_sec -= 1; 1656 | } 1657 | sec = stop.tv_sec - start.tv_sec; 1658 | usec = stop.tv_usec - start.tv_usec; 1659 | elapsed = sec * 1000000 + usec; 1660 | printk(KERN_ERR PFX "0B-write-lat iters 100000 usec %llu\n", elapsed); 1661 | } 1662 | #endif 1663 | 1664 | rlat_test(cb); 1665 | } 1666 | 1667 | static void krping_wlat_test_client(struct krping_cb *cb) 1668 | { 1669 | const struct ib_send_wr *bad_wr; 1670 | struct ib_wc wc; 1671 | int ret; 1672 | 1673 | cb->state = RDMA_READ_ADV; 1674 | 1675 | /* Send STAG/TO/Len to client */ 1676 | krping_format_send(cb, cb->start_dma_addr); 1677 | if (cb->state == ERROR) { 1678 | printk(KERN_ERR PFX "krping_format_send failed\n"); 1679 | return; 1680 | } 1681 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1682 | if (ret) { 1683 | printk(KERN_ERR PFX "post send error %d\n", ret); 1684 | return; 1685 | } 1686 | 1687 | /* Spin waiting for send completion */ 1688 | while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1689 | if (ret < 0) { 1690 | printk(KERN_ERR PFX "poll error %d\n", 
ret); 1691 | return; 1692 | } 1693 | if (wc.status) { 1694 | printk(KERN_ERR PFX "send completion error %d\n", wc.status); 1695 | return; 1696 | } 1697 | 1698 | /* Spin waiting for server's Start STAG/TO/Len */ 1699 | while (cb->state < RDMA_WRITE_ADV) { 1700 | krping_cq_event_handler(cb->cq, cb); 1701 | } 1702 | 1703 | wlat_test(cb); 1704 | } 1705 | 1706 | static void krping_bw_test_client(struct krping_cb *cb) 1707 | { 1708 | const struct ib_send_wr *bad_wr; 1709 | struct ib_wc wc; 1710 | int ret; 1711 | 1712 | cb->state = RDMA_READ_ADV; 1713 | 1714 | /* Send STAG/TO/Len to client */ 1715 | krping_format_send(cb, cb->start_dma_addr); 1716 | if (cb->state == ERROR) { 1717 | printk(KERN_ERR PFX "krping_format_send failed\n"); 1718 | return; 1719 | } 1720 | ret = ib_post_send(cb->qp, &cb->sq_wr, &bad_wr); 1721 | if (ret) { 1722 | printk(KERN_ERR PFX "post send error %d\n", ret); 1723 | return; 1724 | } 1725 | 1726 | /* Spin waiting for send completion */ 1727 | while ((ret = ib_poll_cq(cb->cq, 1, &wc) == 0)); 1728 | if (ret < 0) { 1729 | printk(KERN_ERR PFX "poll error %d\n", ret); 1730 | return; 1731 | } 1732 | if (wc.status) { 1733 | printk(KERN_ERR PFX "send completion error %d\n", wc.status); 1734 | return; 1735 | } 1736 | 1737 | /* Spin waiting for server's Start STAG/TO/Len */ 1738 | while (cb->state < RDMA_WRITE_ADV) { 1739 | krping_cq_event_handler(cb->cq, cb); 1740 | } 1741 | 1742 | bw_test(cb); 1743 | } 1744 | 1745 | /* 1746 | * Manual qp flush test 1747 | */ 1748 | static void flush_qp(struct krping_cb *cb) 1749 | { 1750 | struct ib_send_wr wr = { 0 }; 1751 | const struct ib_send_wr *bad; 1752 | struct ib_recv_wr recv_wr = { 0 }; 1753 | const struct ib_recv_wr *recv_bad; 1754 | struct ib_wc wc; 1755 | int ret; 1756 | int flushed = 0; 1757 | int ccnt = 0; 1758 | 1759 | rdma_disconnect(cb->cm_id); 1760 | DEBUG_LOG("disconnected!\n"); 1761 | 1762 | wr.opcode = IB_WR_SEND; 1763 | wr.wr_id = 0xdeadbeefcafebabe; 1764 | ret = ib_post_send(cb->qp, &wr, &bad); 
1765 | if (ret) { 1766 | printk(KERN_ERR PFX "%s post_send failed ret %d\n", __func__, ret); 1767 | return; 1768 | } 1769 | 1770 | recv_wr.wr_id = 0xcafebabedeadbeef; 1771 | ret = ib_post_recv(cb->qp, &recv_wr, &recv_bad); 1772 | if (ret) { 1773 | printk(KERN_ERR PFX "%s post_recv failed ret %d\n", __func__, ret); 1774 | return; 1775 | } 1776 | 1777 | /* poll until the flush WRs complete */ 1778 | do { 1779 | ret = ib_poll_cq(cb->cq, 1, &wc); 1780 | if (ret < 0) { 1781 | printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret); 1782 | return; 1783 | } 1784 | if (ret == 0) 1785 | continue; 1786 | ccnt++; 1787 | if (wc.wr_id == 0xdeadbeefcafebabe || 1788 | wc.wr_id == 0xcafebabedeadbeef) 1789 | flushed++; 1790 | } while (flushed != 2); 1791 | DEBUG_LOG("qp_flushed! ccnt %u\n", ccnt); 1792 | } 1793 | 1794 | static unsigned long get_seconds(void) 1795 | { 1796 | time64_t sec; 1797 | 1798 | sec = ktime_get_seconds(); 1799 | return (unsigned long)sec; 1800 | } 1801 | 1802 | static void krping_fr_test(struct krping_cb *cb) 1803 | { 1804 | struct ib_send_wr inv; 1805 | const struct ib_send_wr *bad; 1806 | struct ib_reg_wr fr; 1807 | struct ib_wc wc; 1808 | u8 key = 0; 1809 | struct ib_mr *mr; 1810 | int ret; 1811 | int size = cb->size; 1812 | int plen = (((size - 1) & PAGE_MASK) + PAGE_SIZE) >> PAGE_SHIFT; 1813 | unsigned long start; 1814 | int count = 0; 1815 | int scnt = 0; 1816 | struct scatterlist sg = {0}; 1817 | 1818 | mr = ib_alloc_mr(cb->pd, IB_MR_TYPE_MEM_REG, plen); 1819 | if (IS_ERR(mr)) { 1820 | printk(KERN_ERR PFX "ib_alloc_mr failed %ld\n", PTR_ERR(mr)); 1821 | return; 1822 | } 1823 | 1824 | sg_dma_address(&sg) = (dma_addr_t)0xcafebabe0000ULL; 1825 | sg_dma_len(&sg) = size; 1826 | ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE); 1827 | if (ret <= 0) { 1828 | printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret); 1829 | goto err2; 1830 | } 1831 | 1832 | memset(&fr, 0, sizeof fr); 1833 | fr.wr.opcode = IB_WR_REG_MR; 1834 | fr.access = IB_ACCESS_REMOTE_WRITE | 
IB_ACCESS_LOCAL_WRITE; 1835 | fr.mr = mr; 1836 | fr.wr.next = &inv; 1837 | 1838 | memset(&inv, 0, sizeof inv); 1839 | inv.opcode = IB_WR_LOCAL_INV; 1840 | inv.send_flags = IB_SEND_SIGNALED; 1841 | 1842 | DEBUG_LOG("fr_test: stag index 0x%x plen %u size %u depth %u\n", mr->rkey >> 8, plen, cb->size, cb->txdepth); 1843 | start = get_seconds(); 1844 | while (!cb->count || count <= cb->count) { 1845 | if (signal_pending(current)) { 1846 | printk(KERN_ERR PFX "signal!\n"); 1847 | break; 1848 | } 1849 | if ((get_seconds() - start) >= 9) { 1850 | DEBUG_LOG("fr_test: pausing 1 second! count %u latest size %u plen %u\n", count, size, plen); 1851 | wait_event_interruptible_timeout(cb->sem, cb->state == ERROR, HZ); 1852 | if (cb->state == ERROR) 1853 | break; 1854 | start = get_seconds(); 1855 | } 1856 | while (scnt < (cb->txdepth>>1)) { 1857 | ib_update_fast_reg_key(mr, ++key); 1858 | fr.key = mr->rkey; 1859 | inv.ex.invalidate_rkey = mr->rkey; 1860 | 1861 | size = get_random_u32() % cb->size; 1862 | if (size == 0) 1863 | size = cb->size; 1864 | sg_dma_len(&sg) = size; 1865 | ret = ib_map_mr_sg(mr, &sg, 1, NULL, PAGE_SIZE); 1866 | if (ret <= 0) { 1867 | printk(KERN_ERR PFX "ib_map_mr_sge err %d\n", ret); 1868 | goto err2; 1869 | } 1870 | ret = ib_post_send(cb->qp, &fr.wr, &bad); 1871 | if (ret) { 1872 | printk(KERN_ERR PFX "ib_post_send failed %d\n", ret); 1873 | goto err2; 1874 | } 1875 | scnt++; 1876 | } 1877 | 1878 | ret = ib_poll_cq(cb->cq, 1, &wc); 1879 | if (ret < 0) { 1880 | printk(KERN_ERR PFX "ib_poll_cq failed %d\n", ret); 1881 | goto err2; 1882 | } 1883 | if (ret == 1) { 1884 | if (wc.status) { 1885 | printk(KERN_ERR PFX "completion error %u\n", wc.status); 1886 | goto err2; 1887 | } 1888 | count++; 1889 | scnt--; 1890 | } 1891 | } 1892 | err2: 1893 | flush_qp(cb); 1894 | DEBUG_LOG("fr_test: done!\n"); 1895 | ib_dereg_mr(mr); 1896 | } 1897 | 1898 | static int krping_connect_client(struct krping_cb *cb) 1899 | { 1900 | struct rdma_conn_param conn_param; 1901 | int 
ret; 1902 | 1903 | memset(&conn_param, 0, sizeof conn_param); 1904 | conn_param.responder_resources = 1; 1905 | conn_param.initiator_depth = 1; 1906 | conn_param.retry_count = 10; 1907 | 1908 | ret = rdma_connect(cb->cm_id, &conn_param); 1909 | if (ret) { 1910 | printk(KERN_ERR PFX "rdma_connect error %d\n", ret); 1911 | return ret; 1912 | } 1913 | 1914 | wait_event_interruptible(cb->sem, cb->state >= CONNECTED); 1915 | if (cb->state == ERROR) { 1916 | printk(KERN_ERR PFX "wait for CONNECTED state %d\n", cb->state); 1917 | return -1; 1918 | } 1919 | 1920 | DEBUG_LOG("rdma_connect successful\n"); 1921 | return 0; 1922 | } 1923 | 1924 | static int krping_bind_client(struct krping_cb *cb) 1925 | { 1926 | struct sockaddr_storage sin; 1927 | int ret; 1928 | 1929 | fill_sockaddr(&sin, cb); 1930 | 1931 | ret = rdma_resolve_addr(cb->cm_id, NULL, (struct sockaddr *)&sin, 2000); 1932 | if (ret) { 1933 | printk(KERN_ERR PFX "rdma_resolve_addr error %d\n", ret); 1934 | return ret; 1935 | } 1936 | 1937 | wait_event_interruptible(cb->sem, cb->state >= ROUTE_RESOLVED); 1938 | if (cb->state != ROUTE_RESOLVED) { 1939 | printk(KERN_ERR PFX 1940 | "addr/route resolution did not resolve: state %d\n", 1941 | cb->state); 1942 | return -EINTR; 1943 | } 1944 | 1945 | if (!reg_supported(cb->cm_id->device)) 1946 | return -EINVAL; 1947 | 1948 | DEBUG_LOG("rdma_resolve_addr - rdma_resolve_route successful\n"); 1949 | return 0; 1950 | } 1951 | 1952 | static void krping_run_client(struct krping_cb *cb) 1953 | { 1954 | const struct ib_recv_wr *bad_wr; 1955 | int ret; 1956 | 1957 | /* set type of service, if any */ 1958 | if (cb->tos != 0) 1959 | rdma_set_service_type(cb->cm_id, cb->tos); 1960 | 1961 | ret = krping_bind_client(cb); 1962 | if (ret) 1963 | return; 1964 | 1965 | ret = krping_setup_qp(cb, cb->cm_id); 1966 | if (ret) { 1967 | printk(KERN_ERR PFX "setup_qp failed: %d\n", ret); 1968 | return; 1969 | } 1970 | 1971 | ret = krping_setup_buffers(cb); 1972 | if (ret) { 1973 | printk(KERN_ERR 
PFX "krping_setup_buffers failed: %d\n", ret); 1974 | goto err1; 1975 | } 1976 | 1977 | ret = ib_post_recv(cb->qp, &cb->rq_wr, &bad_wr); 1978 | if (ret) { 1979 | printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); 1980 | goto err2; 1981 | } 1982 | 1983 | ret = krping_connect_client(cb); 1984 | if (ret) { 1985 | printk(KERN_ERR PFX "connect error %d\n", ret); 1986 | goto err2; 1987 | } 1988 | 1989 | if (cb->wlat) 1990 | krping_wlat_test_client(cb); 1991 | else if (cb->rlat) 1992 | krping_rlat_test_client(cb); 1993 | else if (cb->bw) 1994 | krping_bw_test_client(cb); 1995 | else if (cb->frtest) 1996 | krping_fr_test(cb); 1997 | else 1998 | krping_test_client(cb); 1999 | rdma_disconnect(cb->cm_id); 2000 | err2: 2001 | krping_free_buffers(cb); 2002 | err1: 2003 | krping_free_qp(cb); 2004 | } 2005 | 2006 | static int krping_doit(char *cmd) 2007 | { 2008 | struct krping_cb *cb; 2009 | int op; 2010 | int ret = 0; 2011 | char *optarg; 2012 | char *scope; 2013 | unsigned long optint; 2014 | 2015 | cb = kzalloc(sizeof(*cb), GFP_KERNEL); 2016 | if (!cb) 2017 | return -ENOMEM; 2018 | 2019 | mutex_lock(&krping_mutex); 2020 | list_add_tail(&cb->list, &krping_cbs); 2021 | mutex_unlock(&krping_mutex); 2022 | 2023 | cb->server = -1; 2024 | cb->state = IDLE; 2025 | cb->size = 64; 2026 | cb->txdepth = RPING_SQ_DEPTH; 2027 | init_waitqueue_head(&cb->sem); 2028 | 2029 | while ((op = krping_getopt("krping", &cmd, krping_opts, NULL, &optarg, 2030 | &optint)) != 0) { 2031 | switch (op) { 2032 | case 'a': 2033 | cb->addr_str = optarg; 2034 | in4_pton(optarg, -1, cb->addr, -1, NULL); 2035 | cb->addr_type = AF_INET; 2036 | DEBUG_LOG("ipaddr (%s)\n", optarg); 2037 | break; 2038 | case 'A': 2039 | cb->addr_str = optarg; 2040 | scope = strstr(optarg, "%"); 2041 | if (scope != NULL) { 2042 | *scope++ = 0; 2043 | strncpy(cb->ip6_ndev_name, scope, 2044 | sizeof(cb->ip6_ndev_name)); 2045 | /* force zero-termination */ 2046 | cb->ip6_ndev_name[ 2047 | sizeof(cb->ip6_ndev_name) - 1] = 0; 2048 | } 
2049 | in6_pton(optarg, -1, cb->addr, -1, NULL); 2050 | cb->addr_type = AF_INET6; 2051 | DEBUG_LOG("ipv6addr (%s)\n", optarg); 2052 | break; 2053 | case 'p': 2054 | cb->port = htons(optint); 2055 | DEBUG_LOG("port %d\n", (int)optint); 2056 | break; 2057 | case 'P': 2058 | cb->poll = 1; 2059 | DEBUG_LOG("server\n"); 2060 | break; 2061 | case 's': 2062 | cb->server = 1; 2063 | DEBUG_LOG("server\n"); 2064 | break; 2065 | case 'c': 2066 | cb->server = 0; 2067 | DEBUG_LOG("client\n"); 2068 | break; 2069 | case 'S': 2070 | cb->size = optint; 2071 | if ((cb->size < 1) || 2072 | (cb->size > RPING_BUFSIZE)) { 2073 | printk(KERN_ERR PFX "Invalid size %d " 2074 | "(valid range is 1 to %d)\n", 2075 | cb->size, RPING_BUFSIZE); 2076 | ret = EINVAL; 2077 | } else 2078 | DEBUG_LOG("size %d\n", (int)optint); 2079 | break; 2080 | case 'C': 2081 | cb->count = optint; 2082 | if (cb->count < 0) { 2083 | printk(KERN_ERR PFX "Invalid count %d\n", 2084 | cb->count); 2085 | ret = EINVAL; 2086 | } else 2087 | DEBUG_LOG("count %d\n", (int) cb->count); 2088 | break; 2089 | case 'v': 2090 | cb->verbose++; 2091 | DEBUG_LOG("verbose\n"); 2092 | break; 2093 | case 'V': 2094 | cb->validate++; 2095 | DEBUG_LOG("validate data\n"); 2096 | break; 2097 | case 'l': 2098 | cb->wlat++; 2099 | break; 2100 | case 'L': 2101 | cb->rlat++; 2102 | break; 2103 | case 'B': 2104 | cb->bw++; 2105 | break; 2106 | case 'd': 2107 | cb->duplex++; 2108 | break; 2109 | case 'I': 2110 | cb->server_invalidate = 1; 2111 | break; 2112 | case 't': 2113 | cb->tos = optint; 2114 | DEBUG_LOG("type of service, tos=%d\n", (int) cb->tos); 2115 | break; 2116 | case 'T': 2117 | cb->txdepth = optint; 2118 | DEBUG_LOG("txdepth %d\n", (int) cb->txdepth); 2119 | break; 2120 | case 'Z': 2121 | cb->local_dma_lkey = 1; 2122 | DEBUG_LOG("using local dma lkey\n"); 2123 | break; 2124 | case 'R': 2125 | cb->read_inv = 1; 2126 | DEBUG_LOG("using read-with-inv\n"); 2127 | break; 2128 | case 'f': 2129 | cb->frtest = 1; 2130 | DEBUG_LOG("fast-reg 
test!\n"); 2131 | break; 2132 | default: 2133 | printk(KERN_ERR PFX "unknown opt %s\n", optarg); 2134 | ret = -EINVAL; 2135 | break; 2136 | } 2137 | } 2138 | if (ret) 2139 | goto out; 2140 | 2141 | if (cb->server == -1) { 2142 | printk(KERN_ERR PFX "must be either client or server\n"); 2143 | ret = -EINVAL; 2144 | goto out; 2145 | } 2146 | 2147 | if (cb->server && cb->frtest) { 2148 | printk(KERN_ERR PFX "must be client to run frtest\n"); 2149 | ret = -EINVAL; 2150 | goto out; 2151 | } 2152 | 2153 | if ((cb->frtest + cb->bw + cb->rlat + cb->wlat) > 1) { 2154 | printk(KERN_ERR PFX "Pick only one test: fr, bw, rlat, wlat\n"); 2155 | ret = -EINVAL; 2156 | goto out; 2157 | } 2158 | 2159 | if (cb->wlat || cb->rlat || cb->bw) { 2160 | printk(KERN_ERR PFX "wlat, rlat, and bw tests only support mem_mode MR - which is no longer supported\n"); 2161 | ret = -EINVAL; 2162 | goto out; 2163 | } 2164 | 2165 | cb->cm_id = rdma_create_id(&init_net, krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC); 2166 | if (IS_ERR(cb->cm_id)) { 2167 | ret = PTR_ERR(cb->cm_id); 2168 | printk(KERN_ERR PFX "rdma_create_id error %d\n", ret); 2169 | goto out; 2170 | } 2171 | DEBUG_LOG("created cm_id %p\n", cb->cm_id); 2172 | 2173 | if (cb->server) 2174 | krping_run_server(cb); 2175 | else 2176 | krping_run_client(cb); 2177 | 2178 | DEBUG_LOG("destroy cm_id %p\n", cb->cm_id); 2179 | rdma_destroy_id(cb->cm_id); 2180 | out: 2181 | mutex_lock(&krping_mutex); 2182 | list_del(&cb->list); 2183 | mutex_unlock(&krping_mutex); 2184 | kfree(cb); 2185 | return ret; 2186 | } 2187 | 2188 | /* 2189 | * Read proc returns stats for each device. 
2190 | */ 2191 | static int krping_read_proc(struct seq_file *seq, void *v) 2192 | { 2193 | struct krping_cb *cb; 2194 | int num = 1; 2195 | 2196 | if (!try_module_get(THIS_MODULE)) 2197 | return -ENODEV; 2198 | DEBUG_LOG(KERN_INFO PFX "proc read called...\n"); 2199 | mutex_lock(&krping_mutex); 2200 | list_for_each_entry(cb, &krping_cbs, list) { 2201 | if (cb->pd) { 2202 | seq_printf(seq, 2203 | "%d-%s %lld %lld %lld %lld %lld %lld %lld %lld\n", 2204 | num++, cb->pd->device->name, cb->stats.send_bytes, 2205 | cb->stats.send_msgs, cb->stats.recv_bytes, 2206 | cb->stats.recv_msgs, cb->stats.write_bytes, 2207 | cb->stats.write_msgs, 2208 | cb->stats.read_bytes, 2209 | cb->stats.read_msgs); 2210 | } else { 2211 | seq_printf(seq, "%d listen\n", num++); 2212 | } 2213 | } 2214 | mutex_unlock(&krping_mutex); 2215 | module_put(THIS_MODULE); 2216 | return 0; 2217 | } 2218 | 2219 | /* 2220 | * Write proc is used to start a ping client or server. 2221 | */ 2222 | static ssize_t krping_write_proc(struct file * file, const char __user * buffer, 2223 | size_t count, loff_t *ppos) 2224 | { 2225 | char *cmd; 2226 | int rc; 2227 | 2228 | if (!try_module_get(THIS_MODULE)) 2229 | return -ENODEV; 2230 | 2231 | cmd = kmalloc(count, GFP_KERNEL); 2232 | if (cmd == NULL) { 2233 | printk(KERN_ERR PFX "kmalloc failure\n"); 2234 | return -ENOMEM; 2235 | } 2236 | if (copy_from_user(cmd, buffer, count)) { 2237 | kfree(cmd); 2238 | return -EFAULT; 2239 | } 2240 | 2241 | /* 2242 | * remove the \n. 
2243 | */ 2244 | cmd[count - 1] = 0; 2245 | DEBUG_LOG(KERN_INFO PFX "proc write |%s|\n", cmd); 2246 | rc = krping_doit(cmd); 2247 | kfree(cmd); 2248 | module_put(THIS_MODULE); 2249 | if (rc) 2250 | return rc; 2251 | else 2252 | return (int) count; 2253 | } 2254 | 2255 | static int krping_read_open(struct inode *inode, struct file *file) 2256 | { 2257 | return single_open(file, krping_read_proc, inode->i_private); 2258 | } 2259 | 2260 | static const struct proc_ops krping_ops = { 2261 | // .owner = THIS_MODULE, 2262 | .proc_open = krping_read_open, 2263 | .proc_read = seq_read, 2264 | .proc_write = krping_write_proc, 2265 | .proc_lseek = seq_lseek, 2266 | .proc_release = single_release, 2267 | }; 2268 | 2269 | static int __init krping_init(void) 2270 | { 2271 | DEBUG_LOG("krping_init\n"); 2272 | krping_proc = proc_create("krping", 0666, NULL, &krping_ops); 2273 | if (krping_proc == NULL) { 2274 | printk(KERN_ERR PFX "cannot create /proc/krping\n"); 2275 | return -ENOMEM; 2276 | } 2277 | return 0; 2278 | } 2279 | 2280 | static void __exit krping_exit(void) 2281 | { 2282 | DEBUG_LOG("krping_exit\n"); 2283 | remove_proc_entry("krping", NULL); 2284 | } 2285 | 2286 | module_init(krping_init); 2287 | module_exit(krping_exit); 2288 | --------------------------------------------------------------------------------