├── README.md
├── common
│   └── siw_user.h
├── kernel
│   ├── IMPLEMENTATION.txt
│   ├── Kbuild
│   ├── Makefile
│   ├── README_KERNEL_VERSIONS.txt
│   ├── iwarp.h
│   ├── siw.h
│   ├── siw_ae.c
│   ├── siw_cm.c
│   ├── siw_cm.h
│   ├── siw_cq.c
│   ├── siw_debug.c
│   ├── siw_debug.h
│   ├── siw_main.c
│   ├── siw_mem.c
│   ├── siw_obj.c
│   ├── siw_obj.h
│   ├── siw_qp.c
│   ├── siw_qp_rx.c
│   ├── siw_qp_tx.c
│   ├── siw_verbs.c
│   └── siw_verbs.h
└── userlib
    ├── Makefile.am
    ├── autogen.sh
    ├── configure.in
    ├── libsiw.spec.in
    ├── siw.driver
    └── src
        ├── siw.c
        ├── siw.h
        ├── siw_abi.h
        ├── siw_uverbs.c
        └── siw_verbs.c

/README.md:
--------------------------------------------------------------------------------
1 | ## DISCLAIMER
2 | 
3 | **THIS CODE IS NO LONGER MAINTAINED, SINCE THE SOFTIWARP DRIVER
4 | BECAME PART OF UPSTREAM LINUX WITH KERNEL 5.2, AND IS MAINTAINED
5 | THERE.**
6 | 
7 | Please use the latest SoftiWarp driver and its user RDMA library
8 | from your Linux distro for running software RDMA over IP interfaces.
9 | We keep this zrlio siw repo only for reference.
10 | 
11 | # SoftiWARP
12 | 
13 | SoftiWARP (siw) is a software iWARP kernel driver and user library
14 | for Linux. It implements the iWARP protocol suite (MPA/DDP/RDMAP,
15 | IETF RFCs 5044/5041/5040) completely in software, without requiring
16 | any dedicated RDMA hardware. It comprises a loadable Linux kernel
17 | module `siw` located in `kernel/` and a user level library `libsiw`
18 | located in `userlib/`.
19 | 
20 | 
21 | SoftiWARP targets integration with the OpenFabrics (OFA)
22 | ecosystem and is therefore written against the OFA kernel
23 | and user level interfaces.
24 | 
25 | SoftiWARP supports both user level and kernel level applications.
26 | It makes use of the OFA connection manager to set up connections.
27 | The kernel component runs on top of TCP kernel sockets.
28 | 
29 | ## Code structure
30 | ```bash
31 | kernel/: kernel module
32 | userlib/: user library
33 | common/: common include file(s)
34 | ```
35 | 
36 | ## Build and install
37 | 
38 | ### Linux kernel versions
39 | 
40 | The SoftiWARP code tries to stay up to date with recent Linux kernels.
41 | Git `master` is supposed to run on the newest stable kernel.
42 | To ease code maintenance and to allow for back porting
43 | of new features, old versions of SoftiWARP are branched
44 | off with descriptive names. `master` is always tagged with the kernel
45 | version it matches.
46 | 
47 | Re-installing a newer SoftiWARP version after a kernel upgrade includes
48 | rebuilding and re-installing both the user library and the kernel module
49 | (see below).
50 | 
51 | ### User-space library
52 | 
53 | ```bash
54 | cd /path/to/your/clone/userlib
55 | ./autogen.sh
56 | ./configure
57 | make install
58 | ```
59 | 
60 | ### Kernel module
61 | To build:
62 | ```bash
63 | cd /path/to/your/clone/kernel
64 | make
65 | ```
66 | 
67 | To load:
68 | 
69 | Setting 1: start TX threads on all available CPUs
70 | (check dmesg to see which CPUs run a TX thread)
71 | ```bash
72 | sudo insmod ./siw.ko
73 | ```
74 | 
75 | Setting 2: start a TX thread on each CPU given in a
76 | comma-separated list, if that CPU is available
77 | ```bash
78 | sudo insmod ./siw.ko tx_cpu_list=[n,m,...]
79 | ```
80 | 
81 | ## Contributions
82 | 
83 | PRs are always welcome. Please fork, make the modifications
84 | you propose, and let us know.
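
## Verifying the load

An optional sanity check after `insmod`; this example assumes the
standard `libibverbs` utilities are installed:

```bash
lsmod | grep siw     # kernel module is loaded
dmesg | tail         # shows which CPUs run a TX thread
ibv_devices          # the siw device should appear in the list
```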
85 | 
86 | ## Contact
87 | 
88 | If you have questions or suggestions, feel free to post at:
89 | 
90 | https://groups.google.com/forum/#!forum/zrlio-users
91 | 
92 | or email: zrlio-users@googlegroups.com
93 | 
94 | 
--------------------------------------------------------------------------------
/common/siw_user.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Software iWARP device driver for Linux
3 |  *
4 |  * Authors: Bernard Metzler
5 |  *
6 |  * Copyright (c) 2008-2016, IBM Corporation
7 |  *
8 |  * This software is available to you under a choice of one of two
9 |  * licenses. You may choose to be licensed under the terms of the GNU
10 |  * General Public License (GPL) Version 2, available from the file
11 |  * COPYING in the main directory of this source tree, or the
12 |  * BSD license below:
13 |  *
14 |  * Redistribution and use in source and binary forms, with or
15 |  * without modification, are permitted provided that the following
16 |  * conditions are met:
17 |  *
18 |  * - Redistributions of source code must retain the above copyright notice,
19 |  *   this list of conditions and the following disclaimer.
20 |  *
21 |  * - Redistributions in binary form must reproduce the above copyright
22 |  *   notice, this list of conditions and the following disclaimer in the
23 |  *   documentation and/or other materials provided with the distribution.
24 |  *
25 |  * - Neither the name of IBM nor the names of its contributors may be
26 |  *   used to endorse or promote products derived from this software without
27 |  *   specific prior written permission.
28 |  *
29 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
33 |  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
34 |  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
35 |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 |  * SOFTWARE.
37 |  */
38 | 
39 | #ifndef _SIW_USER_H
40 | #define _SIW_USER_H
41 | 
42 | #ifdef __KERNEL__
43 | #include <linux/types.h>
44 | #else
45 | #include <stdint.h>
46 | #endif
47 | 
48 | /*
49 |  * user commands/command responses must correlate with the siw_abi
50 |  * in user land.
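 * (The matching user-space definitions live in userlib/src/siw_abi.h.)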
51 |  */
52 | /* Common string that is matched to accept the device by the user library */
53 | #define SIW_NODE_DESC_COMMON "Software iWARP stack"
54 | 
55 | #define SIW_IBDEV_PREFIX "siw_"
56 | 
57 | #define VERSION_ID_SOFTIWARP 2
58 | 
59 | #define SIW_MAX_SGE 6
60 | #define SIW_MAX_UOBJ_KEY 0xffffff
61 | #define SIW_INVAL_UOBJ_KEY (SIW_MAX_UOBJ_KEY + 1)
62 | 
63 | struct siw_uresp_create_cq {
64 | 	uint32_t cq_id;
65 | 	uint32_t num_cqe;
66 | 	uint32_t cq_key;
67 | };
68 | 
69 | struct siw_uresp_create_qp {
70 | 	uint32_t qp_id;
71 | 	uint32_t num_sqe;
72 | 	uint32_t num_rqe;
73 | 	uint32_t sq_key;
74 | 	uint32_t rq_key;
75 | };
76 | 
77 | struct siw_ureq_reg_mr {
78 | 	uint8_t stag_key;
79 | 	uint8_t reserved[3];
80 | };
81 | 
82 | struct siw_uresp_reg_mr {
83 | 	uint32_t stag;
84 | };
85 | 
86 | struct siw_uresp_create_srq {
87 | 	uint32_t num_rqe;
88 | 	uint32_t srq_key;
89 | };
90 | 
91 | struct siw_uresp_alloc_ctx {
92 | 	uint32_t dev_id;
93 | 	uint32_t rdma_db_nr;
94 | };
95 | 
96 | enum siw_db_type {
97 | 	SIW_DB_SQ = 1
98 | };
99 | 
100 | enum siw_opcode {
101 | 	SIW_OP_WRITE = 0,
102 | 	SIW_OP_READ = 1,
103 | 	SIW_OP_SEND = 2,
104 | 	SIW_OP_SEND_WITH_IMM = 3,
105 | 
106 | 	/* Unsupported */
107 | 	SIW_OP_FETCH_AND_ADD = 4,
108 | 	SIW_OP_COMP_AND_SWAP = 5,
109 | 	SIW_OP_INVAL_STAG = 6,
110 | 
111 | 	SIW_OP_RECEIVE = 7,
112 | #ifdef __KERNEL__
113 | 	SIW_OP_READ_RESPONSE = 8, /* provider internal */
114 | 	SIW_NUM_OPCODES = 9,
115 | #else
116 | 	SIW_NUM_OPCODES = 8,
117 | #endif
118 | 	SIW_OP_INVALID = SIW_NUM_OPCODES + 1
119 | };
120 | 
121 | /* Keep it the same as ibv_sge to allow for memcpy */
122 | struct siw_sge {
123 | 	uint64_t laddr;
124 | 	uint32_t length;
125 | 	uint32_t lkey;
126 | };
127 | 
128 | /*
129 |  * Inline data are kept within the work request itself, occupying
130 |  * the space of sge[1] .. sge[n]. Therefore, inline data cannot be
131 |  * supported if SIW_MAX_SGE is below 2 elements.
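 * For example, with SIW_MAX_SGE == 6 and a 16-byte struct siw_sge,
 * SIW_MAX_INLINE below evaluates to 5 * 16 = 80 bytes.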
132 |  */
133 | #define SIW_MAX_INLINE (sizeof(struct siw_sge) * (SIW_MAX_SGE - 1))
134 | 
135 | #if SIW_MAX_SGE < 2
136 | #error "SIW_MAX_SGE must be at least 2"
137 | #endif
138 | 
139 | enum siw_wqe_flags {
140 | 	SIW_WQE_VALID = 1,
141 | 	SIW_WQE_INLINE = (1 << 1),
142 | 	SIW_WQE_SIGNALLED = (1 << 2),
143 | 	SIW_WQE_SOLICITED = (1 << 3),
144 | 	SIW_WQE_READ_FENCE = (1 << 4),
145 | 	SIW_WQE_COMPLETED = (1 << 5)
146 | };
147 | 
148 | /* Minimum sized Send Queue Element */
149 | struct siw_sqe {
150 | 	uint64_t id;
151 | 	uint16_t flags;
152 | 	uint8_t num_sge;
153 | 	uint8_t opcode; /* Actual enum siw_opcode values */
154 | 	uint32_t rkey;
155 | 	uint64_t raddr;
156 | 	struct siw_sge sge[SIW_MAX_SGE];
157 | };
158 | 
159 | struct siw_rqe {
160 | 	uint64_t id;
161 | 	uint32_t flags;
162 | 	uint32_t num_sge;
163 | 	struct siw_sge sge[SIW_MAX_SGE];
164 | };
165 | 
166 | enum siw_notify_flags {
167 | 	SIW_NOTIFY_NOT = (0),
168 | 	SIW_NOTIFY_SOLICITED = (1 << 0),
169 | 	SIW_NOTIFY_NEXT_COMPLETION = (1 << 1),
170 | 	SIW_NOTIFY_MISSED_EVENTS = (1 << 2),
171 | 	SIW_NOTIFY_ALL = SIW_NOTIFY_SOLICITED |
172 | 			SIW_NOTIFY_NEXT_COMPLETION |
173 | 			SIW_NOTIFY_MISSED_EVENTS
174 | };
175 | 
176 | enum siw_wc_status {
177 | 	SIW_WC_SUCCESS = 0,
178 | 	SIW_WC_LOC_LEN_ERR = 1,
179 | 	SIW_WC_LOC_PROT_ERR = 2,
180 | 	SIW_WC_LOC_QP_OP_ERR = 3,
181 | 	SIW_WC_WR_FLUSH_ERR = 4,
182 | 	SIW_WC_BAD_RESP_ERR = 5,
183 | 	SIW_WC_LOC_ACCESS_ERR = 6,
184 | 	SIW_WC_REM_ACCESS_ERR = 7,
185 | 	SIW_WC_GENERAL_ERR = 8,
186 | 	SIW_NUM_WC_STATUS = 9
187 | };
188 | 
189 | struct siw_cqe {
190 | 	uint64_t id;
191 | 	uint8_t flags;
192 | 	uint8_t opcode;
193 | 	uint16_t status;
194 | 	uint32_t bytes;
195 | 	uint64_t imm_data;
196 | 	/* QP number or QP pointer */
197 | 	union {
198 | 		void *qp;
199 | 		uint64_t qp_id;
200 | 	};
201 | };
202 | 
203 | /*
204 |  * Shared structure between user and kernel
205 |  * to control CQ arming.
206 |  */
207 | struct siw_cq_ctrl {
208 | 	enum siw_notify_flags notify;
209 | };
210 | 
211 | 
212 | #endif
213 | 
--------------------------------------------------------------------------------
/kernel/IMPLEMENTATION.txt:
--------------------------------------------------------------------------------
1 | SoftiWARP: 'siw' Software iWARP kernel driver module.
2 | 
3 | SoftiWARP (siw) implements the iWARP protocol suite (MPA/DDP/RDMAP,
4 | IETF RFCs 5044/5041/5040) completely in software as a Linux kernel module.
5 | Targeted for integration with OpenFabrics (OFA) interfaces, it appears as
6 | a kernel module in the drivers/infiniband/hw subdirectory of the Linux kernel.
7 | SoftiWARP exports the OFA RDMA verbs interface, currently usable only
8 | for user level applications. It makes use of the OFA connection manager
9 | to set up connections. siw runs on top of TCP kernel sockets.
10 | 
11 | 
12 | 
13 | Status:
14 | =======
15 | siw is work in progress. While the implementation is not complete,
16 | it implements basic connection management, all iWARP wire operations
17 | (SEND, READ, WRITE), and memory protection.
18 | 
19 | 
20 | 
21 | Transmit Path:
22 | ==============
23 | If a send queue (SQ) work queue element (wqe) gets posted, siw first
24 | tries to send it directly out of the application context. If the SQ
25 | was non-empty, SQ processing is done by a kernel worker thread.
26 | This thread schedules work whenever the TCP socket signals that new
27 | write space is available. If the socket send space gets exhausted
28 | during a send operation, SQ processing is suspended until it resumes
29 | via the socket's write space callback.
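
A small, self-contained model of this posting decision (all names are
made up for illustration; the actual driver logic lives in the TX path,
cf. siw_qp_tx.c):

	/* toy model only -- not driver code */
	#include <stdio.h>
	#include <stdbool.h>

	struct toy_sq { int pending; };

	static void tx_in_app_context(void) { puts("send directly from user context"); }
	static void tx_defer_to_worker(void) { puts("let kernel TX worker process SQ"); }

	static void toy_post_wqe(struct toy_sq *sq)
	{
		bool was_empty = (sq->pending++ == 0);

		if (was_empty)
			tx_in_app_context();	/* SQ was idle: try inline send */
		else
			tx_defer_to_worker();	/* worker drains the SQ; when send
						 * space was exhausted, the socket's
						 * write space callback reschedules it */
	}

	int main(void)
	{
		struct toy_sq sq = { 0 };

		toy_post_wqe(&sq);	/* -> direct send */
		toy_post_wqe(&sq);	/* -> worker */
		return 0;
	}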
30 | 
31 | Packet Fragmentation:
32 | ---------------------
33 | siw tries to take the medium's MTU into account. FPDUs are constructed not
34 | to exceed the MTU, to avoid fragmentation on the wire. Since TCP is a byte
35 | stream protocol, there is no guarantee that FPDUs will not be fragmented.
36 | 
37 | Zero Copy Send:
38 | ---------------
39 | Where allowed by RDMA semantics, siw uses sendpage() for transmitting
40 | user data. This avoids a local data copy operation. As long as the data
41 | have not been placed in the peer's target buffer, any change to the content
42 | of the local buffer to be sent will result in unpredictable target buffer
43 | content. Furthermore, if CRC checksumming is enabled, any change to not yet
44 | transmitted data already under control of TCP will result in CRC
45 | corruption.
46 | 
47 | Current experimental indicators for using sendpage():
48 | 
49 | CRC is disabled, AND
50 | operation is a READ response, OR
51 | operation is a non-signalled SEND, OR
52 | operation is a non-signalled WRITE.
53 | 
54 | Furthermore, sendpage() gets used only above a certain payload size
55 | threshold. All sendpage() usage is experimental and will be extended
56 | to guarantee that the memory stays resident until the data are
57 | transmitted.
58 | 
59 | 
60 | Receive Path:
61 | =============
62 | All application data is received directly within the softirq socket callback
63 | via tcp_read_sock(). This can be easily achieved, since all target
64 | buffers are kernel resident.
65 | 
66 | 
67 | Connection Management:
68 | ======================
69 | To be rewritten for stability and simplification. The interaction of
70 | three state machines (socket, QP, connection endpoint) tends to get
71 | confusing. The iSCSI kernel code gives a good example of how to do TCP
72 | connection management better. The current connection manager code is known
73 | to have bugs and is under change.
74 | 
75 | 
76 | Memory Management:
77 | ==================
78 | siw abandoned using the OFA core user page management. It uses private
79 | functions to pin and maintain memory for later use in data transfer
80 | operations. Transmit and receive memory is checked
81 | against correct access permissions only at the moment of access, by the
82 | network input path or before pushing it to the socket for transmission.
83 | 
84 | 
85 | Performance:
86 | ============
87 | Overall, performance has not yet been our main focus. There is known
88 | headroom for achieving higher performance.
89 | 
90 | Fast Path Operations:
91 | While RDMA hardware (an RNIC) typically uses a private fast path
92 | between application and RDMA device, siw normally uses the OFA kernel
93 | environment to trigger execution of work previously posted on the send
94 | queue.
95 | 
96 | As an experimental extension, we implemented a code path that uses a
97 | proprietary system call to ring the doorbell during user space post_send()
98 | operations. The SoftiWarp code detects the availability of that system
99 | call and, if it is absent, falls back to the above described mechanism
100 | for triggering transmit operations. The proprietary system call is not
101 | part of the software.
102 | 
103 | For user level operations, endpoint resources are by default mapped
104 | to user space. This includes send queue, receive queue, and completion
105 | queue. With that, the doorbell call during post_send() is the only
106 | operation which requires the application to trap into the kernel
107 | during fast path operations.
All work queue element / completion queue
108 | element handling is done within the user library.
109 | 
110 | 
111 | Debugging:
112 | ==========
113 | siw allows setting different levels of runtime debug output (see
114 | siw_debug.h). The debug level is selected at compile time only.
115 | 
116 | 
117 | Incomplete List of Missing Functionality:
118 | =========================================
119 | MPA Marker insertion
120 | MPA Marker reception (not very useful)
121 | 
122 | 
123 | 
124 | Comments:
125 | =========
126 | Please send comments to Bernard Metzler,
127 | bmt@zurich.ibm.com.
128 | 
--------------------------------------------------------------------------------
/kernel/Kbuild:
--------------------------------------------------------------------------------
1 | EXTRA_CFLAGS += -I$(src)/../common
2 | 
3 | obj-m += siw.o
4 | 
5 | siw-objs := siw_main.o siw_cm.o siw_verbs.o siw_obj.o \
6 | 	siw_qp.o siw_qp_tx.o siw_qp_rx.o siw_cq.o \
7 | 	siw_debug.o siw_ae.o siw_mem.o
8 | 
--------------------------------------------------------------------------------
/kernel/Makefile:
--------------------------------------------------------------------------------
1 | LINUX_SRC_PATH = /lib/modules/$(shell uname -r)/build
2 | 
3 | #
4 | ## If building against OFED, include the OFED headers first.
5 | #
6 | ifneq ($(OFA_DIR),)
7 | EXTRA_CFLAGS += -DOFED_BUILD
8 | FOO := -I$(OFA_DIR)/include $(LINUXINCLUDE)
9 | override LINUXINCLUDE=$(FOO)
10 | endif
11 | 
12 | default: modules
13 | 
14 | install: modules
15 | 	@${MAKE} -C $(LINUX_SRC_PATH) M=`pwd` modules_install
16 | 
17 | modules:
18 | 	@${MAKE} -C $(LINUX_SRC_PATH) M=`pwd` modules
19 | 
20 | all:
21 | 	@${MAKE} -C $(LINUX_SRC_PATH) M=`pwd` modules
22 | 
23 | clean:
24 | 	-@${MAKE} -C $(LINUX_SRC_PATH) M=`pwd` clean
25 | 
26 | .PHONY: clean modules install
27 | 
--------------------------------------------------------------------------------
/kernel/README_KERNEL_VERSIONS.txt:
--------------------------------------------------------------------------------
1 | 07/21/2011
2 | 
3 | To ease development and maintenance of the siw kernel module
4 | source code, it has been split into two independent directories,
5 | softiwarp/ and softiwarp_old/. Both directories keep all files
6 | needed to build a siw kernel module.
7 | 
8 | softiwarp/
9 | contains the code aligned with the current Linux kernel development
10 | tree. It does not contain any legacy code to run with older kernels.
11 | This code is updated frequently, but runs only on newer kernels.
12 | It has been tested back to kernel version 2.6.36.2.
13 | The code documentation (IMPLEMENTATION.txt) will be updated within
14 | the next days.
15 | 
16 | softiwarp_old/
17 | contains code which compiles and runs on older kernels, but may
18 | soon break on newer kernels. Code maintenance is sloppy, and
19 | it is recommended to use the newer softiwarp/ code base if
20 | possible.
21 | 
22 | 
23 | 06/13/2014
24 | 
25 | softiwarp_old/ has been removed.
26 | 
27 | 
28 | 01/20/2015
29 | 
30 | With kernel version 3.15 the OFA core changed the way it maintains
31 | registered user communication buffers. The new code would result
32 | in inefficient initialization of RDMA data source or sink locations for
33 | a software RDMA stack. Therefore, siw abandons using the OFA core
34 | user page management (ib_umem_get() etc.) and implements its
35 | own simple, but better suited management of pinned user pages.
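
As an illustration of this private page management: pinned pages are kept
in fixed-size pointer chunks and found again by simple index arithmetic
(cf. siw_get_upage() in kernel/siw.h). A self-contained toy model of that
lookup math, assuming 4 KiB pages:

	/* toy model only -- constants mirror kernel/siw.h */
	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT	12
	#define CHUNK_SHIFT	9	/* 512 page pointers per chunk */
	#define PAGES_PER_CHUNK	(1UL << CHUNK_SHIFT)
	#define CHUNK_MASK	(~(PAGES_PER_CHUNK - 1))

	int main(void)
	{
		uint64_t fp_addr = 0x7f0000000000ULL;	/* first page base address */
		uint64_t addr = fp_addr + (1234ULL << PAGE_SHIFT) + 42;

		unsigned int page_idx = (addr - fp_addr) >> PAGE_SHIFT;
		unsigned int chunk_idx = page_idx >> CHUNK_SHIFT;
		unsigned int page_in_chunk = page_idx & ~CHUNK_MASK;

		/* prints: page 1234 -> chunk 2, slot 210 */
		printf("page %u -> chunk %u, slot %u\n",
		       page_idx, chunk_idx, page_in_chunk);
		return 0;
	}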
36 | 
--------------------------------------------------------------------------------
/kernel/iwarp.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Software iWARP device driver for Linux
3 |  *
4 |  * Authors: Bernard Metzler
5 |  *          Fredy Neeser
6 |  *
7 |  * Copyright (c) 2008-2016, IBM Corporation
8 |  *
9 |  * This software is available to you under a choice of one of two
10 |  * licenses. You may choose to be licensed under the terms of the GNU
11 |  * General Public License (GPL) Version 2, available from the file
12 |  * COPYING in the main directory of this source tree, or the
13 |  * BSD license below:
14 |  *
15 |  * Redistribution and use in source and binary forms, with or
16 |  * without modification, are permitted provided that the following
17 |  * conditions are met:
18 |  *
19 |  * - Redistributions of source code must retain the above copyright notice,
20 |  *   this list of conditions and the following disclaimer.
21 |  *
22 |  * - Redistributions in binary form must reproduce the above copyright
23 |  *   notice, this list of conditions and the following disclaimer in the
24 |  *   documentation and/or other materials provided with the distribution.
25 |  *
26 |  * - Neither the name of IBM nor the names of its contributors may be
27 |  *   used to endorse or promote products derived from this software without
28 |  *   specific prior written permission.
29 |  *
30 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34 |  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35 |  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36 |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 |  * SOFTWARE.
38 |  */
39 | 
40 | #ifndef _IWARP_H
41 | #define _IWARP_H
42 | 
43 | #include <rdma/rdma_user_cm.h>	/* RDMA_MAX_PRIVATE_DATA */
44 | #include <linux/types.h>
45 | #include <asm/byteorder.h>
46 | 
47 | 
48 | #define RDMAP_VERSION 1
49 | #define DDP_VERSION 1
50 | #define MPA_REVISION_1 1
51 | #define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA
52 | #define MPA_KEY_REQ "MPA ID Req Frame"
53 | #define MPA_KEY_REP "MPA ID Rep Frame"
54 | 
55 | struct mpa_rr_params {
56 | 	__be16 bits;
57 | 	__be16 pd_len;
58 | };
59 | 
60 | /*
61 |  * MPA request/response Hdr bits & fields
62 |  */
63 | enum {
64 | 	MPA_RR_FLAG_MARKERS = __cpu_to_be16(0x8000),
65 | 	MPA_RR_FLAG_CRC = __cpu_to_be16(0x4000),
66 | 	MPA_RR_FLAG_REJECT = __cpu_to_be16(0x2000),
67 | 	MPA_RR_RESERVED = __cpu_to_be16(0x1f00),
68 | 	MPA_RR_MASK_REVISION = __cpu_to_be16(0x00ff)
69 | };
70 | 
71 | /*
72 |  * MPA request/reply header
73 |  */
74 | struct mpa_rr {
75 | 	__u8 key[16];
76 | 	struct mpa_rr_params params;
77 | };
78 | 
79 | 
80 | static inline void __mpa_rr_set_revision(u16 *bits, u8 rev)
81 | {
82 | 	*bits = (*bits & ~MPA_RR_MASK_REVISION)
83 | 		| (cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
84 | }
85 | 
86 | static inline u8 __mpa_rr_revision(u16 mpa_rr_bits)
87 | {
88 | 	u16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;
89 | 	return (u8)be16_to_cpu(rev);
90 | }
91 | 
92 | 
93 | /*
94 |  * Don't change the layout/size of this struct!
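 * It is transmitted as-is on the wire: 16 reserved bits followed by the
 * 16-bit FPDU pointer, matching the MPA marker format of RFC 5044.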
95 | */ 96 | struct mpa_marker { 97 | __be16 rsvd; 98 | __be16 fpdu_hmd; /* FPDU header-marker distance (= MPA's FPDUPTR) */ 99 | }; 100 | 101 | #define MPA_MARKER_SPACING 512 102 | #define MPA_HDR_SIZE 2 103 | 104 | /* 105 | * MPA marker size: 106 | * - Standards-compliant marker insertion: Use sizeof(struct mpa_marker) 107 | * - "Invisible markers" for testing sender's marker insertion 108 | * without affecting receiver: Use 0 109 | */ 110 | #define MPA_MARKER_SIZE sizeof(struct mpa_marker) 111 | 112 | 113 | /* 114 | * maximum MPA trailer 115 | */ 116 | struct mpa_trailer { 117 | char pad[4]; 118 | __be32 crc; 119 | }; 120 | 121 | #define MPA_CRC_SIZE 4 122 | 123 | 124 | /* 125 | * Common portion of iWARP headers (MPA, DDP, RDMAP) 126 | * for any FPDU 127 | */ 128 | struct iwarp_ctrl { 129 | __be16 mpa_len; 130 | __be16 ddp_rdmap_ctrl; 131 | }; 132 | 133 | /* 134 | * DDP/RDMAP Hdr bits & fields 135 | */ 136 | enum { 137 | DDP_FLAG_TAGGED = __cpu_to_be16(0x8000), 138 | DDP_FLAG_LAST = __cpu_to_be16(0x4000), 139 | DDP_MASK_RESERVED = __cpu_to_be16(0x3C00), 140 | DDP_MASK_VERSION = __cpu_to_be16(0x0300), 141 | RDMAP_MASK_VERSION = __cpu_to_be16(0x00C0), 142 | RDMAP_MASK_RESERVED = __cpu_to_be16(0x0030), 143 | RDMAP_MASK_OPCODE = __cpu_to_be16(0x000f) 144 | }; 145 | 146 | static inline u8 __ddp_version(struct iwarp_ctrl *ctrl) 147 | { 148 | return (u8)(be16_to_cpu(ctrl->ddp_rdmap_ctrl & DDP_MASK_VERSION) >> 8); 149 | }; 150 | 151 | static inline void __ddp_set_version(struct iwarp_ctrl *ctrl, u8 version) 152 | { 153 | ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~DDP_MASK_VERSION) 154 | | (__cpu_to_be16((u16)version << 8) & DDP_MASK_VERSION); 155 | }; 156 | 157 | static inline u8 __rdmap_version(struct iwarp_ctrl *ctrl) 158 | { 159 | u16 ver = ctrl->ddp_rdmap_ctrl & RDMAP_MASK_VERSION; 160 | return (u8)(be16_to_cpu(ver) >> 6); 161 | }; 162 | 163 | static inline void __rdmap_set_version(struct iwarp_ctrl *ctrl, u8 version) 164 | { 165 | ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~RDMAP_MASK_VERSION) 166 | | (__cpu_to_be16(version << 6) & RDMAP_MASK_VERSION); 167 | } 168 | 169 | static inline u8 __rdmap_opcode(struct iwarp_ctrl *ctrl) 170 | { 171 | return (u8)be16_to_cpu(ctrl->ddp_rdmap_ctrl & RDMAP_MASK_OPCODE); 172 | } 173 | 174 | static inline void __rdmap_set_opcode(struct iwarp_ctrl *ctrl, u8 opcode) 175 | { 176 | ctrl->ddp_rdmap_ctrl = (ctrl->ddp_rdmap_ctrl & ~RDMAP_MASK_OPCODE) 177 | | (__cpu_to_be16(opcode) & RDMAP_MASK_OPCODE); 178 | } 179 | 180 | 181 | struct iwarp_rdma_write { 182 | struct iwarp_ctrl ctrl; 183 | __be32 sink_stag; 184 | __be64 sink_to; 185 | }; 186 | 187 | struct iwarp_rdma_rreq { 188 | struct iwarp_ctrl ctrl; 189 | __be32 rsvd; 190 | __be32 ddp_qn; 191 | __be32 ddp_msn; 192 | __be32 ddp_mo; 193 | __be32 sink_stag; 194 | __be64 sink_to; 195 | __be32 read_size; 196 | __be32 source_stag; 197 | __be64 source_to; 198 | }; 199 | 200 | struct iwarp_rdma_rresp { 201 | struct iwarp_ctrl ctrl; 202 | __be32 sink_stag; 203 | __be64 sink_to; 204 | }; 205 | 206 | struct iwarp_send { 207 | struct iwarp_ctrl ctrl; 208 | __be32 rsvd; 209 | __be32 ddp_qn; 210 | __be32 ddp_msn; 211 | __be32 ddp_mo; 212 | }; 213 | 214 | struct iwarp_send_inv { 215 | struct iwarp_ctrl ctrl; 216 | __be32 inval_stag; 217 | __be32 ddp_qn; 218 | __be32 ddp_msn; 219 | __be32 ddp_mo; 220 | }; 221 | 222 | struct iwarp_terminate { 223 | struct iwarp_ctrl ctrl; 224 | __be32 rsvd; 225 | __be32 ddp_qn; 226 | __be32 ddp_msn; 227 | __be32 ddp_mo; 228 | __be32 term_ctrl; 229 | }; 230 | 231 | /* 232 | * 
* Terminate Hdr bits & fields
233 |  */
234 | enum {
235 | 	RDMAP_TERM_MASK_LAYER = __cpu_to_be32(0xf0000000),
236 | 	RDMAP_TERM_MASK_ETYPE = __cpu_to_be32(0x0f000000),
237 | 	RDMAP_TERM_MASK_ECODE = __cpu_to_be32(0x00ff0000),
238 | 	RDMAP_TERM_FLAG_M = __cpu_to_be32(0x00008000),
239 | 	RDMAP_TERM_FLAG_D = __cpu_to_be32(0x00004000),
240 | 	RDMAP_TERM_FLAG_R = __cpu_to_be32(0x00002000),
241 | 	RDMAP_TERM_MASK_RESVD = __cpu_to_be32(0x00001fff)
242 | };
243 | 
244 | static inline u8 __rdmap_term_layer(struct iwarp_terminate *ctrl)
245 | {
246 | 	return (u8)(be32_to_cpu(ctrl->term_ctrl & RDMAP_TERM_MASK_LAYER)
247 | 		>> 28);
248 | };
249 | 
250 | static inline u8 __rdmap_term_etype(struct iwarp_terminate *ctrl)
251 | {
252 | 	return (u8)(be32_to_cpu(ctrl->term_ctrl & RDMAP_TERM_MASK_ETYPE)
253 | 		>> 24);
254 | };
255 | 
256 | static inline u8 __rdmap_term_ecode(struct iwarp_terminate *ctrl)
257 | {
258 | 	return (u8)(be32_to_cpu(ctrl->term_ctrl & RDMAP_TERM_MASK_ECODE)
259 | 		>> 16);
260 | };
261 | 
262 | 
263 | /*
264 |  * Common portion of iWARP headers (MPA, DDP, RDMAP)
265 |  * for an FPDU carrying an untagged DDP segment
266 |  */
267 | struct iwarp_ctrl_untagged {
268 | 	struct iwarp_ctrl ctrl;
269 | 	__be32 rsvd;
270 | 	__be32 ddp_qn;
271 | 	__be32 ddp_msn;
272 | 	__be32 ddp_mo;
273 | };
274 | 
275 | /*
276 |  * Common portion of iWARP headers (MPA, DDP, RDMAP)
277 |  * for an FPDU carrying a tagged DDP segment
278 |  */
279 | struct iwarp_ctrl_tagged {
280 | 	struct iwarp_ctrl ctrl;
281 | 	__be32 ddp_stag;
282 | 	__be64 ddp_to;
283 | };
284 | 
285 | union iwarp_hdrs {
286 | 	struct iwarp_ctrl ctrl;
287 | 	struct iwarp_ctrl_untagged c_untagged;
288 | 	struct iwarp_ctrl_tagged c_tagged;
289 | 	struct iwarp_rdma_write rwrite;
290 | 	struct iwarp_rdma_rreq rreq;
291 | 	struct iwarp_rdma_rresp rresp;
292 | 	struct iwarp_terminate terminate;
293 | 	struct iwarp_send send;
294 | 	struct iwarp_send_inv send_inv;
295 | };
296 | 
297 | 
298 | #define MPA_MIN_FRAG ((sizeof(union iwarp_hdrs) + MPA_CRC_SIZE))
299 | 
300 | enum ddp_etype {
301 | 	DDP_ETYPE_CATASTROPHIC = 0x0,
302 | 	DDP_ETYPE_TAGGED_BUF = 0x1,
303 | 	DDP_ETYPE_UNTAGGED_BUF = 0x2,
304 | 	DDP_ETYPE_RSVD = 0x3
305 | };
306 | 
307 | enum ddp_ecode {
308 | 	DDP_ECODE_CATASTROPHIC = 0x00,
309 | 	/* Tagged Buffer Errors */
310 | 	DDP_ECODE_T_INVALID_STAG = 0x00,
311 | 	DDP_ECODE_T_BASE_BOUNDS = 0x01,
312 | 	DDP_ECODE_T_STAG_NOT_ASSOC = 0x02,
313 | 	DDP_ECODE_T_TO_WRAP = 0x03,
314 | 	DDP_ECODE_T_DDP_VERSION = 0x04,
315 | 	/* Untagged Buffer Errors */
316 | 	DDP_ECODE_UT_INVALID_QN = 0x01,
317 | 	DDP_ECODE_UT_INVALID_MSN_NOBUF = 0x02,
318 | 	DDP_ECODE_UT_INVALID_MSN_RANGE = 0x03,
319 | 	DDP_ECODE_UT_INVALID_MO = 0x04,
320 | 	DDP_ECODE_UT_MSG_TOOLONG = 0x05,
321 | 	DDP_ECODE_UT_DDP_VERSION = 0x06
322 | };
323 | 
324 | 
325 | enum rdmap_untagged_qn {
326 | 	RDMAP_UNTAGGED_QN_SEND = 0,
327 | 	RDMAP_UNTAGGED_QN_RDMA_READ = 1,
328 | 	RDMAP_UNTAGGED_QN_TERMINATE = 2,
329 | 	RDMAP_UNTAGGED_QN_COUNT = 3
330 | };
331 | 
332 | enum rdmap_etype {
333 | 	RDMAP_ETYPE_CATASTROPHIC = 0x0,
334 | 	RDMAP_ETYPE_REMOTE_PROTECTION = 0x1,
335 | 	RDMAP_ETYPE_REMOTE_OPERATION = 0x2
336 | };
337 | 
338 | enum rdmap_ecode {
339 | 	RDMAP_ECODE_INVALID_STAG = 0x00,
340 | 	RDMAP_ECODE_BASE_BOUNDS = 0x01,
341 | 	RDMAP_ECODE_ACCESS_RIGHTS = 0x02,
342 | 	RDMAP_ECODE_STAG_NOT_ASSOC = 0x03,
343 | 	RDMAP_ECODE_TO_WRAP = 0x04,
344 | 	RDMAP_ECODE_RDMAP_VERSION = 0x05,
345 | 	RDMAP_ECODE_UNEXPECTED_OPCODE = 0x06,
346 | 	RDMAP_ECODE_CATASTROPHIC_STREAM = 0x07,
347 | 	RDMAP_ECODE_CATASTROPHIC_GLOBAL = 0x08,
348 | 	RDMAP_ECODE_STAG_NOT_INVALIDATE = 0x09,
349 | 
RDMAP_ECODE_UNSPECIFIED = 0xff 350 | }; 351 | 352 | enum rdmap_elayer { 353 | RDMAP_ERROR_LAYER_RDMA = 0x00, 354 | RDMAP_ERROR_LAYER_DDP = 0x01, 355 | RDMAP_ERROR_LAYER_LLP = 0x02 /* eg., MPA */ 356 | }; 357 | 358 | enum rdma_opcode { 359 | RDMAP_RDMA_WRITE = 0x0, 360 | RDMAP_RDMA_READ_REQ = 0x1, 361 | RDMAP_RDMA_READ_RESP = 0x2, 362 | RDMAP_SEND = 0x3, 363 | RDMAP_SEND_INVAL = 0x4, 364 | RDMAP_SEND_SE = 0x5, 365 | RDMAP_SEND_SE_INVAL = 0x6, 366 | RDMAP_TERMINATE = 0x7, 367 | RDMAP_NOT_SUPPORTED = RDMAP_TERMINATE + 1 368 | }; 369 | 370 | #endif 371 | -------------------------------------------------------------------------------- /kernel/siw.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #ifndef _SIW_H 40 | #define _SIW_H 41 | 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include /* MLOCK_LIMIT */ 51 | #include 52 | #include 53 | #include 54 | 55 | #include 56 | #include "iwarp.h" 57 | 58 | #define _load_shared(a) (*(volatile typeof(a) *)&(a)) 59 | 60 | enum siw_if_type { 61 | SIW_IF_OFED = 0, /* only via standard ofed syscall if */ 62 | SIW_IF_MAPPED = 1 /* private qp and cq mapping */ 63 | }; 64 | 65 | #define DEVICE_ID_SOFTIWARP 0x0815 66 | #define SIW_VENDOR_ID 0x626d74 /* ascii 'bmt' for now */ 67 | #define SIW_VENDORT_PART_ID 0 68 | #define SIW_MAX_QP (1024 * 100) 69 | #define SIW_MAX_QP_WR (1024 * 32) 70 | #define SIW_MAX_ORD 128 71 | #define SIW_MAX_IRD 128 72 | #define SIW_MAX_SGE_RD 1 /* iwarp limitation. 
we could relax */ 73 | #define SIW_MAX_CQ (1024 * 100) 74 | #define SIW_MAX_CQE (SIW_MAX_QP_WR * 100) 75 | #define SIW_MAX_MR (SIW_MAX_QP * 10) 76 | #define SIW_MAX_PD SIW_MAX_QP 77 | #define SIW_MAX_MW 0 /* to be set if MW's are supported */ 78 | #define SIW_MAX_FMR 0 79 | #define SIW_MAX_SRQ SIW_MAX_QP 80 | #define SIW_MAX_SRQ_WR (SIW_MAX_QP_WR * 10) 81 | #define SIW_MAX_CONTEXT SIW_MAX_PD 82 | 83 | #define SENDPAGE_THRESH PAGE_SIZE /* min bytes for using sendpage() */ 84 | #define SQ_USER_MAXBURST 10 85 | 86 | #if defined __NR_rdma_db 87 | #define SIW_DB_SYSCALL 88 | #endif 89 | 90 | struct siw_devinfo { 91 | unsigned device; 92 | unsigned version; 93 | 94 | /* close match to ib_device_attr where appropriate */ 95 | u32 vendor_id; 96 | u32 vendor_part_id; 97 | u32 sw_version; 98 | int max_qp; 99 | int max_qp_wr; 100 | int max_ord; /* max. outbound read queue depth */ 101 | int max_ird; /* max. inbound read queue depth */ 102 | 103 | enum ib_device_cap_flags cap_flags; 104 | int max_sge; 105 | int max_sge_rd; 106 | int max_cq; 107 | int max_cqe; 108 | u64 max_mr_size; 109 | int max_mr; 110 | int max_pd; 111 | int max_mw; 112 | int max_fmr; 113 | int max_srq; 114 | int max_srq_wr; 115 | int max_srq_sge; 116 | /* end ib_device_attr */ 117 | 118 | enum siw_if_type iftype; 119 | }; 120 | 121 | 122 | struct siw_dev { 123 | struct ib_device ofa_dev; 124 | struct list_head list; 125 | struct net_device *netdev; 126 | struct siw_devinfo attrs; 127 | int is_registered; /* Registered with OFA core */ 128 | 129 | /* physical port state (only one port per device) */ 130 | enum ib_port_state state; 131 | 132 | /* object management */ 133 | struct list_head cep_list; 134 | struct list_head qp_list; 135 | spinlock_t idr_lock; 136 | struct idr qp_idr; 137 | struct idr cq_idr; 138 | struct idr pd_idr; 139 | struct idr mem_idr; /* MRs & MWs */ 140 | 141 | /* active objects statistics */ 142 | atomic_t num_qp; 143 | atomic_t num_cq; 144 | atomic_t num_pd; 145 | atomic_t num_mem; 146 | atomic_t num_srq; 147 | atomic_t num_cep; 148 | atomic_t num_ctx; 149 | 150 | struct dentry *debugfs; 151 | }; 152 | 153 | struct siw_objhdr { 154 | u32 id; /* for idr based object lookup */ 155 | struct kref ref; 156 | struct siw_dev *sdev; 157 | }; 158 | 159 | struct siw_uobj { 160 | struct list_head list; 161 | void *addr; 162 | u32 size; 163 | u32 key; 164 | }; 165 | 166 | struct siw_ucontext { 167 | struct ib_ucontext ib_ucontext; 168 | struct siw_dev *sdev; 169 | /* List of user mappable queue objects */ 170 | spinlock_t uobj_lock; 171 | struct list_head uobj_list; 172 | u32 uobj_key; 173 | }; 174 | 175 | struct siw_pd { 176 | struct siw_objhdr hdr; 177 | struct ib_pd ofa_pd; 178 | }; 179 | 180 | enum siw_access_flags { 181 | SR_MEM_LREAD = (1<<0), 182 | SR_MEM_LWRITE = (1<<1), 183 | SR_MEM_RREAD = (1<<2), 184 | SR_MEM_RWRITE = (1<<3), 185 | 186 | SR_MEM_FLAGS_LOCAL = 187 | (SR_MEM_LREAD | SR_MEM_LWRITE), 188 | SR_MEM_FLAGS_REMOTE = 189 | (SR_MEM_RWRITE | SR_MEM_RREAD) 190 | }; 191 | 192 | #define STAG_VALID 1 193 | #define STAG_INVALID 0 194 | #define SIW_STAG_MAX 0xffffffff 195 | 196 | struct siw_mr; 197 | 198 | /* 199 | * siw presentation of user memory registered as source 200 | * or target of RDMA operations. 
201 | */ 202 | 203 | struct siw_page_chunk { 204 | struct page **p; 205 | }; 206 | 207 | struct siw_umem { 208 | struct siw_page_chunk *page_chunk; 209 | int num_pages; 210 | u64 fp_addr; /* First page base address */ 211 | struct pid *pid; 212 | struct mm_struct *mm_s; 213 | struct work_struct work; 214 | }; 215 | 216 | /* 217 | * generic memory representation for registered siw memory. 218 | * memory lookup always via higher 24 bit of stag (stag index). 219 | * the stag is stored as part of the siw object header (id). 220 | * object relates to memory window if embedded mr pointer is valid 221 | */ 222 | struct siw_mem { 223 | struct siw_objhdr hdr; 224 | 225 | struct siw_mr *mr; /* assoc. MR if MW, NULL if MR */ 226 | u64 va; /* VA of memory */ 227 | u64 len; /* amount of memory bytes */ 228 | 229 | u32 stag_state:1, /* VALID or INVALID */ 230 | is_zbva:1, /* zero based virt. addr. */ 231 | mw_bind_enabled:1, /* check only if MR */ 232 | remote_inval_enabled:1, /* VALID or INVALID */ 233 | consumer_owns_key:1, /* key/index split ? */ 234 | rsvd:27; 235 | 236 | enum siw_access_flags perms; /* local/remote READ & WRITE */ 237 | }; 238 | 239 | #define SIW_MEM_IS_MW(m) ((m)->mr != NULL) 240 | 241 | /* 242 | * MR and MW definition. 243 | * Used OFA structs ib_mr/ib_mw holding: 244 | * lkey, rkey, MW reference count on MR 245 | */ 246 | struct siw_mr { 247 | struct ib_mr ofa_mr; 248 | struct siw_mem mem; 249 | struct rcu_head rcu; 250 | struct siw_umem *umem; 251 | struct siw_pd *pd; 252 | }; 253 | 254 | struct siw_mw { 255 | struct ib_mw ofa_mw; 256 | struct siw_mem mem; 257 | struct rcu_head rcu; 258 | }; 259 | 260 | /********** WR definitions ****************/ 261 | 262 | #define SIW_WQE_IS_TX(wqe) 1 /* add BIND/FASTREG/INVAL_STAG */ 263 | 264 | 265 | enum siw_wr_state { 266 | SR_WR_IDLE = 0, 267 | SR_WR_QUEUED = 1, /* processing has not started yet */ 268 | SR_WR_INPROGRESS = 2, /* initiated processing of the WR */ 269 | SR_WR_DONE = 3 270 | }; 271 | 272 | union siw_mem_resolved { 273 | struct siw_mem *obj; /* reference to registered memory */ 274 | char *buf; /* linear kernel buffer */ 275 | }; 276 | 277 | struct siw_qp; 278 | 279 | struct siw_wqe { 280 | union { 281 | struct siw_sqe sqe; 282 | struct siw_rqe rqe; 283 | }; 284 | union siw_mem_resolved mem[SIW_MAX_SGE]; /* per sge's resolved mem */ 285 | 286 | enum siw_wr_state wr_status; 287 | enum siw_wc_status wc_status; 288 | u32 bytes; /* total bytes to process */ 289 | u32 processed; /* bytes processed */ 290 | int error; 291 | }; 292 | 293 | struct siw_cq { 294 | struct ib_cq ofa_cq; 295 | struct siw_objhdr hdr; 296 | enum siw_notify_flags *notify; 297 | spinlock_t lock; 298 | struct siw_cqe *queue; 299 | u32 cq_put; 300 | u32 cq_get; 301 | u32 num_cqe; 302 | int kernel_verbs; 303 | }; 304 | 305 | enum siw_qp_state { 306 | SIW_QP_STATE_IDLE = 0, 307 | SIW_QP_STATE_RTR = 1, 308 | SIW_QP_STATE_RTS = 2, 309 | SIW_QP_STATE_CLOSING = 3, 310 | SIW_QP_STATE_TERMINATE = 4, 311 | SIW_QP_STATE_ERROR = 5, 312 | SIW_QP_STATE_MORIBUND = 6, /* destroy called but still referenced */ 313 | SIW_QP_STATE_UNDEF = 7, 314 | SIW_QP_STATE_COUNT = 8 315 | }; 316 | 317 | enum siw_qp_flags { 318 | SIW_RDMA_BIND_ENABLED = (1 << 0), 319 | SIW_RDMA_WRITE_ENABLED = (1 << 1), 320 | SIW_RDMA_READ_ENABLED = (1 << 2), 321 | SIW_SIGNAL_ALL_WR = (1 << 3), 322 | /* 323 | * QP currently being destroyed 324 | */ 325 | SIW_QP_IN_DESTROY = (1 << 8) 326 | }; 327 | 328 | enum siw_qp_attr_mask { 329 | SIW_QP_ATTR_STATE = (1 << 0), 330 | SIW_QP_ATTR_ACCESS_FLAGS = (1 << 1), 
331 | SIW_QP_ATTR_LLP_HANDLE = (1 << 2), 332 | SIW_QP_ATTR_ORD = (1 << 3), 333 | SIW_QP_ATTR_IRD = (1 << 4), 334 | SIW_QP_ATTR_SQ_SIZE = (1 << 5), 335 | SIW_QP_ATTR_RQ_SIZE = (1 << 6), 336 | SIW_QP_ATTR_MPA = (1 << 7) 337 | }; 338 | 339 | struct siw_mpa_attrs { 340 | __u8 marker_rcv; /* always 0 */ 341 | __u8 marker_snd; /* always 0, consider support */ 342 | __u8 crc; 343 | __u8 unused; 344 | }; 345 | 346 | struct siw_sk_upcalls { 347 | void (*sk_state_change)(struct sock *sk); 348 | void (*sk_data_ready)(struct sock *sk, int bytes); 349 | void (*sk_write_space)(struct sock *sk); 350 | void (*sk_error_report)(struct sock *sk); 351 | }; 352 | 353 | struct siw_sq_work { 354 | struct work_struct work; 355 | }; 356 | 357 | struct siw_srq { 358 | struct ib_srq ofa_srq; 359 | struct siw_pd *pd; 360 | atomic_t rq_index; 361 | spinlock_t lock; 362 | u32 max_sge; 363 | atomic_t space; /* current space for posting wqe's */ 364 | u32 limit; /* low watermark for async event */ 365 | struct siw_rqe *recvq; 366 | u32 rq_put; 367 | u32 rq_get; 368 | u32 num_rqe; /* max # of wqe's allowed */ 369 | char armed; /* inform user if limit hit */ 370 | char kernel_verbs; /* '1' if kernel client */ 371 | }; 372 | 373 | struct siw_qp_attrs { 374 | enum siw_qp_state state; 375 | char terminate_buffer[52]; 376 | u32 terminate_msg_length; 377 | u32 ddp_rdmap_version; /* 0 or 1 */ 378 | char *stream_msg_buf; 379 | u32 stream_msg_buf_length; 380 | u32 rq_hiwat; 381 | u32 sq_size; 382 | u32 rq_size; 383 | u32 orq_size; 384 | u32 irq_size; 385 | u32 sq_max_sges; 386 | u32 sq_max_sges_rdmaw; 387 | u32 rq_max_sges; 388 | struct siw_mpa_attrs mpa; 389 | enum siw_qp_flags flags; 390 | 391 | struct socket *llp_stream_handle; 392 | }; 393 | 394 | enum siw_tx_ctx { 395 | SIW_SEND_HDR = 0, /* start or continue sending HDR */ 396 | SIW_SEND_DATA = 1, /* start or continue sending DDP payload */ 397 | SIW_SEND_TRAILER = 2, /* start or continue sending TRAILER */ 398 | SIW_SEND_SHORT_FPDU = 3 /* send whole FPDU hdr|data|trailer at once */ 399 | }; 400 | 401 | enum siw_rx_state { 402 | SIW_GET_HDR = 0, /* await new hdr or within hdr */ 403 | SIW_GET_DATA_START = 1, /* start of inbound DDP payload */ 404 | SIW_GET_DATA_MORE = 2, /* continuation of (misaligned) DDP payload */ 405 | SIW_GET_TRAILER = 3 /* await new trailer or within trailer */ 406 | }; 407 | 408 | 409 | struct siw_iwarp_rx { 410 | struct sk_buff *skb; 411 | union iwarp_hdrs hdr; 412 | struct mpa_trailer trailer; 413 | /* 414 | * local destination memory of inbound iwarp operation. 415 | * valid, according to wqe->wr_status 416 | */ 417 | struct siw_wqe wqe_active; 418 | 419 | struct shash_desc mpa_crc_hd; 420 | /* 421 | * Next expected DDP MSN for each QN + 422 | * expected steering tag + 423 | * expected DDP tagget offset (all HBO) 424 | */ 425 | u32 ddp_msn[RDMAP_UNTAGGED_QN_COUNT]; 426 | u32 ddp_stag; 427 | u64 ddp_to; 428 | 429 | /* 430 | * For each FPDU, main RX loop runs through 3 stages: 431 | * Receiving protocol headers, placing DDP payload and receiving 432 | * trailer information (CRC + eventual padding). 433 | * Next two variables keep state on receive status of the 434 | * current FPDU part (hdr, data, trailer). 
435 | */ 436 | int fpdu_part_rcvd;/* bytes in pkt part copied */ 437 | int fpdu_part_rem; /* bytes in pkt part not seen */ 438 | 439 | int skb_new; /* pending unread bytes in skb */ 440 | int skb_offset; /* offset in skb */ 441 | int skb_copied; /* processed bytes in skb */ 442 | 443 | int sge_idx; /* current sge in rx */ 444 | unsigned int sge_off; /* already rcvd in curr. sge */ 445 | 446 | enum siw_rx_state state; 447 | 448 | u8 crc_enabled:1, 449 | first_ddp_seg:1, /* this is first DDP seg */ 450 | more_ddp_segs:1, /* more DDP segs expected */ 451 | rx_suspend:1, /* stop rcv DDP segs. */ 452 | prev_rdmap_opcode:4; /* opcode of prev msg */ 453 | char pad; /* # of pad bytes expected */ 454 | }; 455 | 456 | #define siw_rx_data(qp, rctx) \ 457 | (iwarp_pktinfo[__rdmap_opcode(&rctx->hdr.ctrl)].proc_data(qp, rctx)) 458 | 459 | /* 460 | * Shorthands for short packets w/o payload 461 | * to be transmitted more efficient. 462 | */ 463 | struct siw_send_pkt { 464 | struct iwarp_send send; 465 | __be32 crc; 466 | }; 467 | 468 | struct siw_write_pkt { 469 | struct iwarp_rdma_write write; 470 | __be32 crc; 471 | }; 472 | 473 | struct siw_rreq_pkt { 474 | struct iwarp_rdma_rreq rreq; 475 | __be32 crc; 476 | }; 477 | 478 | struct siw_rresp_pkt { 479 | struct iwarp_rdma_rresp rresp; 480 | __be32 crc; 481 | }; 482 | 483 | struct siw_iwarp_tx { 484 | union { 485 | union iwarp_hdrs hdr; 486 | 487 | /* Generic part of FPDU header */ 488 | struct iwarp_ctrl ctrl; 489 | struct iwarp_ctrl_untagged c_untagged; 490 | struct iwarp_ctrl_tagged c_tagged; 491 | 492 | /* FPDU headers */ 493 | struct iwarp_rdma_write rwrite; 494 | struct iwarp_rdma_rreq rreq; 495 | struct iwarp_rdma_rresp rresp; 496 | struct iwarp_terminate terminate; 497 | struct iwarp_send send; 498 | struct iwarp_send_inv send_inv; 499 | 500 | /* complete short FPDUs */ 501 | struct siw_send_pkt send_pkt; 502 | struct siw_write_pkt write_pkt; 503 | struct siw_rreq_pkt rreq_pkt; 504 | struct siw_rresp_pkt rresp_pkt; 505 | } pkt; 506 | 507 | struct mpa_trailer trailer; 508 | /* DDP MSN for untagged messages */ 509 | u32 ddp_msn[RDMAP_UNTAGGED_QN_COUNT]; 510 | 511 | enum siw_tx_ctx state; 512 | wait_queue_head_t waitq; 513 | u16 ctrl_len; /* ddp+rdmap hdr */ 514 | u16 ctrl_sent; 515 | int burst; 516 | 517 | int bytes_unsent; /* ddp payload bytes */ 518 | 519 | struct shash_desc mpa_crc_hd; 520 | 521 | atomic_t in_use; /* tx currently under way */ 522 | 523 | u8 crc_enabled:1, /* compute and ship crc */ 524 | do_crc:1, /* do crc for segment */ 525 | use_sendpage:1, /* send w/o copy */ 526 | new_tcpseg:1, /* start new tcp segment */ 527 | tx_suspend:1, /* stop sending DDP segs. */ 528 | pad:2, /* # pad in current fpdu */ 529 | orq_fence:1; /* ORQ full or Send fenced */ 530 | 531 | u16 fpdu_len; /* len of FPDU to tx */ 532 | 533 | int tcp_seglen; /* remaining tcp seg space */ 534 | 535 | struct siw_wqe wqe_active; 536 | 537 | int sge_idx; /* current sge in tx */ 538 | u32 sge_off; /* already sent in curr. 
sge */ 539 | int in_syscall; /* TX out of user context */ 540 | }; 541 | 542 | #define USE_SQ_KTHREAD 543 | 544 | struct siw_qp { 545 | struct ib_qp ofa_qp; 546 | struct siw_objhdr hdr; 547 | struct list_head devq; 548 | int cpu; 549 | int kernel_verbs; 550 | struct siw_iwarp_rx rx_ctx; 551 | struct siw_iwarp_tx tx_ctx; 552 | 553 | struct siw_cep *cep; 554 | struct rw_semaphore state_lock; 555 | 556 | struct siw_pd *pd; 557 | struct siw_cq *scq; 558 | struct siw_cq *rcq; 559 | struct siw_srq *srq; 560 | 561 | struct siw_qp_attrs attrs; 562 | 563 | struct siw_sqe *sendq; /* send queue element array */ 564 | uint32_t sq_get; /* consumer index into sq array */ 565 | uint32_t sq_put; /* kernel prod. index into sq array */ 566 | #ifdef USE_SQ_KTHREAD 567 | struct llist_node tx_list; 568 | #endif 569 | 570 | struct siw_sqe *irq; /* inbound read queue element array */ 571 | uint32_t irq_get;/* consumer index into irq array */ 572 | uint32_t irq_put;/* producer index into irq array */ 573 | 574 | struct siw_rqe *recvq; /* recv queue element array */ 575 | uint32_t rq_get; /* consumer index into rq array */ 576 | uint32_t rq_put; /* kernel prod. index into rq array */ 577 | 578 | struct siw_sqe *orq; /* outbound read queue element array */ 579 | uint32_t orq_get;/* consumer index into orq array */ 580 | uint32_t orq_put;/* shared producer index for ORQ */ 581 | 582 | spinlock_t sq_lock; 583 | spinlock_t rq_lock; 584 | spinlock_t orq_lock; 585 | 586 | struct siw_sq_work sq_work; 587 | }; 588 | 589 | #define lock_sq(qp) spin_lock(&qp->sq_lock) 590 | #define unlock_sq(qp) spin_unlock(&qp->sq_lock) 591 | 592 | #ifdef LOCK_WO_FLAG 593 | #define lock_sq_rxsave(qp, flags) spin_lock_bh(&qp->sq_lock) 594 | #define unlock_sq_rxsave(qp, flags) spin_unlock_bh(&qp->sq_lock) 595 | #else 596 | #define lock_sq_rxsave(qp, flags) spin_lock_irqsave(&qp->sq_lock, flags) 597 | #define unlock_sq_rxsave(qp, flags) spin_unlock_irqrestore(&qp->sq_lock, flags) 598 | #endif 599 | 600 | #define lock_rq(qp) spin_lock(&qp->rq_lock) 601 | #define unlock_rq(qp) spin_unlock(&qp->rq_lock) 602 | 603 | #define lock_rq_rxsave(qp, flags) spin_lock_irqsave(&qp->rq_lock, flags) 604 | #define unlock_rq_rxsave(qp, flags) spin_unlock_irqrestore(&qp->rq_lock, flags) 605 | 606 | #define lock_srq(srq) spin_lock(&srq->lock) 607 | #define unlock_srq(srq) spin_unlock(&srq->lock) 608 | 609 | #define lock_srq_rxsave(srq, flags) spin_lock_irqsave(&srq->lock, flags) 610 | #define unlock_srq_rxsave(srq, flags) spin_unlock_irqrestore(&srq->lock, flags) 611 | 612 | #define lock_cq(cq) spin_lock(&cq->lock) 613 | #define unlock_cq(cq) spin_unlock(&cq->lock) 614 | 615 | #define lock_cq_rxsave(cq, flags) spin_lock_irqsave(&cq->lock, flags) 616 | #define unlock_cq_rxsave(cq, flags)\ 617 | spin_unlock_irqrestore(&cq->lock, flags) 618 | 619 | #define lock_orq(qp) spin_lock(&qp->orq_lock) 620 | #define unlock_orq(qp) spin_unlock(&qp->orq_lock) 621 | 622 | #ifdef LOCK_WO_FLAG 623 | #define lock_orq_rxsave(qp, flags) spin_lock_bh(&qp->orq_lock) 624 | #define unlock_orq_rxsave(qp, flags) spin_unlock_bh(&qp->orq_lock) 625 | #else 626 | #define lock_orq_rxsave(qp, flags) spin_lock_irqsave(&qp->orq_lock, flags) 627 | #define unlock_orq_rxsave(qp, flags)\ 628 | spin_unlock_irqrestore(&qp->orq_lock, flags) 629 | #endif 630 | 631 | #define RX_QP(rx) container_of(rx, struct siw_qp, rx_ctx) 632 | #define TX_QP(tx) container_of(tx, struct siw_qp, tx_ctx) 633 | #define QP_ID(qp) ((qp)->hdr.id) 634 | #define OBJ_ID(obj) ((obj)->hdr.id) 635 | #define RX_QPID(rx) 
QP_ID(RX_QP(rx)) 636 | #define TX_QPID(tx) QP_ID(TX_QP(tx)) 637 | 638 | /* helper macros */ 639 | #define tx_wqe(qp) (&(qp)->tx_ctx.wqe_active) 640 | #define rx_wqe(qp) (&(qp)->rx_ctx.wqe_active) 641 | #define rx_mem(qp) ((qp)->rx_ctx.wqe_active.mem[0].obj) 642 | #define tx_type(wqe) ((wqe)->sqe.opcode) 643 | #define rx_type(wqe) ((wqe)->rqe.opcode) 644 | #define tx_flags(wqe) ((wqe)->sqe.flags) 645 | #define rx_flags(wqe) ((wqe)->rqe.flags) 646 | #define list_entry_wqe(pos) list_entry(pos, struct siw_wqe, list) 647 | #define list_first_wqe(pos) list_first_entry(pos, struct siw_wqe, list) 648 | 649 | #define TX_ACTIVE(qp) (tx_wqe(qp).status != SIW_WR_IDLE) 650 | #define TX_ACTIVE_RRESP(qp) (TX_ACTIVE(qp) &&\ 651 | tx_type(tx_wqe(qp)) == SIW_OP_READ_RESP) 652 | 653 | #define TX_IDLE(qp) (!TX_ACTIVE(qp) && SQ_EMPTY(qp) && \ 654 | IRQ_EMPTY(qp) && ORQ_EMPTY(qp)) 655 | 656 | 657 | struct iwarp_msg_info { 658 | int hdr_len; 659 | struct iwarp_ctrl ctrl; 660 | int (*proc_data) (struct siw_qp *, struct siw_iwarp_rx *); 661 | }; 662 | 663 | extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1]; 664 | extern struct siw_dev *siw; 665 | 666 | 667 | /* QP general functions */ 668 | int siw_qp_modify(struct siw_qp *, struct siw_qp_attrs *, 669 | enum siw_qp_attr_mask); 670 | 671 | void siw_qp_llp_close(struct siw_qp *); 672 | void siw_qp_cm_drop(struct siw_qp *, int); 673 | 674 | 675 | struct ib_qp *siw_get_ofaqp(struct ib_device *, int); 676 | void siw_qp_get_ref(struct ib_qp *); 677 | void siw_qp_put_ref(struct ib_qp *); 678 | 679 | enum siw_qp_state siw_map_ibstate(enum ib_qp_state); 680 | 681 | int siw_check_mem(struct siw_pd *, struct siw_mem *, u64, 682 | enum siw_access_flags, int); 683 | int siw_check_sge(struct siw_pd *, struct siw_sge *, union siw_mem_resolved *, 684 | enum siw_access_flags, u32, int); 685 | int siw_check_sgl(struct siw_pd *, struct siw_wqe *, 686 | enum siw_access_flags); 687 | 688 | void siw_read_to_orq(struct siw_sqe *, struct siw_sqe *); 689 | 690 | int siw_sqe_complete(struct siw_qp *, struct siw_sqe *, u32, 691 | enum siw_wc_status); 692 | int siw_rqe_complete(struct siw_qp *, struct siw_rqe *, u32, 693 | enum siw_wc_status); 694 | 695 | 696 | /* SIW user memory management */ 697 | 698 | #define CHUNK_SHIFT 9 /* sets number of pages per chunk */ 699 | #define PAGES_PER_CHUNK (_AC(1, UL) << CHUNK_SHIFT) 700 | #define CHUNK_MASK (~(PAGES_PER_CHUNK - 1)) 701 | #define PAGE_CHUNK_SIZE (PAGES_PER_CHUNK * sizeof(struct page *)) 702 | 703 | /* 704 | * siw_get_upage() 705 | * 706 | * Get page pointer for address on given umem. 
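 * Returns the page pointer, or NULL if addr lies outside the umem.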
707 | * 708 | * @umem: two dimensional list of page pointers 709 | * @addr: user virtual address 710 | */ 711 | static inline struct page *siw_get_upage(struct siw_umem *umem, u64 addr) 712 | { 713 | unsigned int page_idx = (addr - umem->fp_addr) >> PAGE_SHIFT, 714 | chunk_idx = page_idx >> CHUNK_SHIFT, 715 | page_in_chunk = page_idx & ~CHUNK_MASK; 716 | 717 | if (likely(page_idx < umem->num_pages)) 718 | return umem->page_chunk[chunk_idx].p[page_in_chunk]; 719 | 720 | return NULL; 721 | } 722 | struct siw_umem *siw_umem_get(u64, u64); 723 | void siw_umem_release(struct siw_umem *); 724 | 725 | 726 | /* QP TX path functions */ 727 | int siw_qp_sq_process(struct siw_qp *); 728 | int siw_sq_worker_init(void); 729 | void siw_sq_worker_exit(void); 730 | int siw_sq_queue_work(struct siw_qp *qp); 731 | int siw_activate_tx(struct siw_qp *); 732 | 733 | /* QP RX path functions */ 734 | int siw_proc_send(struct siw_qp *, struct siw_iwarp_rx *); 735 | int siw_proc_rreq(struct siw_qp *, struct siw_iwarp_rx *); 736 | int siw_proc_rresp(struct siw_qp *, struct siw_iwarp_rx *); 737 | int siw_proc_write(struct siw_qp *, struct siw_iwarp_rx *); 738 | int siw_proc_terminate(struct siw_qp*, struct siw_iwarp_rx *); 739 | int siw_proc_unsupp(struct siw_qp *, struct siw_iwarp_rx *); 740 | 741 | int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb, 742 | unsigned int off, size_t len); 743 | 744 | /* MPA utilities */ 745 | int siw_crc_array(struct shash_desc *, u8 *, size_t); 746 | int siw_crc_page(struct shash_desc *, struct page *, int, int); 747 | 748 | 749 | /* Varia */ 750 | void siw_cq_flush(struct siw_cq *); 751 | void siw_sq_flush(struct siw_qp *); 752 | void siw_rq_flush(struct siw_qp *); 753 | int siw_reap_cqe(struct siw_cq *, struct ib_wc *); 754 | 755 | /* RDMA core event dipatching */ 756 | void siw_qp_event(struct siw_qp *, enum ib_event_type); 757 | void siw_cq_event(struct siw_cq *, enum ib_event_type); 758 | void siw_srq_event(struct siw_srq *, enum ib_event_type); 759 | void siw_port_event(struct siw_dev *, u8, enum ib_event_type); 760 | 761 | 762 | static inline int siw_sq_empty(struct siw_qp *qp) 763 | { 764 | return qp->sendq[qp->sq_get % qp->attrs.sq_size].flags == 0; 765 | } 766 | 767 | static inline struct siw_sqe *sq_get_next(struct siw_qp *qp) 768 | { 769 | struct siw_sqe *sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size]; 770 | if (sqe->flags & SIW_WQE_VALID) 771 | return sqe; 772 | return NULL; 773 | } 774 | 775 | static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) 776 | { 777 | if (likely(qp->attrs.orq_size)) 778 | return &qp->orq[qp->orq_put % qp->attrs.orq_size]; 779 | 780 | pr_warn("QP[%d]: ORQ has zero length", QP_ID(qp)); 781 | return NULL; 782 | } 783 | 784 | static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) 785 | { 786 | struct siw_sqe *orq_e = orq_get_tail(qp); 787 | 788 | if (orq_e && orq_e->flags == 0) 789 | return orq_e; 790 | 791 | return NULL; 792 | } 793 | 794 | static inline int siw_orq_empty(struct siw_qp *qp) 795 | { 796 | return qp->orq[qp->orq_get % qp->attrs.orq_size].flags == 0 ? 
1 : 0; 797 | } 798 | 799 | static inline struct siw_sqe *irq_get_free(struct siw_qp *qp) 800 | { 801 | struct siw_sqe *irq_e = &qp->irq[qp->irq_put % qp->attrs.irq_size]; 802 | if (irq_e->flags == 0) 803 | return irq_e; 804 | return NULL; 805 | } 806 | 807 | static inline int siw_irq_empty(struct siw_qp *qp) 808 | { 809 | return qp->irq[qp->irq_get % qp->attrs.irq_size].flags == 0; 810 | } 811 | 812 | #define tx_more_wqe(qp) (!siw_sq_empty(qp) || !siw_irq_empty(qp)) 813 | 814 | 815 | static inline struct siw_mr *siw_mem2mr(struct siw_mem *m) 816 | { 817 | if (!SIW_MEM_IS_MW(m)) 818 | return container_of(m, struct siw_mr, mem); 819 | return m->mr; 820 | } 821 | 822 | #endif 823 | -------------------------------------------------------------------------------- /kernel/siw_ae.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 
37 | */ 38 | 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | #include 49 | #include 50 | #include 51 | #include 52 | 53 | #include "siw.h" 54 | #include "siw_obj.h" 55 | #include "siw_cm.h" 56 | 57 | 58 | void siw_qp_event(struct siw_qp *qp, enum ib_event_type etype) 59 | { 60 | struct ib_event event; 61 | struct ib_qp *ofa_qp = &qp->ofa_qp; 62 | 63 | event.event = etype; 64 | event.device = ofa_qp->device; 65 | event.element.qp = ofa_qp; 66 | 67 | if (!(qp->attrs.flags & SIW_QP_IN_DESTROY) && ofa_qp->event_handler) { 68 | dprint(DBG_EH, ": reporting %d\n", etype); 69 | (*ofa_qp->event_handler)(&event, ofa_qp->qp_context); 70 | } 71 | } 72 | 73 | void siw_cq_event(struct siw_cq *cq, enum ib_event_type etype) 74 | { 75 | struct ib_event event; 76 | struct ib_cq *ofa_cq = &cq->ofa_cq; 77 | 78 | event.event = etype; 79 | event.device = ofa_cq->device; 80 | event.element.cq = ofa_cq; 81 | 82 | if (ofa_cq->event_handler) { 83 | dprint(DBG_EH, ": reporting %d\n", etype); 84 | (*ofa_cq->event_handler)(&event, ofa_cq->cq_context); 85 | } 86 | } 87 | 88 | void siw_srq_event(struct siw_srq *srq, enum ib_event_type etype) 89 | { 90 | struct ib_event event; 91 | struct ib_srq *ofa_srq = &srq->ofa_srq; 92 | 93 | event.event = etype; 94 | event.device = ofa_srq->device; 95 | event.element.srq = ofa_srq; 96 | 97 | if (ofa_srq->event_handler) { 98 | dprint(DBG_EH, ": reporting %d\n", etype); 99 | (*ofa_srq->event_handler)(&event, ofa_srq->srq_context); 100 | } 101 | } 102 | 103 | void siw_port_event(struct siw_dev *sdev, u8 port, enum ib_event_type etype) 104 | { 105 | struct ib_event event; 106 | 107 | event.event = etype; 108 | event.device = &sdev->ofa_dev; 109 | event.element.port_num = port; 110 | 111 | dprint(DBG_EH, ": reporting %d\n", etype); 112 | ib_dispatch_event(&event); 113 | } 114 | -------------------------------------------------------------------------------- /kernel/siw_cm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #ifndef _SIW_CM_H 40 | #define _SIW_CM_H 41 | 42 | #include 43 | #include 44 | 45 | #include 46 | 47 | 48 | enum siw_cep_state { 49 | SIW_EPSTATE_IDLE = 1, 50 | SIW_EPSTATE_LISTENING, 51 | SIW_EPSTATE_CONNECTING, 52 | SIW_EPSTATE_AWAIT_MPAREQ, 53 | SIW_EPSTATE_RECVD_MPAREQ, 54 | SIW_EPSTATE_AWAIT_MPAREP, 55 | SIW_EPSTATE_RDMA_MODE, 56 | SIW_EPSTATE_CLOSED 57 | }; 58 | 59 | struct siw_mpa_info { 60 | struct mpa_rr hdr; /* peer mpa hdr in host byte order */ 61 | char *pdata; 62 | int bytes_rcvd; 63 | }; 64 | 65 | struct siw_llp_info { 66 | struct socket *sock; 67 | struct sockaddr_in laddr; /* redundant with socket info above */ 68 | struct sockaddr_in raddr; /* ditto, consider removal */ 69 | struct siw_sk_upcalls sk_def_upcalls; 70 | }; 71 | 72 | struct siw_dev; 73 | 74 | struct siw_cep { 75 | struct iw_cm_id *cm_id; 76 | struct siw_dev *sdev; 77 | 78 | struct list_head devq; 79 | /* 80 | * The provider_data element of a listener IWCM ID 81 | * refers to a list of one or more listener CEPs 82 | */ 83 | struct list_head listenq; 84 | struct siw_cep *listen_cep; 85 | struct siw_qp *qp; 86 | spinlock_t lock; 87 | wait_queue_head_t waitq; 88 | struct kref ref; 89 | enum siw_cep_state state; 90 | short in_use; 91 | struct siw_cm_work *mpa_timer; 92 | struct list_head work_freelist; 93 | struct siw_llp_info llp; 94 | struct siw_mpa_info mpa; 95 | int ord; 96 | int ird; 97 | int sk_error; /* not (yet) used XXX */ 98 | 99 | /* Saved upcalls of socket llp.sock */ 100 | void (*sk_state_change)(struct sock *sk); 101 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) 102 | void (*sk_data_ready)(struct sock *sk, int bytes); 103 | #else 104 | void (*sk_data_ready)(struct sock *sk); 105 | #endif 106 | void (*sk_write_space)(struct sock *sk); 107 | void (*sk_error_report)(struct sock *sk); 108 | }; 109 | 110 | #define MPAREQ_TIMEOUT (HZ*10) 111 | #define MPAREP_TIMEOUT (HZ*5) 112 | 113 | enum siw_work_type { 114 | SIW_CM_WORK_ACCEPT = 1, 115 | SIW_CM_WORK_READ_MPAHDR, 116 | SIW_CM_WORK_CLOSE_LLP, /* close socket */ 117 | SIW_CM_WORK_PEER_CLOSE, /* socket indicated peer close */ 118 | SIW_CM_WORK_MPATIMEOUT 119 | }; 120 | 121 | struct siw_cm_work { 122 | struct delayed_work work; 123 | struct list_head list; 124 | enum siw_work_type type; 125 | struct siw_cep *cep; 126 | }; 127 | 128 | /* 129 | * With kernel 3.12, OFA addressing changed from sockaddr_in to 130 | * sockaddr_storage 131 | */ 132 | #define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a))) 133 | 134 | extern int siw_connect(struct iw_cm_id *, struct iw_cm_conn_param *); 135 | extern int siw_accept(struct iw_cm_id *, struct iw_cm_conn_param *); 136 | extern int siw_reject(struct iw_cm_id *, const void *, u8); 137 | extern int siw_create_listen(struct iw_cm_id *, int); 138 | extern int siw_destroy_listen(struct iw_cm_id *); 139 | 140 | extern void siw_cep_get(struct siw_cep *); 141 | extern void siw_cep_put(struct siw_cep *); 142 | extern int siw_cm_queue_work(struct siw_cep *, enum siw_work_type); 143 | 144 | extern int siw_cm_init(void); 145 | extern void siw_cm_exit(void); 146 | 147 | /* 148 | * TCP socket interface 149 | */ 150 | #define sk_to_qp(sk) (((struct siw_cep *)((sk)->sk_user_data))->qp) 151 | #define sk_to_cep(sk) ((struct siw_cep *)((sk)->sk_user_data))
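/*
 * Illustrative sketch, not part of the driver: how a socket upcall
 * recovers its connection endpoint through the accessors above. The
 * CM points sk->sk_user_data at the CEP when it saves and replaces
 * the socket callbacks kept in struct siw_cep. The function name is
 * hypothetical.
 *
 *	static void example_sk_state_change(struct sock *sk)
 *	{
 *		struct siw_cep *cep = sk_to_cep(sk);
 *
 *		if (cep)
 *			wake_up(&cep->waitq);
 *	}
 */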
152 | 153 | /* 154 | * Should we use tcp_current_mss()? 155 | * But it's not exported by the kernel. 156 | */ 157 | static inline unsigned int get_tcp_mss(struct sock *sk) 158 | { 159 | struct tcp_sock *tp = tcp_sk(sk); 160 | 161 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0) || defined(IS_RH_7_2) 162 | if (tp->gso_segs) 163 | return tp->gso_segs * tp->mss_cache; 164 | #else 165 | if (tp->xmit_size_goal_segs) 166 | return tp->xmit_size_goal_segs * tp->mss_cache; 167 | #endif 168 | return tp->mss_cache; 169 | } 170 | 171 | #endif 172 | -------------------------------------------------------------------------------- /kernel/siw_cq.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE.
37 | */ 38 | 39 | #include 40 | #include 41 | #include 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | #include "siw.h" 49 | #include "siw_obj.h" 50 | #include "siw_cm.h" 51 | 52 | static int siw_wc_op_siw2ofa[SIW_NUM_OPCODES] = { 53 | [SIW_OP_WRITE] = IB_WC_RDMA_WRITE, 54 | [SIW_OP_SEND] = IB_WC_SEND, 55 | [SIW_OP_SEND_WITH_IMM] = IB_WC_SEND, 56 | [SIW_OP_READ] = IB_WC_RDMA_READ, 57 | [SIW_OP_COMP_AND_SWAP] = IB_WC_COMP_SWAP, 58 | [SIW_OP_FETCH_AND_ADD] = IB_WC_FETCH_ADD, 59 | [SIW_OP_INVAL_STAG] = IB_WC_LOCAL_INV, 60 | [SIW_OP_RECEIVE] = IB_WC_RECV, 61 | [SIW_OP_READ_RESPONSE] = -1 /* not used */ 62 | }; 63 | 64 | /* 65 | * translate wc into ofa syntax 66 | */ 67 | static void siw_wc_siw2ofa(struct siw_cqe *cqe, struct ib_wc *ofa_wc) 68 | { 69 | memset(ofa_wc, 0, sizeof *ofa_wc); 70 | 71 | ofa_wc->wr_id = cqe->id; 72 | ofa_wc->status = cqe->status; 73 | ofa_wc->byte_len = cqe->bytes; 74 | ofa_wc->qp = &((struct siw_qp *)cqe->qp)->ofa_qp; 75 | 76 | ofa_wc->opcode = siw_wc_op_siw2ofa[cqe->opcode]; 77 | /* 78 | * ofa_wc->imm_data = 0; 79 | * ofa_wc->vendor_err = 0; 80 | * ofa_wc->src_qp = 0; 81 | * ofa_wc->wc_flags = 0; ADD immediate data support 82 | * ofa_wc->pkey_index = 0; 83 | * ofa_wc->slid = 0; 84 | * ofa_wc->sl = 0; 85 | * ofa_wc->dlid_path_bits = 0; 86 | * ofa_wc->port_num = 0; 87 | */ 88 | } 89 | 90 | /* 91 | * Reap one CQE from the CQ. 92 | * 93 | * Caller must hold qp read lock 94 | * 95 | * TODO: Provide routine which can read more than one CQE 96 | */ 97 | int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *ofa_wc) 98 | { 99 | struct siw_cqe *cqe; 100 | unsigned long flags; 101 | 102 | lock_cq_rxsave(cq, flags); 103 | 104 | cqe = &cq->queue[cq->cq_get % cq->num_cqe]; 105 | if (cqe->flags & SIW_WQE_VALID) { 106 | siw_wc_siw2ofa(cqe, ofa_wc); 107 | 108 | dprint(DBG_WR, " QP%d, CQ%d: Reap WQE type: %d, p: %p\n", 109 | QP_ID((struct siw_qp *)cqe->qp), OBJ_ID(cq), 110 | cqe->opcode, cqe); 111 | 112 | if (cq->kernel_verbs) 113 | siw_qp_put(cqe->qp); 114 | 115 | cqe->flags = 0; 116 | cq->cq_get++; 117 | 118 | smp_wmb(); 119 | 120 | unlock_cq_rxsave(cq, flags); 121 | return 1; 122 | } 123 | unlock_cq_rxsave(cq, flags); 124 | return 0; 125 | } 126 | 127 | /* 128 | * siw_cq_flush() 129 | * 130 | * Flush all CQ elements. No CQ lock is taken. 131 | */ 132 | void siw_cq_flush(struct siw_cq *cq) 133 | { 134 | dprint(DBG_CM|DBG_OBJ, "(CQ%d:) Enter\n", OBJ_ID(cq)); 135 | 136 | memset(cq->queue, 0, cq->num_cqe * sizeof(struct siw_cqe)); 137 | } 138 | -------------------------------------------------------------------------------- /kernel/siw_debug.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * Fredy Neeser 6 | * 7 | * Copyright (c) 2008-2016, IBM Corporation 8 | * 9 | * This software is available to you under a choice of one of two 10 | * licenses. You may choose to be licensed under the terms of the GNU 11 | * General Public License (GPL) Version 2, available from the file 12 | * COPYING in the main directory of this source tree, or the 13 | * BSD license below: 14 | * 15 | * Redistribution and use in source and binary forms, with or 16 | * without modification, are permitted provided that the following 17 | * conditions are met: 18 | * 19 | * - Redistributions of source code must retain the above copyright notice, 20 | * this list of conditions and the following disclaimer. 
21 | * 22 | * - Redistributions in binary form must reproduce the above copyright 23 | * notice, this list of conditions and the following disclaimer in the 24 | * documentation and/or other materials provided with the distribution. 25 | * 26 | * - Neither the name of IBM nor the names of its contributors may be 27 | * used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 31 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 32 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 33 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 34 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 35 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 36 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 | * SOFTWARE. 38 | */ 39 | 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | #include 48 | #include 49 | #include 50 | 51 | #include "siw.h" 52 | #include "siw_cm.h" 53 | #include "siw_obj.h" 54 | 55 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 0) 56 | #define FDENTRY(f) (f->f_dentry) 57 | #else 58 | #define FDENTRY(f) (f->f_path.dentry) 59 | #endif 60 | 61 | 62 | static struct dentry *siw_debugfs; 63 | 64 | static ssize_t siw_show_qps(struct file *f, char __user *buf, size_t space, 65 | loff_t *ppos) 66 | { 67 | struct siw_dev *sdev = FDENTRY(f)->d_inode->i_private; 68 | struct list_head *pos, *tmp; 69 | char *kbuf = NULL; 70 | int len = 0, n, num_qp; 71 | 72 | if (*ppos) 73 | goto out; 74 | 75 | kbuf = kmalloc(space, GFP_KERNEL); 76 | if (!kbuf) 77 | goto out; 78 | 79 | num_qp = atomic_read(&sdev->num_qp); 80 | if (!num_qp) 81 | goto out; 82 | 83 | len = snprintf(kbuf, space, "%s: %d QPs\n", sdev->ofa_dev.name, num_qp); 84 | if (len > space) { 85 | len = space; 86 | goto out; 87 | } 88 | space -= len; 89 | n = snprintf(kbuf + len, space, 90 | "%-7s%-6s%-6s%-5s%-5s%-5s%-5s%-5s%-20s%-20s\n", 91 | "QP-ID", "State", "Ref's", "SQ", "RQ", "IRQ", "ORQ", 92 | "s/r", "Sock", "CEP"); 93 | 94 | if (n > space) { 95 | len += space; 96 | goto out; 97 | } 98 | len += n; 99 | space -= n; 100 | 101 | list_for_each_safe(pos, tmp, &sdev->qp_list) { 102 | struct siw_qp *qp = list_entry(pos, struct siw_qp, devq); 103 | n = snprintf(kbuf + len, space, 104 | "%-7d%-6d%-6d%-5d%-5d%-5d%-5d%d/%-3d0x%-17p" 105 | " 0x%-18p\n", 106 | QP_ID(qp), 107 | qp->attrs.state, 108 | atomic_read(&qp->hdr.ref.refcount), 109 | qp->attrs.sq_size, 110 | qp->attrs.rq_size, 111 | qp->attrs.irq_size, 112 | qp->attrs.orq_size, 113 | tx_wqe(qp) ? 1 : 0, 114 | rx_wqe(qp) ? 
1 : 0, 115 | qp->attrs.llp_stream_handle, 116 | qp->cep); 117 | if (n < space) { 118 | len += n; 119 | space -= n; 120 | } else { 121 | len += space; 122 | break; 123 | } 124 | } 125 | out: 126 | if (len) 127 | len = simple_read_from_buffer(buf, len, ppos, kbuf, len); 128 | 129 | kfree(kbuf); 130 | 131 | return len; 132 | }; 133 | 134 | static ssize_t siw_show_ceps(struct file *f, char __user *buf, size_t space, 135 | loff_t *ppos) 136 | { 137 | struct siw_dev *sdev = FDENTRY(f)->d_inode->i_private; 138 | struct list_head *pos, *tmp; 139 | char *kbuf = NULL; 140 | int len = 0, n, num_cep; 141 | 142 | if (*ppos) 143 | goto out; 144 | 145 | kbuf = kmalloc(space, GFP_KERNEL); 146 | if (!kbuf) 147 | goto out; 148 | 149 | num_cep = atomic_read(&sdev->num_cep); 150 | if (!num_cep) 151 | goto out; 152 | 153 | len = snprintf(kbuf, space, "%s: %d CEPs\n", sdev->ofa_dev.name, 154 | num_cep); 155 | if (len > space) { 156 | len = space; 157 | goto out; 158 | } 159 | space -= len; 160 | 161 | n = snprintf(kbuf + len, space, 162 | "%-20s%-6s%-6s%-7s%-3s%-3s%-4s%-21s%-9s\n", 163 | "CEP", "State", "Ref's", "QP-ID", "LQ", "LC", "U", "Sock", 164 | "CM-ID"); 165 | 166 | if (n > space) { 167 | len += space; 168 | goto out; 169 | } 170 | len += n; 171 | space -= n; 172 | 173 | list_for_each_safe(pos, tmp, &sdev->cep_list) { 174 | struct siw_cep *cep = list_entry(pos, struct siw_cep, devq); 175 | 176 | n = snprintf(kbuf + len, space, 177 | "0x%-18p%-6d%-6d%-7d%-3s%-3s%-4d0x%-18p" 178 | " 0x%-16p\n", 179 | cep, cep->state, 180 | atomic_read(&cep->ref.refcount), 181 | cep->qp ? QP_ID(cep->qp) : -1, 182 | list_empty(&cep->listenq) ? "n" : "y", 183 | cep->listen_cep ? "y" : "n", 184 | cep->in_use, 185 | cep->llp.sock, 186 | cep->cm_id); 187 | if (n < space) { 188 | len += n; 189 | space -= n; 190 | } else { 191 | len += space; 192 | break; 193 | } 194 | } 195 | out: 196 | if (len) 197 | len = simple_read_from_buffer(buf, len, ppos, kbuf, len); 198 | 199 | kfree(kbuf); 200 | 201 | return len; 202 | }; 203 | 204 | static ssize_t siw_show_stats(struct file *f, char __user *buf, size_t space, 205 | loff_t *ppos) 206 | { 207 | struct siw_dev *sdev = FDENTRY(f)->d_inode->i_private; 208 | char *kbuf = NULL; 209 | int len = 0; 210 | 211 | if (*ppos) 212 | goto out; 213 | 214 | kbuf = kmalloc(space, GFP_KERNEL); 215 | if (!kbuf) 216 | goto out; 217 | 218 | len = snprintf(kbuf, space, "Allocated SIW Objects:\n" 219 | #if DPRINT_MASK > 0 220 | "Global :\t%s: %d\n" 221 | #endif 222 | "Device %s (%s):\t" 223 | "%s: %d, %s %d, %s: %d, %s: %d, %s: %d, %s: %d, %s: %d\n", 224 | #if DPRINT_MASK > 0 225 | "WQEs", atomic_read(&siw_num_wqe), 226 | #endif 227 | sdev->ofa_dev.name, 228 | sdev->netdev->flags & IFF_UP ? 
"IFF_UP" : "IFF_DOWN", 229 | "CXs", atomic_read(&sdev->num_ctx), 230 | "PDs", atomic_read(&sdev->num_pd), 231 | "QPs", atomic_read(&sdev->num_qp), 232 | "CQs", atomic_read(&sdev->num_cq), 233 | "SRQs", atomic_read(&sdev->num_srq), 234 | "MRs", atomic_read(&sdev->num_mem), 235 | "CEPs", atomic_read(&sdev->num_cep)); 236 | if (len > space) 237 | len = space; 238 | out: 239 | if (len) 240 | len = simple_read_from_buffer(buf, len, ppos, kbuf, len); 241 | 242 | kfree(kbuf); 243 | return len; 244 | } 245 | 246 | static const struct file_operations siw_qp_debug_fops = { 247 | .owner = THIS_MODULE, 248 | .read = siw_show_qps 249 | }; 250 | 251 | static const struct file_operations siw_cep_debug_fops = { 252 | .owner = THIS_MODULE, 253 | .read = siw_show_ceps 254 | }; 255 | 256 | static const struct file_operations siw_stats_debug_fops = { 257 | .owner = THIS_MODULE, 258 | .read = siw_show_stats 259 | }; 260 | 261 | void siw_debugfs_add_device(struct siw_dev *sdev) 262 | { 263 | struct dentry *entry; 264 | 265 | if (!siw_debugfs) 266 | return; 267 | 268 | sdev->debugfs = debugfs_create_dir(sdev->ofa_dev.name, siw_debugfs); 269 | if (sdev->debugfs) { 270 | entry = debugfs_create_file("qp", S_IRUSR, sdev->debugfs, 271 | (void *)sdev, &siw_qp_debug_fops); 272 | if (!entry) 273 | dprint(DBG_DM, ": could not create 'qp' entry\n"); 274 | 275 | entry = debugfs_create_file("cep", S_IRUSR, sdev->debugfs, 276 | (void *)sdev, &siw_cep_debug_fops); 277 | if (!entry) 278 | dprint(DBG_DM, ": could not create 'cep' entry\n"); 279 | 280 | entry = debugfs_create_file("stats", S_IRUSR, sdev->debugfs, 281 | (void *)sdev, 282 | &siw_stats_debug_fops); 283 | if (!entry) 284 | dprint(DBG_DM, ": could not create 'stats' entry\n"); 285 | } 286 | } 287 | 288 | void siw_debugfs_del_device(struct siw_dev *sdev) 289 | { 290 | if (sdev->debugfs) { 291 | debugfs_remove_recursive(sdev->debugfs); 292 | sdev->debugfs = NULL; 293 | } 294 | } 295 | 296 | void siw_debug_init(void) 297 | { 298 | siw_debugfs = debugfs_create_dir("siw", NULL); 299 | 300 | if (!siw_debugfs || siw_debugfs == ERR_PTR(-ENODEV)) { 301 | dprint(DBG_DM, ": could not init debugfs\n"); 302 | siw_debugfs = NULL; 303 | } 304 | #if DPRINT_MASK > 0 305 | atomic_set(&siw_num_wqe, 0); 306 | #endif 307 | } 308 | 309 | void siw_debugfs_delete(void) 310 | { 311 | if (siw_debugfs) 312 | debugfs_remove_recursive(siw_debugfs); 313 | 314 | siw_debugfs = NULL; 315 | } 316 | 317 | void siw_print_qp_attr_mask(enum ib_qp_attr_mask attr_mask, char *msg) 318 | { 319 | pr_info("-------- %s -------\n", msg); 320 | if (IB_QP_STATE & attr_mask) 321 | pr_info("IB_QP_STATE\n"); 322 | if (IB_QP_CUR_STATE & attr_mask) 323 | pr_info("IB_QP_CUR_STATE\n"); 324 | if (IB_QP_EN_SQD_ASYNC_NOTIFY & attr_mask) 325 | pr_info("IB_QP_EN_SQD_ASYNC_NOTIFY\n"); 326 | if (IB_QP_ACCESS_FLAGS & attr_mask) 327 | pr_info("IB_QP_ACCESS_FLAGS\n"); 328 | if (IB_QP_PKEY_INDEX & attr_mask) 329 | pr_info("IB_QP_PKEY_INDEX\n"); 330 | if (IB_QP_PORT & attr_mask) 331 | pr_info("IB_QP_PORT\n"); 332 | if (IB_QP_QKEY & attr_mask) 333 | pr_info("IB_QP_QKEY\n"); 334 | if (IB_QP_AV & attr_mask) 335 | pr_info("IB_QP_AV\n"); 336 | if (IB_QP_PATH_MTU & attr_mask) 337 | pr_info("IB_QP_PATH_MTU\n"); 338 | if (IB_QP_TIMEOUT & attr_mask) 339 | pr_info("IB_QP_TIMEOUT\n"); 340 | if (IB_QP_RETRY_CNT & attr_mask) 341 | pr_info("IB_QP_RETRY_CNT\n"); 342 | if (IB_QP_RNR_RETRY & attr_mask) 343 | pr_info("IB_QP_RNR_RETRY\n"); 344 | if (IB_QP_RQ_PSN & attr_mask) 345 | pr_info("IB_QP_RQ_PSN\n"); 346 | if (IB_QP_MAX_QP_RD_ATOMIC & 
attr_mask) 347 | pr_info("IB_QP_MAX_QP_RD_ATOMIC\n"); 348 | if (IB_QP_ALT_PATH & attr_mask) 349 | pr_info("IB_QP_ALT_PATH\n"); 350 | if (IB_QP_MIN_RNR_TIMER & attr_mask) 351 | pr_info("IB_QP_MIN_RNR_TIMER\n"); 352 | if (IB_QP_SQ_PSN & attr_mask) 353 | pr_info("IB_QP_SQ_PSN\n"); 354 | if (IB_QP_MAX_DEST_RD_ATOMIC & attr_mask) 355 | pr_info("IB_QP_MAX_DEST_RD_ATOMIC\n"); 356 | if (IB_QP_PATH_MIG_STATE & attr_mask) 357 | pr_info("IB_QP_PATH_MIG_STATE\n"); 358 | if (IB_QP_CAP & attr_mask) 359 | pr_info("IB_QP_CAP\n"); 360 | if (IB_QP_DEST_QPN & attr_mask) 361 | pr_info("IB_QP_DEST_QPN\n"); 362 | pr_info("-------- %s -(end)-\n", msg); 363 | } 364 | 365 | 366 | void siw_print_hdr(union iwarp_hdrs *hdr, int qp_id, char *msg) 367 | { 368 | switch (__rdmap_opcode(&hdr->ctrl)) { 369 | 370 | case RDMAP_RDMA_WRITE: 371 | pr_info("QP%04d %s(WRITE, MPA len %d): " 372 | "%08x %016llx\n", 373 | qp_id, msg, ntohs(hdr->ctrl.mpa_len), 374 | hdr->rwrite.sink_stag, hdr->rwrite.sink_to); 375 | break; 376 | 377 | case RDMAP_RDMA_READ_REQ: 378 | pr_info("QP%04d %s(RREQ, MPA len %d): %08x %08x " 379 | "%08x %08x %016llx %08x %08x %016llx\n", qp_id, msg, 380 | ntohs(hdr->ctrl.mpa_len), 381 | hdr->rreq.ddp_qn, hdr->rreq.ddp_msn, 382 | hdr->rreq.ddp_mo, hdr->rreq.sink_stag, 383 | hdr->rreq.sink_to, hdr->rreq.read_size, 384 | hdr->rreq.source_stag, hdr->rreq.source_to); 385 | 386 | break; 387 | case RDMAP_RDMA_READ_RESP: 388 | pr_info("QP%04d %s(RRESP, MPA len %d):" 389 | " %08x %016llx\n", 390 | qp_id, msg, ntohs(hdr->ctrl.mpa_len), 391 | hdr->rresp.sink_stag, hdr->rresp.sink_to); 392 | break; 393 | 394 | case RDMAP_SEND: 395 | pr_info("QP%04d %s(SEND, MPA len %d): %08x %08x " 396 | "%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len), 397 | hdr->send.ddp_qn, hdr->send.ddp_msn, hdr->send.ddp_mo); 398 | break; 399 | 400 | case RDMAP_SEND_INVAL: 401 | pr_info("QP%04d %s(S_INV, MPA len %d): %08x %08x " 402 | "%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len), 403 | hdr->send.ddp_qn, hdr->send.ddp_msn, 404 | hdr->send.ddp_mo); 405 | break; 406 | 407 | case RDMAP_SEND_SE: 408 | pr_info("QP%04d %s(S_SE, MPA len %d): %08x %08x " 409 | "%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len), 410 | hdr->send.ddp_qn, hdr->send.ddp_msn, 411 | hdr->send.ddp_mo); 412 | break; 413 | 414 | case RDMAP_SEND_SE_INVAL: 415 | pr_info("QP%04d %s(S_SE_INV, MPA len %d): %08x %08x " 416 | "%08x\n", qp_id, msg, ntohs(hdr->ctrl.mpa_len), 417 | hdr->send.ddp_qn, hdr->send.ddp_msn, 418 | hdr->send.ddp_mo); 419 | break; 420 | 421 | case RDMAP_TERMINATE: 422 | pr_info("QP%04d %s(TERM, MPA len %d):\n", qp_id, msg, 423 | ntohs(hdr->ctrl.mpa_len)); 424 | break; 425 | 426 | default: 427 | pr_info("QP%04d %s ?????\n", qp_id, msg); 428 | break; 429 | } 430 | } 431 | 432 | void siw_print_rctx(struct siw_iwarp_rx *rctx) 433 | { 434 | pr_info("---RX Context-->\n"); 435 | siw_print_hdr(&rctx->hdr, RX_QPID(rctx), "\nCurrent Pkt:\t"); 436 | pr_info("Skbuf State:\tp:0x%p, new:%d, off:%d, copied:%d\n", 437 | rctx->skb, rctx->skb_new, rctx->skb_offset, rctx->skb_copied); 438 | pr_info("FPDU State:\trx_state:%d,\n\t\trcvd:%d, rem:%d, " 439 | "pad:%d\n", rctx->state, rctx->fpdu_part_rcvd, 440 | rctx->fpdu_part_rem, rctx->pad); 441 | pr_info("Rx Mem:\t\tp:0x%p, stag:0x%08x, mem_id:%d\n", 442 | &rctx->wqe_active, rctx->ddp_stag, rctx->ddp_stag >> 8); 443 | pr_info("DDP State:\tprev_op:%d, first_seg:%d, " 444 | "more_segs:%d\n", rctx->prev_rdmap_opcode, rctx->first_ddp_seg, 445 | rctx->more_ddp_segs); 446 | pr_info("MPA State:\tlen:%d, crc_enabled:%d, crc:0x%x\n", 447 | 
ntohs(rctx->hdr.ctrl.mpa_len), rctx->crc_enabled, 448 | rctx->trailer.crc); 449 | pr_info("<---------------\n"); 450 | } 451 | 452 | #if DPRINT_MASK > 0 453 | atomic_t siw_num_wqe; 454 | 455 | char ib_qp_state_to_string[IB_QPS_ERR+1][sizeof "RESET"] = { 456 | [IB_QPS_RESET] = "RESET", 457 | [IB_QPS_INIT] = "INIT", 458 | [IB_QPS_RTR] = "RTR", 459 | [IB_QPS_RTS] = "RTS", 460 | [IB_QPS_SQD] = "SQD", 461 | [IB_QPS_SQE] = "SQE", 462 | [IB_QPS_ERR] = "ERR" 463 | }; 464 | #endif 465 | -------------------------------------------------------------------------------- /kernel/siw_debug.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Fredy Neeser 5 | * Bernard Metzler 6 | * 7 | * Copyright (c) 2008-2016, IBM Corporation 8 | * 9 | * This software is available to you under a choice of one of two 10 | * licenses. You may choose to be licensed under the terms of the GNU 11 | * General Public License (GPL) Version 2, available from the file 12 | * COPYING in the main directory of this source tree, or the 13 | * BSD license below: 14 | * 15 | * Redistribution and use in source and binary forms, with or 16 | * without modification, are permitted provided that the following 17 | * conditions are met: 18 | * 19 | * - Redistributions of source code must retain the above copyright notice, 20 | * this list of conditions and the following disclaimer. 21 | * 22 | * - Redistributions in binary form must reproduce the above copyright 23 | * notice, this list of conditions and the following disclaimer in the 24 | * documentation and/or other materials provided with the distribution. 25 | * 26 | * - Neither the name of IBM nor the names of its contributors may be 27 | * used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 31 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 32 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 33 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 34 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 35 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 36 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 | * SOFTWARE. 38 | */ 39 | 40 | #ifndef _SIW_DEBUG_H 41 | #define _SIW_DEBUG_H 42 | 43 | #include 44 | #include /* in_interrupt() */ 45 | 46 | /* 47 | * dprint: Selective debug printing 48 | * 49 | * Use an OR combination of DBG_* as dbgcat in dprint*(dbgcat,...) 
50 | * to assign debug messages to categories: 51 | * 52 | * dbgcat Debug message belongs to category 53 | * ---------------------------------------------------------------------------- 54 | * DBG_ON Always on, for really important events or error conditions 55 | * DBG_TMP Temporarily on for fine-grained debugging 56 | * DBG_OBJ Object management (object construction/destruction/refcounting) 57 | * DBG_MM Memory management 58 | * DBG_EH Event handling (completion events and asynchronous events) 59 | * DBG_CM Connection management, QP states 60 | * DBG_WR Work requests 61 | * DBG_TX iWARP TX path 62 | * DBG_RX iWARP RX path 63 | * DBG_SK Socket operations 64 | * DBG_KT Kernel threads 65 | * DBG_IRQ Interrupt context (SoftIRQ or HardIRQ) 66 | * DBG_DM Device management 67 | * DBG_HDR Packet HDRs 68 | * DBG_ALL All categories above 69 | */ 70 | #define DBG_ON 0x00000001 71 | #define DBG_TMP 0x00000002 72 | #define DBG_OBJ 0x00000004 73 | #define DBG_MM 0x00000008 74 | #define DBG_EH 0x00000010 75 | #define DBG_CM 0x00000020 76 | #define DBG_WR 0x00000040 77 | #define DBG_TX 0x00000080 78 | #define DBG_RX 0x00000100 79 | #define DBG_SK 0x00000200 80 | #define DBG_KT 0x00000400 81 | #define DBG_IRQ 0x00000800 82 | #define DBG_DM 0x00001000 83 | #define DBG_HDR 0x00002000 84 | #define DBG_CQ 0x00004000 85 | #define DBG_ALL (DBG_IRQ|DBG_KT|DBG_SK|DBG_RX|DBG_TX|DBG_WR|\ 86 | DBG_CM|DBG_EH|DBG_MM|DBG_OBJ|DBG_TMP|DBG_DM|DBG_ON|DBG_HDR|DBG_CQ) 87 | #define DBG_ALL_NOHDR (DBG_IRQ|DBG_KT|DBG_SK|DBG_RX|DBG_TX|DBG_WR|\ 88 | DBG_CM|DBG_EH|DBG_MM|DBG_OBJ|DBG_TMP|DBG_DM|DBG_ON) 89 | #define DBG_CTRL (DBG_ON|DBG_CM|DBG_DM) 90 | 91 | /* 92 | * Set DPRINT_MASK to tailor your debugging needs: 93 | * 94 | * DPRINT_MASK value Enables debug messages for 95 | * --------------------------------------------------------------------- 96 | * DBG_ON Important events / error conditions only 97 | * (minimum number of debug messages) 98 | * OR-ed combination of DBG_* Selective debugging 99 | * DBG_KT|DBG_ON Kernel threads 100 | * DBG_ALL All categories 101 | */ 102 | #define DPRINT_MASK (DBG_ON) 103 | 104 | struct siw_dev; 105 | struct siw_iwarp_rx; 106 | union iwarp_hdrs; 107 | 108 | extern void siw_debug_init(void); 109 | extern void siw_debugfs_add_device(struct siw_dev *); 110 | extern void siw_debugfs_del_device(struct siw_dev *); 111 | extern void siw_debugfs_delete(void); 112 | 113 | extern void siw_print_hdr(union iwarp_hdrs *, int, char *); 114 | extern void siw_print_rctx(struct siw_iwarp_rx *); 115 | extern void siw_print_qp_attr_mask(enum ib_qp_attr_mask, char *); 116 | 117 | #undef DEBUG 118 | #define DEBUG_ORQ 119 | #undef DEBUG_ORQ 120 | 121 | #if DPRINT_MASK > 0 122 | 123 | /** 124 | * dprint - Selective debug print for process, SoftIRQ or HardIRQ context 125 | * 126 | * Debug print with selectable debug categories, 127 | * starting with header 128 | * - "( pid /cpu) __func__" for process context 129 | * - "( irq /cpu) __func__" for IRQ context 130 | * 131 | * @dbgcat : Set of debug categories (OR-ed combination of DBG_* above), 132 | * to which this debug message is assigned. 133 | * @fmt : printf compliant format string 134 | * @args : printf compliant argument list 135 | */ 136 | #define dprint(dbgcat, fmt, args...)
\ 137 | do { \ 138 | if ((dbgcat) & DPRINT_MASK) { \ 139 | if (!in_interrupt()) \ 140 | pr_info("(%5d/%1d) %s" fmt, \ 141 | current->pid, \ 142 | current_thread_info()->cpu, \ 143 | __func__, ## args); \ 144 | else \ 145 | pr_info("( irq /%1d) %s" fmt, \ 146 | current_thread_info()->cpu, \ 147 | __func__, ## args); \ 148 | } \ 149 | } while (0) 150 | 151 | 152 | #define siw_dprint_rctx(r) siw_print_rctx(r) 153 | 154 | extern char ib_qp_state_to_string[IB_QPS_ERR+1][sizeof "RESET"]; 155 | extern atomic_t siw_num_wqe; 156 | 157 | #define SIW_INC_STAT_WQE atomic_inc(&siw_num_wqe) 158 | #define SIW_DEC_STAT_WQE atomic_dec(&siw_num_wqe) 159 | 160 | #else 161 | 162 | #define dprint(dbgcat, fmt, args...) do { } while (0) 163 | #define siw_dprint_rctx(r) do { } while (0) 164 | #define SIW_INC_STAT_WQE do { } while (0) 165 | #define SIW_DEC_STAT_WQE do { } while (0) 166 | #endif 167 | 168 | 169 | #if DPRINT_MASK & DBG_HDR 170 | #define siw_dprint_hdr(h, i, m) siw_print_hdr(h, i, m) 171 | #else 172 | #define siw_dprint_hdr(h, i, m) do { } while (0) 173 | #endif 174 | 175 | #if DPRINT_MASK & DBG_CM 176 | #define siw_dprint_qp_attr_mask(mask)\ 177 | siw_print_qp_attr_mask(mask, (char *)__func__) 178 | #else 179 | #define siw_dprint_qp_attr_mask(mask) do { } while (0) 180 | #endif 181 | 182 | #endif 183 | -------------------------------------------------------------------------------- /kernel/siw_main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 
37 | */ 38 | 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | 49 | #include 50 | #include 51 | #include 52 | 53 | #include "siw.h" 54 | #include "siw_obj.h" 55 | #include "siw_cm.h" 56 | #include "siw_verbs.h" 57 | #ifdef USE_SQ_KTHREAD 58 | #include 59 | #endif 60 | 61 | 62 | MODULE_AUTHOR("Bernard Metzler"); 63 | MODULE_DESCRIPTION("Software iWARP Driver"); 64 | MODULE_LICENSE("Dual BSD/GPL"); 65 | MODULE_VERSION("0.2"); 66 | 67 | #define SIW_MAX_IF 12 68 | static char *iface_list[SIW_MAX_IF]; 69 | module_param_array(iface_list, charp, NULL, 0444); 70 | MODULE_PARM_DESC(iface_list, "Interface list siw attaches to if present"); 71 | 72 | static bool loopback_enabled = 1; 73 | module_param(loopback_enabled, bool, 0644); 74 | MODULE_PARM_DESC(loopback_enabled, "enable_loopback"); 75 | 76 | LIST_HEAD(siw_devlist); 77 | DEFINE_SPINLOCK(siw_dev_lock); 78 | 79 | #ifdef USE_SQ_KTHREAD 80 | static char *tx_cpu_list[NR_CPUS]; 81 | module_param_array(tx_cpu_list, charp, NULL, 0444); 82 | MODULE_PARM_DESC(tx_cpu_list, "List of CPUs siw TX thread shall be bound to"); 83 | 84 | int default_tx_cpu = -1; 85 | static int tx_on_all_cpus = 1; 86 | extern int siw_run_sq(void *); 87 | struct task_struct *qp_tx_thread[NR_CPUS]; 88 | #endif 89 | 90 | #ifdef SIW_DB_SYSCALL 91 | extern long siw_doorbell(u32, u32, u32); 92 | long (*db_orig_call) (u32, u32, u32); 93 | #endif 94 | 95 | static ssize_t show_sw_version(struct device *dev, 96 | struct device_attribute *attr, char *buf) 97 | { 98 | struct siw_dev *sdev = container_of(dev, struct siw_dev, ofa_dev.dev); 99 | 100 | return sprintf(buf, "%x\n", sdev->attrs.version); 101 | } 102 | 103 | static ssize_t show_if_type(struct device *dev, 104 | struct device_attribute *attr, char *buf) 105 | { 106 | struct siw_dev *sdev = container_of(dev, struct siw_dev, ofa_dev.dev); 107 | 108 | return sprintf(buf, "%d\n", sdev->attrs.iftype); 109 | } 110 | 111 | static DEVICE_ATTR(sw_version, S_IRUGO, show_sw_version, NULL); 112 | static DEVICE_ATTR(if_type, S_IRUGO, show_if_type, NULL); 113 | 114 | static struct device_attribute *siw_dev_attributes[] = { 115 | &dev_attr_sw_version, 116 | &dev_attr_if_type 117 | }; 118 | 119 | static void siw_device_release(struct device *dev) 120 | { 121 | pr_info("%s device released\n", dev_name(dev)); 122 | } 123 | 124 | static struct device siw_generic_dma_device = { 125 | .archdata.dma_ops = &siw_dma_generic_ops, 126 | .init_name = "software-rdma-v2", 127 | .release = siw_device_release 128 | }; 129 | 130 | static struct bus_type siw_bus = { 131 | .name = "siw", 132 | }; 133 | 134 | static int siw_modify_port(struct ib_device *ofa_dev, u8 port, int mask, 135 | struct ib_port_modify *props) 136 | { 137 | return -EOPNOTSUPP; 138 | } 139 | 140 | 141 | static void siw_device_register(struct siw_dev *sdev) 142 | { 143 | struct ib_device *ofa_dev = &sdev->ofa_dev; 144 | int rv, i; 145 | static int dev_id = 1; 146 | 147 | #if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 34) 148 | rv = ib_register_device(ofa_dev, NULL); 149 | #else 150 | rv = ib_register_device(ofa_dev); 151 | #endif 152 | if (rv) { 153 | dprint(DBG_DM|DBG_ON, "(dev=%s): " 154 | "ib_register_device failed: rv=%d\n", ofa_dev->name, rv); 155 | return; 156 | } 157 | 158 | for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) { 159 | rv = device_create_file(&ofa_dev->dev, siw_dev_attributes[i]); 160 | if (rv) { 161 | dprint(DBG_DM|DBG_ON, "(dev=%s): " 162 | "device_create_file failed: i=%d, rv=%d\n", 163 | 
ofa_dev->name, i, rv); 164 | ib_unregister_device(ofa_dev); 165 | return; 166 | } 167 | } 168 | siw_debugfs_add_device(sdev); 169 | 170 | sdev->attrs.vendor_part_id = dev_id++; 171 | 172 | dprint(DBG_DM, ": Registered '%s' for interface '%s', " 173 | "HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", 174 | ofa_dev->name, sdev->netdev->name, 175 | *(u8 *)sdev->netdev->dev_addr, 176 | *((u8 *)sdev->netdev->dev_addr + 1), 177 | *((u8 *)sdev->netdev->dev_addr + 2), 178 | *((u8 *)sdev->netdev->dev_addr + 3), 179 | *((u8 *)sdev->netdev->dev_addr + 4), 180 | *((u8 *)sdev->netdev->dev_addr + 5)); 181 | 182 | sdev->is_registered = 1; 183 | } 184 | 185 | static void siw_device_deregister(struct siw_dev *sdev) 186 | { 187 | int i; 188 | 189 | siw_debugfs_del_device(sdev); 190 | 191 | if (sdev->is_registered) { 192 | 193 | dprint(DBG_DM, ": deregister %s at %s\n", sdev->ofa_dev.name, 194 | sdev->netdev->name); 195 | 196 | for (i = 0; i < ARRAY_SIZE(siw_dev_attributes); ++i) 197 | device_remove_file(&sdev->ofa_dev.dev, 198 | siw_dev_attributes[i]); 199 | 200 | ib_unregister_device(&sdev->ofa_dev); 201 | } 202 | WARN_ON(atomic_read(&sdev->num_ctx)); 203 | WARN_ON(atomic_read(&sdev->num_srq)); 204 | WARN_ON(atomic_read(&sdev->num_qp)); 205 | WARN_ON(atomic_read(&sdev->num_cq)); 206 | WARN_ON(atomic_read(&sdev->num_mem)); 207 | WARN_ON(atomic_read(&sdev->num_pd)); 208 | WARN_ON(atomic_read(&sdev->num_cep)); 209 | 210 | i = 0; 211 | 212 | while (!list_empty(&sdev->cep_list)) { 213 | struct siw_cep *cep = list_entry(sdev->cep_list.next, 214 | struct siw_cep, devq); 215 | list_del(&cep->devq); 216 | dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", 217 | cep, cep->state); 218 | kfree(cep); 219 | i++; 220 | } 221 | if (i) 222 | pr_warning("siw_device_deregister: free'd %d CEPs\n", i); 223 | 224 | sdev->is_registered = 0; 225 | } 226 | 227 | static void siw_device_destroy(struct siw_dev *sdev) 228 | { 229 | dprint(DBG_DM, ": destroy siw device at %s\n", sdev->netdev->name); 230 | 231 | siw_idr_release(sdev); 232 | kfree(sdev->ofa_dev.iwcm); 233 | dev_put(sdev->netdev); 234 | ib_dealloc_device(&sdev->ofa_dev); 235 | } 236 | 237 | 238 | static int siw_match_iflist(struct net_device *dev) 239 | { 240 | int i = 0, found = *iface_list ? 
0 : 1; 241 | 242 | while (iface_list[i]) { 243 | if (!strcmp(iface_list[i++], dev->name)) { 244 | found = 1; 245 | break; 246 | } 247 | } 248 | return found; 249 | } 250 | 251 | static struct siw_dev *siw_dev_from_netdev(struct net_device *dev) 252 | { 253 | if (!list_empty(&siw_devlist)) { 254 | struct list_head *pos; 255 | list_for_each(pos, &siw_devlist) { 256 | struct siw_dev *sdev = 257 | list_entry(pos, struct siw_dev, list); 258 | if (sdev->netdev == dev) 259 | return sdev; 260 | } 261 | } 262 | return NULL; 263 | } 264 | 265 | #ifdef USE_SQ_KTHREAD 266 | static int siw_tx_qualified(int cpu) 267 | { 268 | int i = 0; 269 | 270 | if (tx_on_all_cpus) 271 | return 1; 272 | 273 | for (i = 0; i < NR_CPUS; i++) { 274 | #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) 275 | if (tx_cpu_list[i] && 276 | simple_strtoull(tx_cpu_list[i], NULL, 10) == cpu) 277 | #else 278 | int c; 279 | if (tx_cpu_list[i] && kstrtoint(tx_cpu_list[i], 0, &c) == 0 && 280 | cpu == c) 281 | #endif 282 | return 1; 283 | } 284 | return 0; 285 | } 286 | 287 | static int siw_create_tx_threads(int max_threads, int check_qualified) 288 | { 289 | int cpu, rv, assigned = 0; 290 | 291 | if (max_threads < 0 || max_threads > NR_CPUS) 292 | return 0; 293 | 294 | for_each_online_cpu(cpu) { 295 | if (check_qualified == 0 || siw_tx_qualified(cpu)) { 296 | qp_tx_thread[cpu] = 297 | kthread_create(siw_run_sq, 298 | (unsigned long *)(long)cpu, 299 | "qp_tx_thread/%d", cpu); 300 | if (IS_ERR(qp_tx_thread[cpu])) { 301 | rv = PTR_ERR(qp_tx_thread[cpu]); 302 | qp_tx_thread[cpu] = NULL; 303 | pr_info("Starting TX thread for CPU %d failed", 304 | cpu); 305 | break; 306 | } 307 | kthread_bind(qp_tx_thread[cpu], cpu); 308 | wake_up_process(qp_tx_thread[cpu]); 309 | assigned++; 310 | if (default_tx_cpu < 0) 311 | default_tx_cpu = cpu; 312 | if (assigned >= max_threads) 313 | break; 314 | } 315 | } 316 | return assigned; 317 | } 318 | #endif 319 | 320 | static int siw_dev_qualified(struct net_device *netdev) 321 | { 322 | if (!siw_match_iflist(netdev)) { 323 | dprint(DBG_DM|DBG_ON, ": %s (not selected)\n", 324 | netdev->name); 325 | return 0; 326 | } 327 | /* 328 | * Additional hardware support can be added here 329 | * (e.g. ARPHRD_FDDI, ARPHRD_ATM, ...) - see 330 | * for type identifiers.
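 *
 * Illustrative module-load examples for the selection logic consumed
 * above by siw_match_iflist() and siw_tx_qualified(); interface names
 * and CPU numbers are examples only:
 *
 *	sudo insmod ./siw.ko iface_list=eth0,eth1
 *	sudo insmod ./siw.ko tx_cpu_list=0,2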
331 | */ 332 | if (netdev->type == ARPHRD_ETHER || 333 | netdev->type == ARPHRD_IEEE802 || 334 | netdev->type == ARPHRD_INFINIBAND || 335 | (netdev->type == ARPHRD_LOOPBACK && loopback_enabled)) 336 | return 1; 337 | 338 | return 0; 339 | } 340 | 341 | static struct siw_dev *siw_device_create(struct net_device *netdev) 342 | { 343 | struct siw_dev *sdev = (struct siw_dev *)ib_alloc_device(sizeof *sdev); 344 | struct ib_device *ofa_dev; 345 | 346 | if (!sdev) 347 | goto out; 348 | 349 | ofa_dev = &sdev->ofa_dev; 350 | 351 | ofa_dev->iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); 352 | if (!ofa_dev->iwcm) { 353 | ib_dealloc_device(ofa_dev); 354 | sdev = NULL; 355 | goto out; 356 | } 357 | 358 | sdev->netdev = netdev; 359 | list_add_tail(&sdev->list, &siw_devlist); 360 | 361 | strcpy(ofa_dev->name, SIW_IBDEV_PREFIX); 362 | strlcpy(ofa_dev->name + strlen(SIW_IBDEV_PREFIX), netdev->name, 363 | IB_DEVICE_NAME_MAX - strlen(SIW_IBDEV_PREFIX)); 364 | 365 | memset(&ofa_dev->node_guid, 0, sizeof(ofa_dev->node_guid)); 366 | if (netdev->type != ARPHRD_LOOPBACK) 367 | memcpy(&ofa_dev->node_guid, netdev->dev_addr, 6); 368 | else { 369 | /* 370 | * The loopback device does not have a HW address, 371 | * but connection management lib expects gid != 0 372 | */ 373 | size_t gidlen = min(strlen(ofa_dev->name), (size_t)6); 374 | memcpy(&ofa_dev->node_guid, ofa_dev->name, gidlen); 375 | } 376 | ofa_dev->owner = THIS_MODULE; 377 | 378 | ofa_dev->uverbs_cmd_mask = 379 | (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 380 | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 381 | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 382 | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 383 | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 384 | (1ull << IB_USER_VERBS_CMD_REG_MR) | 385 | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 386 | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 387 | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 388 | (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 389 | (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 390 | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 391 | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 392 | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 393 | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 394 | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 395 | (1ull << IB_USER_VERBS_CMD_POST_SEND) | 396 | (1ull << IB_USER_VERBS_CMD_POST_RECV) | 397 | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 398 | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 399 | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 400 | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 401 | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); 402 | 403 | ofa_dev->node_type = RDMA_NODE_RNIC; 404 | memcpy(ofa_dev->node_desc, SIW_NODE_DESC_COMMON, sizeof(SIW_NODE_DESC_COMMON)); 405 | 406 | /* 407 | * Current model (one-to-one device association): 408 | * One Softiwarp device per net_device or, equivalently, 409 | * per physical port.
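 *
 * Device naming follows this model as well: the strcpy()/strlcpy()
 * pair above prepends SIW_IBDEV_PREFIX to the net_device name, so,
 * with an assumed prefix value of "siw_", interface "eth0" shows up
 * as ib_device "siw_eth0".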
410 | */ 411 | ofa_dev->phys_port_cnt = 1; 412 | 413 | ofa_dev->num_comp_vectors = 1; 414 | ofa_dev->dma_device = &siw_generic_dma_device; 415 | ofa_dev->query_device = siw_query_device; 416 | ofa_dev->query_port = siw_query_port; 417 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) || defined(IS_RH_7_2) 418 | ofa_dev->get_port_immutable = siw_get_port_immutable; 419 | #endif 420 | ofa_dev->query_qp = siw_query_qp; 421 | ofa_dev->modify_port = siw_modify_port; 422 | ofa_dev->query_pkey = siw_query_pkey; 423 | ofa_dev->query_gid = siw_query_gid; 424 | ofa_dev->alloc_ucontext = siw_alloc_ucontext; 425 | ofa_dev->dealloc_ucontext = siw_dealloc_ucontext; 426 | ofa_dev->mmap = siw_mmap; 427 | ofa_dev->alloc_pd = siw_alloc_pd; 428 | ofa_dev->dealloc_pd = siw_dealloc_pd; 429 | ofa_dev->create_ah = siw_create_ah; 430 | ofa_dev->destroy_ah = siw_destroy_ah; 431 | ofa_dev->create_qp = siw_create_qp; 432 | ofa_dev->modify_qp = siw_ofed_modify_qp; 433 | ofa_dev->destroy_qp = siw_destroy_qp; 434 | ofa_dev->create_cq = siw_create_cq; 435 | ofa_dev->destroy_cq = siw_destroy_cq; 436 | ofa_dev->resize_cq = NULL; 437 | ofa_dev->poll_cq = siw_poll_cq; 438 | ofa_dev->get_dma_mr = siw_get_dma_mr; 439 | ofa_dev->reg_user_mr = siw_reg_user_mr; 440 | ofa_dev->dereg_mr = siw_dereg_mr; 441 | ofa_dev->alloc_mw = NULL; 442 | ofa_dev->dealloc_mw = NULL; 443 | 444 | ofa_dev->create_srq = siw_create_srq; 445 | ofa_dev->modify_srq = siw_modify_srq; 446 | ofa_dev->query_srq = siw_query_srq; 447 | ofa_dev->destroy_srq = siw_destroy_srq; 448 | ofa_dev->post_srq_recv = siw_post_srq_recv; 449 | 450 | ofa_dev->attach_mcast = NULL; 451 | ofa_dev->detach_mcast = NULL; 452 | ofa_dev->process_mad = siw_no_mad; 453 | 454 | ofa_dev->req_notify_cq = siw_req_notify_cq; 455 | ofa_dev->post_send = siw_post_send; 456 | ofa_dev->post_recv = siw_post_receive; 457 | 458 | ofa_dev->dma_ops = &siw_dma_mapping_ops; 459 | 460 | ofa_dev->iwcm->connect = siw_connect; 461 | ofa_dev->iwcm->accept = siw_accept; 462 | ofa_dev->iwcm->reject = siw_reject; 463 | ofa_dev->iwcm->create_listen = siw_create_listen; 464 | ofa_dev->iwcm->destroy_listen = siw_destroy_listen; 465 | ofa_dev->iwcm->add_ref = siw_qp_get_ref; 466 | ofa_dev->iwcm->rem_ref = siw_qp_put_ref; 467 | ofa_dev->iwcm->get_qp = siw_get_ofaqp; 468 | /* 469 | * set and register sw version + user if type 470 | */ 471 | sdev->attrs.version = VERSION_ID_SOFTIWARP; 472 | sdev->attrs.iftype = SIW_IF_MAPPED; 473 | 474 | sdev->attrs.vendor_id = SIW_VENDOR_ID; 475 | sdev->attrs.vendor_part_id = SIW_VENDORT_PART_ID; 476 | sdev->attrs.sw_version = VERSION_ID_SOFTIWARP; 477 | sdev->attrs.max_qp = SIW_MAX_QP; 478 | sdev->attrs.max_qp_wr = SIW_MAX_QP_WR; 479 | sdev->attrs.max_ord = SIW_MAX_ORD; 480 | sdev->attrs.max_ird = SIW_MAX_IRD; 481 | sdev->attrs.cap_flags = 0; 482 | sdev->attrs.max_sge = SIW_MAX_SGE; 483 | sdev->attrs.max_sge_rd = SIW_MAX_SGE_RD; 484 | sdev->attrs.max_cq = SIW_MAX_CQ; 485 | sdev->attrs.max_cqe = SIW_MAX_CQE; 486 | sdev->attrs.max_mr = SIW_MAX_MR; 487 | sdev->attrs.max_mr_size = rlimit(RLIMIT_MEMLOCK); 488 | sdev->attrs.max_pd = SIW_MAX_PD; 489 | sdev->attrs.max_mw = SIW_MAX_MW; 490 | sdev->attrs.max_fmr = SIW_MAX_FMR; 491 | sdev->attrs.max_srq = SIW_MAX_SRQ; 492 | sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; 493 | sdev->attrs.max_srq_sge = SIW_MAX_SGE; 494 | 495 | siw_idr_init(sdev); 496 | INIT_LIST_HEAD(&sdev->cep_list); 497 | INIT_LIST_HEAD(&sdev->qp_list); 498 | 499 | atomic_set(&sdev->num_ctx, 0); 500 | atomic_set(&sdev->num_srq, 0); 501 | atomic_set(&sdev->num_qp, 0); 502 | 
atomic_set(&sdev->num_cq, 0); 503 | atomic_set(&sdev->num_mem, 0); 504 | atomic_set(&sdev->num_pd, 0); 505 | atomic_set(&sdev->num_cep, 0); 506 | 507 | sdev->is_registered = 0; 508 | out: 509 | if (sdev) 510 | dev_hold(netdev); 511 | 512 | return sdev; 513 | } 514 | 515 | 516 | 517 | static int siw_netdev_event(struct notifier_block *nb, unsigned long event, 518 | void *arg) 519 | { 520 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) 521 | struct net_device *netdev = arg; 522 | #else 523 | struct net_device *netdev = netdev_notifier_info_to_dev(arg); 524 | #endif 525 | struct in_device *in_dev; 526 | struct siw_dev *sdev; 527 | 528 | dprint(DBG_DM, " (dev=%s): Event %lu\n", netdev->name, event); 529 | 530 | if (dev_net(netdev) != &init_net) 531 | goto done; 532 | 533 | if (!spin_trylock(&siw_dev_lock)) 534 | /* The module is being removed */ 535 | goto done; 536 | 537 | sdev = siw_dev_from_netdev(netdev); 538 | 539 | switch (event) { 540 | 541 | case NETDEV_UP: 542 | if (!sdev) 543 | break; 544 | 545 | if (sdev->is_registered) { 546 | sdev->state = IB_PORT_ACTIVE; 547 | siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE); 548 | break; 549 | } 550 | 551 | in_dev = in_dev_get(netdev); 552 | if (!in_dev) { 553 | dprint(DBG_DM, ": %s: no in_dev\n", netdev->name); 554 | sdev->state = IB_PORT_INIT; 555 | break; 556 | } 557 | 558 | if (in_dev->ifa_list) { 559 | sdev->state = IB_PORT_ACTIVE; 560 | siw_device_register(sdev); 561 | } else { 562 | dprint(DBG_DM, ": %s: no ifa\n", netdev->name); 563 | sdev->state = IB_PORT_INIT; 564 | } 565 | in_dev_put(in_dev); 566 | 567 | break; 568 | 569 | case NETDEV_DOWN: 570 | if (sdev && sdev->is_registered) { 571 | sdev->state = IB_PORT_DOWN; 572 | siw_port_event(sdev, 1, IB_EVENT_PORT_ERR); 573 | break; 574 | } 575 | break; 576 | 577 | case NETDEV_REGISTER: 578 | if (!sdev) { 579 | if (!siw_dev_qualified(netdev)) 580 | break; 581 | 582 | sdev = siw_device_create(netdev); 583 | if (sdev) { 584 | sdev->state = IB_PORT_INIT; 585 | dprint(DBG_DM, ": new siw device for %s\n", 586 | netdev->name); 587 | } 588 | } 589 | break; 590 | 591 | case NETDEV_UNREGISTER: 592 | if (sdev) { 593 | if (sdev->is_registered) 594 | siw_device_deregister(sdev); 595 | list_del(&sdev->list); 596 | siw_device_destroy(sdev); 597 | } 598 | break; 599 | 600 | case NETDEV_CHANGEADDR: 601 | if (sdev && sdev->is_registered) 602 | siw_port_event(sdev, 1, IB_EVENT_LID_CHANGE); 603 | 604 | break; 605 | /* 606 | * Todo: Below netdev events are currently not handled.
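 *
 * They share the 'break' below, i.e. the notifier acknowledges them
 * with NOTIFY_OK but leaves device and port state untouched.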
607 | */ 608 | case NETDEV_CHANGEMTU: 609 | case NETDEV_GOING_DOWN: 610 | case NETDEV_CHANGE: 611 | 612 | break; 613 | 614 | default: 615 | break; 616 | } 617 | spin_unlock(&siw_dev_lock); 618 | done: 619 | return NOTIFY_OK; 620 | } 621 | 622 | static struct notifier_block siw_netdev_nb = { 623 | .notifier_call = siw_netdev_event, 624 | }; 625 | #ifdef SIW_DB_SYSCALL 626 | extern long (*doorbell_call)(u32, u32, u32); 627 | #endif 628 | 629 | /* 630 | * siw_init_module - Initialize Softiwarp module and register with netdev 631 | * subsystem to create Softiwarp devices per net_device 632 | */ 633 | static __init int siw_init_module(void) 634 | { 635 | int rv; 636 | #ifdef USE_SQ_KTHREAD 637 | int nr_cpu; 638 | #endif 639 | 640 | if (SENDPAGE_THRESH < SIW_MAX_INLINE) { 641 | pr_info("SENDPAGE_THRESH: %d < SIW_MAX_INLINE: %d" 642 | " -- check SIW_MAX_SGE (%d)\n", 643 | (int)SENDPAGE_THRESH, (int)SIW_MAX_INLINE, 644 | (int)SIW_MAX_SGE); 645 | rv = -EINVAL; 646 | goto out_nobus; 647 | } 648 | /* 649 | * The xprtrdma module needs at least some rudimentary bus to set 650 | * some devices path MTU. 651 | */ 652 | rv = bus_register(&siw_bus); 653 | if (rv) 654 | goto out_nobus; 655 | 656 | siw_generic_dma_device.bus = &siw_bus; 657 | 658 | rv = device_register(&siw_generic_dma_device); 659 | if (rv) 660 | goto out; 661 | 662 | rv = siw_cm_init(); 663 | if (rv) 664 | goto out_unregister; 665 | 666 | rv = siw_sq_worker_init(); 667 | if (rv) 668 | goto out_unregister; 669 | 670 | siw_debug_init(); 671 | 672 | rv = register_netdevice_notifier(&siw_netdev_nb); 673 | if (rv) { 674 | siw_debugfs_delete(); 675 | goto out_unregister; 676 | } 677 | #ifdef SIW_DB_SYSCALL 678 | db_orig_call = doorbell_call; 679 | doorbell_call = siw_doorbell; 680 | 681 | pr_info("SoftiWARP: doorbell call assigned, syscall # %d\n", 682 | __NR_rdma_db); 683 | #else 684 | pr_info("SoftiWARP: no doorbell call\n"); 685 | #endif 686 | 687 | #ifdef USE_SQ_KTHREAD 688 | for (nr_cpu = 0; nr_cpu < NR_CPUS; nr_cpu++) { 689 | qp_tx_thread[nr_cpu] = NULL; 690 | if (tx_cpu_list[nr_cpu]) 691 | tx_on_all_cpus = 0; 692 | } 693 | 694 | if (siw_create_tx_threads(NR_CPUS, 1) == 0) { 695 | pr_info("Try starting default TX thread\n"); 696 | if (siw_create_tx_threads(1, 0) == 0) { 697 | pr_info("Could not start any TX thread\n"); 698 | goto out_unregister; 699 | } 700 | } 701 | #endif 702 | pr_info("SoftiWARP attached\n"); 703 | return 0; 704 | 705 | out_unregister: 706 | #ifdef USE_SQ_KTHREAD 707 | for (nr_cpu = 0; nr_cpu < NR_CPUS; nr_cpu++) { 708 | if (qp_tx_thread[nr_cpu]) { 709 | kthread_stop(qp_tx_thread[nr_cpu]); 710 | qp_tx_thread[nr_cpu] = NULL; 711 | } 712 | } 713 | #endif 714 | device_unregister(&siw_generic_dma_device); 715 | 716 | out: 717 | bus_unregister(&siw_bus); 718 | out_nobus: 719 | pr_info("SoftiWARP attach failed.
Error: %d\n", rv); 720 | siw_sq_worker_exit(); 721 | siw_cm_exit(); 722 | 723 | return rv; 724 | } 725 | 726 | 727 | static void __exit siw_exit_module(void) 728 | { 729 | #ifdef USE_SQ_KTHREAD 730 | int nr_cpu; 731 | 732 | for (nr_cpu = 0; nr_cpu < NR_CPUS; nr_cpu++) { 733 | if (qp_tx_thread[nr_cpu]) { 734 | kthread_stop(qp_tx_thread[nr_cpu]); 735 | qp_tx_thread[nr_cpu] = NULL; 736 | } 737 | } 738 | #endif 739 | 740 | spin_lock(&siw_dev_lock); 741 | unregister_netdevice_notifier(&siw_netdev_nb); 742 | spin_unlock(&siw_dev_lock); 743 | 744 | siw_sq_worker_exit(); 745 | siw_cm_exit(); 746 | 747 | #ifdef SIW_DB_SYSCALL 748 | doorbell_call = db_orig_call; 749 | #endif 750 | 751 | while (!list_empty(&siw_devlist)) { 752 | struct siw_dev *sdev = 753 | list_entry(siw_devlist.next, struct siw_dev, list); 754 | list_del(&sdev->list); 755 | if (sdev->is_registered) 756 | siw_device_deregister(sdev); 757 | 758 | siw_device_destroy(sdev); 759 | } 760 | siw_debugfs_delete(); 761 | 762 | device_unregister(&siw_generic_dma_device); 763 | 764 | bus_unregister(&siw_bus); 765 | 766 | pr_info("SoftiWARP detached\n"); 767 | } 768 | 769 | module_init(siw_init_module); 770 | module_exit(siw_exit_module); 771 | -------------------------------------------------------------------------------- /kernel/siw_mem.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Animesh Trivedi 5 | * Bernard Metzler 6 | * 7 | * Copyright (c) 2008-2016, IBM Corporation 8 | * 9 | * This software is available to you under a choice of one of two 10 | * licenses. You may choose to be licensed under the terms of the GNU 11 | * General Public License (GPL) Version 2, available from the file 12 | * COPYING in the main directory of this source tree, or the 13 | * BSD license below: 14 | * 15 | * Redistribution and use in source and binary forms, with or 16 | * without modification, are permitted provided that the following 17 | * conditions are met: 18 | * 19 | * - Redistributions of source code must retain the above copyright notice, 20 | * this list of conditions and the following disclaimer. 21 | * 22 | * - Redistributions in binary form must reproduce the above copyright 23 | * notice, this list of conditions and the following disclaimer in the 24 | * documentation and/or other materials provided with the distribution. 25 | * 26 | * - Neither the name of IBM nor the names of its contributors may be 27 | * used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 31 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 32 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 33 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 34 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 35 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 36 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 | * SOFTWARE. 
38 | */ 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | 47 | #include "siw.h" 48 | #include "siw_debug.h" 49 | 50 | static void siw_umem_update_stats(struct work_struct *work) 51 | { 52 | struct siw_umem *umem = container_of(work, struct siw_umem, work); 53 | struct mm_struct *mm_s = umem->mm_s; 54 | 55 | BUG_ON(!mm_s); 56 | 57 | down_write(&mm_s->mmap_sem); 58 | mm_s->pinned_vm -= umem->num_pages; 59 | up_write(&mm_s->mmap_sem); 60 | 61 | mmput(mm_s); 62 | 63 | kfree(umem->page_chunk); 64 | kfree(umem); 65 | } 66 | 67 | static void siw_free_chunk(struct siw_page_chunk *chunk, int num_pages) 68 | { 69 | struct page **p = chunk->p; 70 | 71 | while (num_pages--) { 72 | put_page(*p); 73 | p++; 74 | } 75 | } 76 | 77 | void siw_umem_release(struct siw_umem *umem) 78 | { 79 | struct task_struct *task = get_pid_task(umem->pid, PIDTYPE_PID); 80 | int i, num_pages = umem->num_pages; 81 | 82 | for (i = 0; num_pages; i++) { 83 | int to_free = min_t(int, PAGES_PER_CHUNK, num_pages); 84 | siw_free_chunk(&umem->page_chunk[i], to_free); 85 | kfree(umem->page_chunk[i].p); 86 | num_pages -= to_free; 87 | } 88 | put_pid(umem->pid); 89 | if (task) { 90 | struct mm_struct *mm_s = get_task_mm(task); 91 | put_task_struct(task); 92 | if (mm_s) { 93 | if (down_write_trylock(&mm_s->mmap_sem)) { 94 | mm_s->pinned_vm -= umem->num_pages; 95 | up_write(&mm_s->mmap_sem); 96 | mmput(mm_s); 97 | } else { 98 | /* 99 | * Schedule delayed accounting if 100 | * mm semaphore not available 101 | */ 102 | INIT_WORK(&umem->work, siw_umem_update_stats); 103 | umem->mm_s = mm_s; 104 | schedule_work(&umem->work); 105 | 106 | return; 107 | } 108 | } 109 | } 110 | kfree(umem->page_chunk); 111 | kfree(umem); 112 | } 113 | 114 | struct siw_umem *siw_umem_get(u64 start, u64 len) 115 | { 116 | struct siw_umem *umem; 117 | u64 first_page_va; 118 | unsigned long mlock_limit; 119 | int num_pages, num_chunks, i, rv = 0; 120 | 121 | if (!can_do_mlock()) 122 | return ERR_PTR(-EPERM); 123 | 124 | if (!len) 125 | return ERR_PTR(-EINVAL); 126 | 127 | first_page_va = start & PAGE_MASK; 128 | num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT; 129 | num_chunks = (num_pages >> CHUNK_SHIFT) + 1; 130 | 131 | umem = kzalloc(sizeof *umem, GFP_KERNEL); 132 | if (!umem) 133 | return ERR_PTR(-ENOMEM); 134 | 135 | umem->pid = get_task_pid(current, PIDTYPE_PID); 136 | 137 | down_write(¤t->mm->mmap_sem); 138 | 139 | mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 140 | 141 | if (num_pages + current->mm->pinned_vm > mlock_limit) { 142 | dprint(DBG_ON|DBG_MM, 143 | ": pages req: %d, limit: %lu, pinned: %lu\n", 144 | num_pages, mlock_limit, current->mm->pinned_vm); 145 | rv = -ENOMEM; 146 | goto out; 147 | } 148 | umem->fp_addr = first_page_va; 149 | 150 | umem->page_chunk = kzalloc(num_chunks * sizeof(struct siw_page_chunk), 151 | GFP_KERNEL); 152 | if (!umem->page_chunk) { 153 | rv = -ENOMEM; 154 | goto out; 155 | } 156 | for (i = 0; num_pages; i++) { 157 | int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK); 158 | umem->page_chunk[i].p = kzalloc(nents * sizeof(struct page *), 159 | GFP_KERNEL); 160 | got = 0; 161 | while (nents) { 162 | struct page **plist = &umem->page_chunk[i].p[got]; 163 | rv = get_user_pages(first_page_va, nents, 1, 1, plist, 164 | NULL); 165 | if (rv < 0 ) 166 | goto out; 167 | 168 | umem->num_pages += rv; 169 | current->mm->pinned_vm += rv; 170 | first_page_va += rv * PAGE_SIZE; 171 | nents -= rv; 172 | got += rv; 173 | } 174 | num_pages -= got; 175 | } 176 | 
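	/*
	 * Note: on the success path, rv still holds the positive page
	 * count returned by the last get_user_pages() call; the exit
	 * code below relies on rv > 0 to tell success from failure.
	 */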
out: 177 | up_write(¤t->mm->mmap_sem); 178 | 179 | if (rv > 0) 180 | return umem; 181 | 182 | siw_umem_release(umem); 183 | 184 | return ERR_PTR(rv); 185 | } 186 | 187 | 188 | /* 189 | * DMA mapping/address translation functions. 190 | * Used to populate siw private DMA mapping functions of 191 | * struct ib_dma_mapping_ops in struct ib_dev - see rdma/ib_verbs.h 192 | */ 193 | 194 | static int siw_mapping_error(struct ib_device *dev, u64 dma_addr) 195 | { 196 | return dma_addr == 0; 197 | } 198 | 199 | static u64 siw_dma_map_single(struct ib_device *dev, void *kva, size_t size, 200 | enum dma_data_direction dir) 201 | { 202 | /* siw uses kernel virtual addresses for data transfer */ 203 | return (u64) kva; 204 | } 205 | 206 | static void siw_dma_unmap_single(struct ib_device *dev, 207 | u64 addr, size_t size, 208 | enum dma_data_direction dir) 209 | { 210 | /* NOP */ 211 | } 212 | 213 | static u64 siw_dma_map_page(struct ib_device *dev, struct page *page, 214 | unsigned long offset, size_t size, 215 | enum dma_data_direction dir) 216 | { 217 | u64 kva = 0; 218 | 219 | BUG_ON(!valid_dma_direction(dir)); 220 | 221 | if (offset + size <= PAGE_SIZE) { 222 | kva = (u64) page_address(page); 223 | if (kva) 224 | kva += offset; 225 | } 226 | return kva; 227 | } 228 | 229 | static void siw_dma_unmap_page(struct ib_device *dev, 230 | u64 addr, size_t size, 231 | enum dma_data_direction dir) 232 | { 233 | /* NOP */ 234 | } 235 | 236 | static int siw_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, 237 | int n_sge, enum dma_data_direction dir) 238 | { 239 | struct scatterlist *sg; 240 | int i; 241 | 242 | BUG_ON(!valid_dma_direction(dir)); 243 | 244 | for_each_sg(sgl, sg, n_sge, i) { 245 | /* This is just a validity check */ 246 | if (unlikely(page_address(sg_page(sg)) == NULL)) { 247 | n_sge = 0; 248 | break; 249 | } 250 | sg->dma_address = (dma_addr_t) page_address(sg_page(sg)); 251 | sg_dma_len(sg) = sg->length; 252 | } 253 | return n_sge; 254 | } 255 | 256 | static void siw_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sgl, 257 | int n_sge, enum dma_data_direction dir) 258 | { 259 | /* NOP */ 260 | } 261 | 262 | static void siw_sync_single_for_cpu(struct ib_device *dev, u64 addr, 263 | size_t size, enum dma_data_direction dir) 264 | { 265 | /* NOP */ 266 | } 267 | 268 | static void siw_sync_single_for_device(struct ib_device *dev, u64 addr, 269 | size_t size, 270 | enum dma_data_direction dir) 271 | { 272 | /* NOP */ 273 | } 274 | 275 | static void *siw_dma_alloc_coherent(struct ib_device *dev, size_t size, 276 | u64 *dma_addr, gfp_t flag) 277 | { 278 | struct page *page; 279 | void *kva = NULL; 280 | 281 | page = alloc_pages(flag, get_order(size)); 282 | if (page) 283 | kva = page_address(page); 284 | if (dma_addr) 285 | *dma_addr = (u64)kva; 286 | 287 | return kva; 288 | } 289 | 290 | static void siw_dma_free_coherent(struct ib_device *dev, size_t size, 291 | void *kva, u64 dma_addr) 292 | { 293 | free_pages((unsigned long) kva, get_order(size)); 294 | } 295 | 296 | struct ib_dma_mapping_ops siw_dma_mapping_ops = { 297 | .mapping_error = siw_mapping_error, 298 | .map_single = siw_dma_map_single, 299 | .unmap_single = siw_dma_unmap_single, 300 | .map_page = siw_dma_map_page, 301 | .unmap_page = siw_dma_unmap_page, 302 | .map_sg = siw_dma_map_sg, 303 | .unmap_sg = siw_dma_unmap_sg, 304 | .sync_single_for_cpu = siw_sync_single_for_cpu, 305 | .sync_single_for_device = siw_sync_single_for_device, 306 | .alloc_coherent = siw_dma_alloc_coherent, 307 | .free_coherent = 
siw_dma_free_coherent 308 | }; 309 | 310 | static void *siw_dma_generic_alloc(struct device *dev, size_t size, 311 | dma_addr_t *dma_handle, gfp_t gfp, 312 | unsigned long attrs) 313 | { 314 | return siw_dma_alloc_coherent(NULL, size, dma_handle, gfp); 315 | } 316 | 317 | static void siw_dma_generic_free(struct device *dev, size_t size, 318 | void *vaddr, dma_addr_t dma_handle, 319 | unsigned long attrs) 320 | { 321 | siw_dma_free_coherent(NULL, size, vaddr, dma_handle); 322 | } 323 | 324 | static dma_addr_t siw_dma_generic_map_page(struct device *dev, 325 | struct page *page, 326 | unsigned long offset, 327 | size_t size, 328 | enum dma_data_direction dir, 329 | unsigned long attrs) 330 | { 331 | return siw_dma_map_page(NULL, page, offset, size, dir); 332 | } 333 | 334 | static void siw_dma_generic_unmap_page(struct device *dev, 335 | dma_addr_t handle, 336 | size_t size, 337 | enum dma_data_direction dir, 338 | unsigned long attrs) 339 | { 340 | siw_dma_unmap_page(NULL, handle, size, dir); 341 | } 342 | 343 | static int siw_dma_generic_map_sg(struct device *dev, struct scatterlist *sg, 344 | int nents, enum dma_data_direction dir, 345 | unsigned long attrs) 346 | { 347 | return siw_dma_map_sg(NULL, sg, nents, dir); 348 | } 349 | 350 | static void siw_dma_generic_unmap_sg(struct device *dev, 351 | struct scatterlist *sg, 352 | int nents, 353 | enum dma_data_direction dir, 354 | unsigned long attrs) 355 | { 356 | siw_dma_unmap_sg(NULL, sg, nents, dir); 357 | } 358 | 359 | static void siw_generic_sync_single_for_cpu(struct device *dev, 360 | dma_addr_t dma_handle, 361 | size_t size, 362 | enum dma_data_direction dir) 363 | { 364 | siw_sync_single_for_cpu(NULL, dma_handle, size, dir); 365 | } 366 | 367 | 368 | static void siw_generic_sync_single_for_device(struct device *dev, 369 | dma_addr_t dma_handle, 370 | size_t size, 371 | enum dma_data_direction dir) 372 | { 373 | siw_sync_single_for_device(NULL, dma_handle, size, dir); 374 | } 375 | 376 | static void siw_generic_sync_sg_for_cpu(struct device *dev, 377 | struct scatterlist *sg, 378 | int nents, 379 | enum dma_data_direction dir) 380 | { 381 | /* NOP */ 382 | } 383 | 384 | static void siw_generic_sync_sg_for_device(struct device *dev, 385 | struct scatterlist *sg, 386 | int nents, 387 | enum dma_data_direction dir) 388 | { 389 | /* NOP */ 390 | } 391 | 392 | static int siw_dma_generic_mapping_error(struct device *dev, 393 | dma_addr_t dma_addr) 394 | { 395 | return siw_mapping_error(NULL, dma_addr); 396 | } 397 | 398 | static int siw_dma_generic_supported(struct device *dev, u64 mask) 399 | { 400 | return 1; 401 | } 402 | 403 | static int siw_dma_generic_set_mask(struct device *dev, u64 mask) 404 | { 405 | if (!dev->dma_mask || !dma_supported(dev, mask)) 406 | return -EIO; 407 | 408 | *dev->dma_mask = mask; 409 | 410 | return 0; 411 | } 412 | 413 | struct dma_map_ops siw_dma_generic_ops = { 414 | .alloc = siw_dma_generic_alloc, 415 | .free = siw_dma_generic_free, 416 | .map_page = siw_dma_generic_map_page, 417 | .unmap_page = siw_dma_generic_unmap_page, 418 | .map_sg = siw_dma_generic_map_sg, 419 | .unmap_sg = siw_dma_generic_unmap_sg, 420 | .sync_single_for_cpu = siw_generic_sync_single_for_cpu, 421 | .sync_single_for_device = siw_generic_sync_single_for_device, 422 | .sync_sg_for_cpu = siw_generic_sync_sg_for_cpu, 423 | .sync_sg_for_device = siw_generic_sync_sg_for_device, 424 | .mapping_error = siw_dma_generic_mapping_error, 425 | .dma_supported = siw_dma_generic_supported, 426 | .set_dma_mask = siw_dma_generic_set_mask, 427 | 
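	/* no bus translation: siw hands out kernel virtual addresses */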
.is_phys = 1 428 | }; 429 | -------------------------------------------------------------------------------- /kernel/siw_obj.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 
37 | */ 38 | 39 | #include 40 | #include 41 | #include 42 | 43 | #include "siw.h" 44 | #include "siw_obj.h" 45 | #include "siw_cm.h" 46 | 47 | 48 | void siw_objhdr_init(struct siw_objhdr *hdr) 49 | { 50 | kref_init(&hdr->ref); 51 | } 52 | 53 | void siw_idr_init(struct siw_dev *sdev) 54 | { 55 | spin_lock_init(&sdev->idr_lock); 56 | 57 | idr_init(&sdev->qp_idr); 58 | idr_init(&sdev->cq_idr); 59 | idr_init(&sdev->pd_idr); 60 | idr_init(&sdev->mem_idr); 61 | } 62 | 63 | void siw_idr_release(struct siw_dev *sdev) 64 | { 65 | idr_destroy(&sdev->qp_idr); 66 | idr_destroy(&sdev->cq_idr); 67 | idr_destroy(&sdev->pd_idr); 68 | idr_destroy(&sdev->mem_idr); 69 | } 70 | 71 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) 72 | static inline int siw_add_obj(spinlock_t *lock, struct idr *idr, 73 | struct siw_objhdr *obj) 74 | { 75 | u32 pre_id, id; 76 | unsigned long flags; 77 | int rv; 78 | 79 | get_random_bytes(&pre_id, sizeof pre_id); 80 | pre_id &= 0xffff; 81 | again: 82 | do { 83 | if (!(idr_pre_get(idr, GFP_KERNEL))) 84 | return -ENOMEM; 85 | 86 | spin_lock_irqsave(lock, flags); 87 | rv = idr_get_new_above(idr, obj, pre_id, &id); 88 | spin_unlock_irqrestore(lock, flags); 89 | 90 | } while (rv == -EAGAIN); 91 | 92 | if (rv == 0) { 93 | siw_objhdr_init(obj); 94 | obj->id = id; 95 | dprint(DBG_OBJ, "(OBJ%d): IDR New Object\n", id); 96 | } else if (rv == -ENOSPC && pre_id != 1) { 97 | pre_id = 1; 98 | goto again; 99 | } else { 100 | dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n"); 101 | } 102 | return rv; 103 | } 104 | #else 105 | static inline int siw_add_obj(spinlock_t *lock, struct idr *idr, 106 | struct siw_objhdr *obj) 107 | { 108 | unsigned long flags; 109 | int id, pre_id; 110 | 111 | do { 112 | get_random_bytes(&pre_id, sizeof pre_id); 113 | pre_id &= 0xffffff; 114 | } while (pre_id == 0); 115 | again: 116 | spin_lock_irqsave(lock, flags); 117 | id = idr_alloc(idr, obj, pre_id, 0xffffff - 1, GFP_KERNEL); 118 | spin_unlock_irqrestore(lock, flags); 119 | 120 | if (id > 0) { 121 | siw_objhdr_init(obj); 122 | obj->id = id; 123 | dprint(DBG_OBJ, "(OBJ%d): IDR New Object\n", id); 124 | } else if (id == -ENOSPC && pre_id != 1) { 125 | pre_id = 1; 126 | goto again; 127 | } else { 128 | BUG_ON(id == 0); 129 | dprint(DBG_OBJ|DBG_ON, "(OBJ??): IDR New Object failed!\n"); 130 | } 131 | return id > 0 ? 0 : id; 132 | } 133 | #endif 134 | 135 | static inline struct siw_objhdr *siw_get_obj(struct idr *idr, int id) 136 | { 137 | struct siw_objhdr *obj; 138 | 139 | obj = idr_find(idr, id); 140 | if (obj) 141 | kref_get(&obj->ref); 142 | 143 | return obj; 144 | } 145 | 146 | struct siw_cq *siw_cq_id2obj(struct siw_dev *sdev, int id) 147 | { 148 | struct siw_objhdr *obj = siw_get_obj(&sdev->cq_idr, id); 149 | if (obj) 150 | return container_of(obj, struct siw_cq, hdr); 151 | 152 | return NULL; 153 | } 154 | 155 | struct siw_qp *siw_qp_id2obj(struct siw_dev *sdev, int id) 156 | { 157 | struct siw_objhdr *obj = siw_get_obj(&sdev->qp_idr, id); 158 | if (obj) 159 | return container_of(obj, struct siw_qp, hdr); 160 | 161 | return NULL; 162 | } 163 | 164 | /* 165 | * siw_mem_id2obj() 166 | * 167 | * resolves memory from stag given by id. 
might be called from: 168 | * o process context before sending out of sgl, or 169 | * o in softirq when resolving target memory 170 | */ 171 | struct siw_mem *siw_mem_id2obj(struct siw_dev *sdev, int id) 172 | { 173 | struct siw_objhdr *obj; 174 | 175 | rcu_read_lock(); 176 | obj = siw_get_obj(&sdev->mem_idr, id); 177 | rcu_read_unlock(); 178 | 179 | if (obj) { 180 | dprint(DBG_MM|DBG_OBJ, "(MEM%d): New refcount: %d\n", 181 | obj->id, obj->ref.refcount.counter); 182 | 183 | return container_of(obj, struct siw_mem, hdr); 184 | } 185 | dprint(DBG_MM|DBG_OBJ|DBG_ON, "(MEM%d): not found!\n", id); 186 | 187 | return NULL; 188 | } 189 | 190 | int siw_qp_add(struct siw_dev *sdev, struct siw_qp *qp) 191 | { 192 | int rv = siw_add_obj(&sdev->idr_lock, &sdev->qp_idr, &qp->hdr); 193 | if (!rv) { 194 | dprint(DBG_OBJ, "(QP%d): New Object\n", QP_ID(qp)); 195 | qp->hdr.sdev = sdev; 196 | } 197 | return rv; 198 | } 199 | 200 | int siw_cq_add(struct siw_dev *sdev, struct siw_cq *cq) 201 | { 202 | int rv = siw_add_obj(&sdev->idr_lock, &sdev->cq_idr, &cq->hdr); 203 | if (!rv) { 204 | dprint(DBG_OBJ, "(CQ%d): New Object\n", cq->hdr.id); 205 | cq->hdr.sdev = sdev; 206 | } 207 | return rv; 208 | } 209 | 210 | int siw_pd_add(struct siw_dev *sdev, struct siw_pd *pd) 211 | { 212 | int rv = siw_add_obj(&sdev->idr_lock, &sdev->pd_idr, &pd->hdr); 213 | if (!rv) { 214 | dprint(DBG_OBJ, "(PD%d): New Object\n", pd->hdr.id); 215 | pd->hdr.sdev = sdev; 216 | } 217 | return rv; 218 | } 219 | 220 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0) 221 | /* 222 | * Stag lookup is based on its index part only (24 bits) 223 | * It is assumed that the idr_get_new_above(,,1,) function will 224 | * always return a new id within this range (0x1...0xffffff), 225 | * if one is available. 226 | * The code avoids special Stag of zero and tries to randomize 227 | * STag values. 228 | */ 229 | int siw_mem_add(struct siw_dev *sdev, struct siw_mem *m) 230 | { 231 | u32 id, pre_id; 232 | unsigned long flags; 233 | int rv; 234 | 235 | do { 236 | get_random_bytes(&pre_id, sizeof pre_id); 237 | pre_id &= 0xffff; 238 | } while (pre_id == 0); 239 | again: 240 | do { 241 | if (!(idr_pre_get(&sdev->mem_idr, GFP_KERNEL))) 242 | return -ENOMEM; 243 | 244 | spin_lock_irqsave(&sdev->idr_lock, flags); 245 | rv = idr_get_new_above(&sdev->mem_idr, m, pre_id, &id); 246 | spin_unlock_irqrestore(&sdev->idr_lock, flags); 247 | 248 | } while (rv == -EAGAIN); 249 | 250 | if (rv == -ENOSPC || (rv == 0 && id > SIW_STAG_MAX)) { 251 | if (rv == 0) { 252 | spin_lock_irqsave(&sdev->idr_lock, flags); 253 | idr_remove(&sdev->mem_idr, id); 254 | spin_unlock_irqrestore(&sdev->idr_lock, flags); 255 | } 256 | if (pre_id == 1) { 257 | dprint(DBG_OBJ|DBG_MM|DBG_ON, 258 | "(IDR): New Object failed: %d\n", pre_id); 259 | return -ENOSPC; 260 | } 261 | pre_id = 1; 262 | goto again; 263 | } else if (rv) { 264 | dprint(DBG_OBJ|DBG_MM|DBG_ON, 265 | "(IDR%d): New Object failed: rv %d\n", id, rv); 266 | return rv; 267 | } 268 | siw_objhdr_init(&m->hdr); 269 | m->hdr.id = id; 270 | m->hdr.sdev = sdev; 271 | dprint(DBG_OBJ|DBG_MM, "(IDR%d): New Object\n", id); 272 | 273 | return 0; 274 | } 275 | #else 276 | /* 277 | * Stag lookup is based on its index part only (24 bits). 278 | * The code avoids special Stag of zero and tries to randomize 279 | * STag values between 1 and SIW_STAG_MAX. 
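 * Note (sketch of the lookup side): an STag consists of a 24-bit
 * index and an 8-bit key; only the index part lives in the IDR, so
 * a lookup strips the key first, e.g. STag 0x00abcd01 resolves via
 * index 0x00abcd01 >> 8 == 0x00abcd (cf. siw_mem_id2obj() callers).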
280 | */ 281 | int siw_mem_add(struct siw_dev *sdev, struct siw_mem *m) 282 | { 283 | unsigned long flags; 284 | int id, pre_id; 285 | 286 | do { 287 | get_random_bytes(&pre_id, sizeof pre_id); 288 | pre_id &= 0xffffff; 289 | } while (pre_id == 0); 290 | again: 291 | spin_lock_irqsave(&sdev->idr_lock, flags); 292 | id = idr_alloc(&sdev->mem_idr, m, pre_id, SIW_STAG_MAX, GFP_KERNEL); 293 | spin_unlock_irqrestore(&sdev->idr_lock, flags); 294 | 295 | if (id == -ENOSPC || id > SIW_STAG_MAX) { 296 | if (pre_id == 1) { 297 | dprint(DBG_OBJ|DBG_MM|DBG_ON, 298 | "(IDR): New Object failed: %d\n", pre_id); 299 | return -ENOSPC; 300 | } 301 | pre_id = 1; 302 | goto again; 303 | } 304 | siw_objhdr_init(&m->hdr); 305 | m->hdr.id = id; 306 | m->hdr.sdev = sdev; 307 | dprint(DBG_OBJ|DBG_MM, "(IDR%d): New Object\n", id); 308 | 309 | return 0; 310 | } 311 | #endif 312 | 313 | void siw_remove_obj(spinlock_t *lock, struct idr *idr, 314 | struct siw_objhdr *hdr) 315 | { 316 | unsigned long flags; 317 | 318 | dprint(DBG_OBJ, "(OBJ%d): IDR Remove Object\n", hdr->id); 319 | 320 | spin_lock_irqsave(lock, flags); 321 | idr_remove(idr, hdr->id); 322 | spin_unlock_irqrestore(lock, flags); 323 | } 324 | 325 | 326 | /********** routines to put objs back and free if no ref left *****/ 327 | 328 | static void siw_free_cq(struct kref *ref) 329 | { 330 | struct siw_cq *cq = 331 | (container_of(container_of(ref, struct siw_objhdr, ref), 332 | struct siw_cq, hdr)); 333 | 334 | dprint(DBG_OBJ, "(CQ%d): Free Object\n", cq->hdr.id); 335 | 336 | atomic_dec(&cq->hdr.sdev->num_cq); 337 | if (cq->queue) 338 | vfree(cq->queue); 339 | kfree(cq); 340 | } 341 | 342 | static void siw_free_qp(struct kref *ref) 343 | { 344 | struct siw_qp *qp = 345 | container_of(container_of(ref, struct siw_objhdr, ref), 346 | struct siw_qp, hdr); 347 | struct siw_dev *sdev = qp->hdr.sdev; 348 | unsigned long flags; 349 | 350 | dprint(DBG_OBJ|DBG_CM, "(QP%d): Free Object\n", QP_ID(qp)); 351 | 352 | if (qp->cep) 353 | siw_cep_put(qp->cep); 354 | 355 | siw_remove_obj(&sdev->idr_lock, &sdev->qp_idr, &qp->hdr); 356 | 357 | spin_lock_irqsave(&sdev->idr_lock, flags); 358 | list_del(&qp->devq); 359 | spin_unlock_irqrestore(&sdev->idr_lock, flags); 360 | 361 | if (qp->sendq) 362 | vfree(qp->sendq); 363 | if (qp->recvq) 364 | vfree(qp->recvq); 365 | if (qp->irq) 366 | vfree(qp->irq); 367 | if (qp->orq) 368 | vfree(qp->orq); 369 | 370 | atomic_dec(&sdev->num_qp); 371 | kfree(qp); 372 | } 373 | 374 | static void siw_free_pd(struct kref *ref) 375 | { 376 | struct siw_pd *pd = 377 | container_of(container_of(ref, struct siw_objhdr, ref), 378 | struct siw_pd, hdr); 379 | 380 | dprint(DBG_OBJ, "(PD%d): Free Object\n", pd->hdr.id); 381 | 382 | atomic_dec(&pd->hdr.sdev->num_pd); 383 | kfree(pd); 384 | } 385 | 386 | static void siw_free_mem(struct kref *ref) 387 | { 388 | struct siw_mem *m; 389 | 390 | m = container_of(container_of(ref, struct siw_objhdr, ref), 391 | struct siw_mem, hdr); 392 | 393 | dprint(DBG_MM|DBG_OBJ, "(MEM%d): Free Object\n", OBJ_ID(m)); 394 | 395 | atomic_dec(&m->hdr.sdev->num_mem); 396 | 397 | if (SIW_MEM_IS_MW(m)) { 398 | struct siw_mw *mw = container_of(m, struct siw_mw, mem); 399 | kfree_rcu(mw, rcu); 400 | } else { 401 | struct siw_mr *mr = container_of(m, struct siw_mr, mem); 402 | dprint(DBG_MM|DBG_OBJ, "(MEM%d): Release UMem\n", OBJ_ID(m)); 403 | if (mr->umem) 404 | siw_umem_release(mr->umem); 405 | kfree_rcu(mr, rcu); 406 | } 407 | } 408 | 409 | 410 | void siw_cq_put(struct siw_cq *cq) 411 | { 412 | dprint(DBG_OBJ, "(CQ%d): Old refcount: 
%d\n", 413 | OBJ_ID(cq), atomic_read(&cq->hdr.ref.refcount)); 414 | kref_put(&cq->hdr.ref, siw_free_cq); 415 | } 416 | 417 | void siw_qp_put(struct siw_qp *qp) 418 | { 419 | dprint(DBG_OBJ, "(QP%d): Old refcount: %d\n", 420 | QP_ID(qp), atomic_read(&qp->hdr.ref.refcount)); 421 | kref_put(&qp->hdr.ref, siw_free_qp); 422 | } 423 | 424 | void siw_pd_put(struct siw_pd *pd) 425 | { 426 | dprint(DBG_OBJ, "(PD%d): Old refcount: %d\n", 427 | OBJ_ID(pd), atomic_read(&pd->hdr.ref.refcount)); 428 | kref_put(&pd->hdr.ref, siw_free_pd); 429 | } 430 | 431 | void siw_mem_put(struct siw_mem *m) 432 | { 433 | dprint(DBG_MM|DBG_OBJ, "(MEM%d): Old refcount: %d\n", 434 | OBJ_ID(m), atomic_read(&m->hdr.ref.refcount)); 435 | kref_put(&m->hdr.ref, siw_free_mem); 436 | } 437 | 438 | 439 | /***** routines for WQE handling ***/ 440 | 441 | static inline void siw_unref_mem_sgl(union siw_mem_resolved *mem, int num_sge) 442 | { 443 | while (num_sge--) { 444 | if (mem->obj != NULL) { 445 | siw_mem_put(mem->obj); 446 | mem->obj = NULL; 447 | mem++; 448 | } else 449 | break; 450 | } 451 | } 452 | 453 | void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op) 454 | { 455 | switch (op) { 456 | 457 | case SIW_OP_SEND: 458 | case SIW_OP_WRITE: 459 | case SIW_OP_SEND_WITH_IMM: 460 | case SIW_OP_READ: 461 | if (!(wqe->sqe.flags & SIW_WQE_INLINE)) 462 | siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge); 463 | break; 464 | 465 | case SIW_OP_RECEIVE: 466 | siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge); 467 | break; 468 | 469 | case SIW_OP_READ_RESPONSE: 470 | siw_unref_mem_sgl(wqe->mem, 1); 471 | break; 472 | 473 | default: 474 | WARN_ON(1); 475 | } 476 | } 477 | -------------------------------------------------------------------------------- /kernel/siw_obj.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #ifndef _SIW_OBJ_H 40 | #define _SIW_OBJ_H 41 | 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | #include 49 | 50 | #include "siw_debug.h" 51 | 52 | 53 | static inline struct siw_dev *siw_dev_ofa2siw(struct ib_device *ofa_dev) 54 | { 55 | return container_of(ofa_dev, struct siw_dev, ofa_dev); 56 | } 57 | 58 | static inline void siw_cq_get(struct siw_cq *cq) 59 | { 60 | kref_get(&cq->hdr.ref); 61 | dprint(DBG_OBJ, "(CQ%d): New refcount: %d\n", 62 | OBJ_ID(cq), atomic_read(&cq->hdr.ref.refcount)); 63 | } 64 | static inline void siw_qp_get(struct siw_qp *qp) 65 | { 66 | kref_get(&qp->hdr.ref); 67 | dprint(DBG_OBJ, "(QP%d): New refcount: %d\n", 68 | OBJ_ID(qp), atomic_read(&qp->hdr.ref.refcount)); 69 | } 70 | static inline void siw_pd_get(struct siw_pd *pd) 71 | { 72 | kref_get(&pd->hdr.ref); 73 | dprint(DBG_OBJ, "(PD%d): New refcount: %d\n", 74 | OBJ_ID(pd), atomic_read(&pd->hdr.ref.refcount)); 75 | } 76 | static inline void siw_mem_get(struct siw_mem *mem) 77 | { 78 | kref_get(&mem->hdr.ref); 79 | dprint(DBG_OBJ|DBG_MM, "(MEM%d): New refcount: %d\n", 80 | OBJ_ID(mem), atomic_read(&mem->hdr.ref.refcount)); 81 | } 82 | 83 | extern void siw_remove_obj(spinlock_t *lock, struct idr *idr, 84 | struct siw_objhdr *hdr); 85 | 86 | extern void siw_objhdr_init(struct siw_objhdr *); 87 | extern void siw_idr_init(struct siw_dev *); 88 | extern void siw_idr_release(struct siw_dev *); 89 | 90 | extern struct siw_cq *siw_cq_id2obj(struct siw_dev *, int); 91 | extern struct siw_qp *siw_qp_id2obj(struct siw_dev *, int); 92 | extern struct siw_mem *siw_mem_id2obj(struct siw_dev *, int); 93 | 94 | extern int siw_qp_add(struct siw_dev *, struct siw_qp *); 95 | extern int siw_cq_add(struct siw_dev *, struct siw_cq *); 96 | extern int siw_pd_add(struct siw_dev *, struct siw_pd *); 97 | extern int siw_mem_add(struct siw_dev *, struct siw_mem *m); 98 | 99 | extern struct siw_wqe *siw_freeq_wqe_get(struct siw_qp *); 100 | 101 | extern void siw_cq_put(struct siw_cq *); 102 | extern void siw_qp_put(struct siw_qp *); 103 | extern void siw_pd_put(struct siw_pd *); 104 | extern void siw_mem_put(struct siw_mem *); 105 | extern void siw_wqe_put_mem(struct siw_wqe *, enum siw_opcode); 106 | 107 | #endif 108 | -------------------------------------------------------------------------------- /kernel/siw_qp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * Fredy Neeser 6 | * 7 | * Copyright (c) 2008-2016, IBM Corporation 8 | * 9 | * This software is available to you under a choice of one of two 10 | * licenses. You may choose to be licensed under the terms of the GNU 11 | * General Public License (GPL) Version 2, available from the file 12 | * COPYING in the main directory of this source tree, or the 13 | * BSD license below: 14 | * 15 | * Redistribution and use in source and binary forms, with or 16 | * without modification, are permitted provided that the following 17 | * conditions are met: 18 | * 19 | * - Redistributions of source code must retain the above copyright notice, 20 | * this list of conditions and the following disclaimer. 
21 | * 22 | * - Redistributions in binary form must reproduce the above copyright 23 | * notice, this list of conditions and the following disclaimer in the 24 | * documentation and/or other materials provided with the distribution. 25 | * 26 | * - Neither the name of IBM nor the names of its contributors may be 27 | * used to endorse or promote products derived from this software without 28 | * specific prior written permission. 29 | * 30 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 31 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 32 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 33 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 34 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 35 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 36 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 37 | * SOFTWARE. 38 | */ 39 | 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | 52 | #include 53 | #include 54 | #include 55 | #include 56 | 57 | #include "siw.h" 58 | #include "siw_obj.h" 59 | #include "siw_cm.h" 60 | 61 | 62 | #if DPRINT_MASK > 0 63 | static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = { 64 | [SIW_QP_STATE_IDLE] = "IDLE", 65 | [SIW_QP_STATE_RTR] = "RTR", 66 | [SIW_QP_STATE_RTS] = "RTS", 67 | [SIW_QP_STATE_CLOSING] = "CLOSING", 68 | [SIW_QP_STATE_TERMINATE] = "TERMINATE", 69 | [SIW_QP_STATE_ERROR] = "ERROR", 70 | [SIW_QP_STATE_MORIBUND] = "MORIBUND", 71 | [SIW_QP_STATE_UNDEF] = "UNDEF" 72 | }; 73 | #endif 74 | 75 | /* 76 | * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a 77 | * per-RDMAP message basis. Please keep order of initializer. All MPA len 78 | * is initialized to minimum packet size. 
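 * Example: the RDMA WRITE entry below advertises an MPA length of
 * sizeof(struct iwarp_rdma_write) - 2 (the header size minus the
 * 2-byte MPA length field itself) and a DDP/RDMAP control word with
 * the TAGGED and LAST flags set plus opcode RDMAP_RDMA_WRITE.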
79 | */ 80 | struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = { { 81 | .hdr_len = sizeof(struct iwarp_rdma_write), 82 | .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2), 83 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST 84 | | cpu_to_be16(DDP_VERSION << 8) 85 | | cpu_to_be16(RDMAP_VERSION << 6) 86 | | cpu_to_be16(RDMAP_RDMA_WRITE), 87 | .proc_data = siw_proc_write 88 | }, 89 | { 90 | .hdr_len = sizeof(struct iwarp_rdma_rreq), 91 | .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2), 92 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST 93 | | cpu_to_be16(DDP_VERSION << 8) 94 | | cpu_to_be16(RDMAP_VERSION << 6) 95 | | cpu_to_be16(RDMAP_RDMA_READ_REQ), 96 | .proc_data = siw_proc_rreq 97 | }, 98 | { 99 | .hdr_len = sizeof(struct iwarp_rdma_rresp), 100 | .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2), 101 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST 102 | | cpu_to_be16(DDP_VERSION << 8) 103 | | cpu_to_be16(RDMAP_VERSION << 6) 104 | | cpu_to_be16(RDMAP_RDMA_READ_RESP), 105 | .proc_data = siw_proc_rresp 106 | }, 107 | { 108 | .hdr_len = sizeof(struct iwarp_send), 109 | .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2), 110 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST 111 | | cpu_to_be16(DDP_VERSION << 8) 112 | | cpu_to_be16(RDMAP_VERSION << 6) 113 | | cpu_to_be16(RDMAP_SEND), 114 | .proc_data = siw_proc_send 115 | }, 116 | { 117 | .hdr_len = sizeof(struct iwarp_send_inv), 118 | .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2), 119 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST 120 | | cpu_to_be16(DDP_VERSION << 8) 121 | | cpu_to_be16(RDMAP_VERSION << 6) 122 | | cpu_to_be16(RDMAP_SEND_INVAL), 123 | .proc_data = siw_proc_unsupp 124 | }, 125 | { 126 | .hdr_len = sizeof(struct iwarp_send), 127 | .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2), 128 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST 129 | | cpu_to_be16(DDP_VERSION << 8) 130 | | cpu_to_be16(RDMAP_VERSION << 6) 131 | | cpu_to_be16(RDMAP_SEND_SE), 132 | .proc_data = siw_proc_send 133 | }, 134 | { 135 | .hdr_len = sizeof(struct iwarp_send_inv), 136 | .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2), 137 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST 138 | | cpu_to_be16(DDP_VERSION << 8) 139 | | cpu_to_be16(RDMAP_VERSION << 6) 140 | | cpu_to_be16(RDMAP_SEND_SE_INVAL), 141 | .proc_data = siw_proc_unsupp 142 | }, 143 | { 144 | .hdr_len = sizeof(struct iwarp_terminate), 145 | .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2), 146 | .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST 147 | | cpu_to_be16(DDP_VERSION << 8) 148 | | cpu_to_be16(RDMAP_VERSION << 6) 149 | | cpu_to_be16(RDMAP_TERMINATE), 150 | .proc_data = siw_proc_terminate 151 | } }; 152 | 153 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) 154 | static void siw_qp_llp_data_ready(struct sock *sk, int flags) 155 | #else 156 | static void siw_qp_llp_data_ready(struct sock *sk) 157 | #endif 158 | { 159 | struct siw_qp *qp; 160 | 161 | read_lock(&sk->sk_callback_lock); 162 | 163 | if (unlikely(!sk->sk_user_data || !sk_to_qp(sk))) { 164 | dprint(DBG_ON, " No QP: %p\n", sk->sk_user_data); 165 | goto done; 166 | } 167 | qp = sk_to_qp(sk); 168 | 169 | if (likely(!qp->rx_ctx.rx_suspend && 170 | down_read_trylock(&qp->state_lock))) { 171 | read_descriptor_t rd_desc = {.arg.data = qp, .count = 1}; 172 | 173 | dprint(DBG_SK|DBG_RX, "(QP%d): " 174 | "state (before tcp_read_sock)=%d\n", 175 | QP_ID(qp), qp->attrs.state); 176 | 177 | if (likely(qp->attrs.state == SIW_QP_STATE_RTS)) 178 | /* 179 | * Implements data receive operation during 180 | * socket 
callback. TCP gracefully catches 181 | * the case where there is nothing to receive 182 | * (not calling siw_tcp_rx_data() then). 183 | */ 184 | tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data); 185 | 186 | dprint(DBG_SK|DBG_RX, "(QP%d): " 187 | "state (after tcp_read_sock)=%d\n", 188 | QP_ID(qp), qp->attrs.state); 189 | 190 | up_read(&qp->state_lock); 191 | } else { 192 | dprint(DBG_SK|DBG_RX, "(QP%d): " 193 | "Unable to RX: rx_suspend: %d\n", 194 | QP_ID(qp), qp->rx_ctx.rx_suspend); 195 | } 196 | done: 197 | read_unlock(&sk->sk_callback_lock); 198 | } 199 | 200 | 201 | void siw_qp_llp_close(struct siw_qp *qp) 202 | { 203 | dprint(DBG_CM, "(QP%d): Enter: SIW QP state = %s, cep=0x%p\n", 204 | QP_ID(qp), siw_qp_state_to_string[qp->attrs.state], 205 | qp->cep); 206 | 207 | down_write(&qp->state_lock); 208 | 209 | dprint(DBG_CM, "(QP%d): state locked\n", QP_ID(qp)); 210 | 211 | qp->rx_ctx.rx_suspend = 1; 212 | qp->tx_ctx.tx_suspend = 1; 213 | qp->attrs.llp_stream_handle = NULL; 214 | 215 | switch (qp->attrs.state) { 216 | 217 | case SIW_QP_STATE_RTS: 218 | case SIW_QP_STATE_RTR: 219 | case SIW_QP_STATE_IDLE: 220 | case SIW_QP_STATE_TERMINATE: 221 | 222 | qp->attrs.state = SIW_QP_STATE_ERROR; 223 | 224 | break; 225 | /* 226 | * SIW_QP_STATE_CLOSING: 227 | * 228 | * This is a forced close. shall the QP be moved to 229 | * ERROR or IDLE ? 230 | */ 231 | case SIW_QP_STATE_CLOSING: 232 | if (tx_wqe(qp)->wr_status == SR_WR_IDLE) 233 | qp->attrs.state = SIW_QP_STATE_ERROR; 234 | else 235 | qp->attrs.state = SIW_QP_STATE_IDLE; 236 | 237 | break; 238 | 239 | default: 240 | dprint(DBG_CM, " No state transition needed: %d\n", 241 | qp->attrs.state); 242 | break; 243 | } 244 | siw_sq_flush(qp); 245 | siw_rq_flush(qp); 246 | 247 | /* 248 | * dereference closing CEP 249 | */ 250 | if (qp->cep) { 251 | siw_cep_put(qp->cep); 252 | qp->cep = NULL; 253 | } 254 | 255 | up_write(&qp->state_lock); 256 | dprint(DBG_CM, "(QP%d): Exit: SIW QP state = %s, cep=0x%p\n", 257 | QP_ID(qp), siw_qp_state_to_string[qp->attrs.state], 258 | qp->cep); 259 | } 260 | 261 | 262 | /* 263 | * socket callback routine informing about newly available send space. 264 | * Function schedules SQ work for processing SQ items. 265 | */ 266 | static void siw_qp_llp_write_space(struct sock *sk) 267 | { 268 | struct siw_qp *qp = sk_to_qp(sk); 269 | 270 | /* 271 | * TODO: 272 | * Resemble sk_stream_write_space() logic for iWARP constraints: 273 | * Clear SOCK_NOSPACE only if sendspace may hold some reasonable 274 | * sized FPDU. 
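 * The SIW_TX_FULLSEGS variant below already approximates this rule:
 * SQ work is only scheduled once sk_stream_wspace() can hold the
 * current FPDU (tx_ctx.fpdu_len).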
275 | */ 276 | #ifdef SIW_TX_FULLSEGS 277 | struct socket *sock = sk->sk_socket; 278 | if (sk_stream_wspace(sk) >= (int)qp->tx_ctx.fpdu_len && sock) { 279 | clear_bit(SOCK_NOSPACE, &sock->flags); 280 | siw_sq_queue_work(qp); 281 | } 282 | #else 283 | sk_stream_write_space(sk); 284 | 285 | if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) 286 | siw_sq_queue_work(qp); 287 | #endif 288 | } 289 | 290 | static void siw_qp_socket_assoc(struct socket *s, struct siw_qp *qp) 291 | { 292 | struct sock *sk = s->sk; 293 | 294 | write_lock_bh(&sk->sk_callback_lock); 295 | 296 | qp->attrs.llp_stream_handle = s; 297 | s->sk->sk_data_ready = siw_qp_llp_data_ready; 298 | s->sk->sk_write_space = siw_qp_llp_write_space; 299 | 300 | write_unlock_bh(&sk->sk_callback_lock); 301 | } 302 | 303 | 304 | static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) 305 | { 306 | dprint(DBG_CM|DBG_WR, "(QP%d): %d %d\n", QP_ID(qp), irq_size, orq_size); 307 | 308 | if (!irq_size) 309 | irq_size = 1; 310 | if (!orq_size) 311 | orq_size = 1; 312 | 313 | qp->attrs.irq_size = irq_size; 314 | qp->attrs.orq_size = orq_size; 315 | 316 | qp->irq = vmalloc(irq_size * sizeof(struct siw_sqe)); 317 | if (!qp->irq) { 318 | dprint(DBG_ON, "(QP%d): Failed\n", QP_ID(qp)); 319 | qp->attrs.irq_size = 0; 320 | return -ENOMEM; 321 | } 322 | qp->orq = vmalloc(orq_size * sizeof(struct siw_sqe)); 323 | if (!qp->orq) { 324 | dprint(DBG_ON, "(QP%d): Failed\n", QP_ID(qp)); 325 | qp->attrs.orq_size = 0; 326 | qp->attrs.irq_size = 0; 327 | vfree(qp->irq); 328 | return -ENOMEM; 329 | } 330 | memset(qp->irq, 0, irq_size * sizeof(struct siw_sqe)); 331 | memset(qp->orq, 0, orq_size * sizeof(struct siw_sqe)); 332 | 333 | return 0; 334 | } 335 | 336 | 337 | static void siw_send_terminate(struct siw_qp *qp) 338 | { 339 | struct iwarp_terminate pkt; 340 | 341 | memset(&pkt, 0, sizeof pkt); 342 | /* 343 | * TODO: send TERMINATE 344 | */ 345 | dprint(DBG_CM, "(QP%d): Todo\n", QP_ID(qp)); 346 | } 347 | 348 | 349 | static int siw_qp_enable_crc(struct siw_qp *qp) 350 | { 351 | struct siw_iwarp_rx *c_rx = &qp->rx_ctx; 352 | struct siw_iwarp_tx *c_tx = &qp->tx_ctx; 353 | int rv = 0; 354 | 355 | c_tx->mpa_crc_hd.tfm = crypto_alloc_shash("crc32c", 0, 356 | CRYPTO_ALG_ASYNC); 357 | if (IS_ERR(c_tx->mpa_crc_hd.tfm)) { 358 | rv = -PTR_ERR(c_tx->mpa_crc_hd.tfm); 359 | goto out; 360 | } 361 | c_rx->mpa_crc_hd.tfm = crypto_alloc_shash("crc32c", 0, 362 | CRYPTO_ALG_ASYNC); 363 | if (IS_ERR(c_rx->mpa_crc_hd.tfm)) { 364 | rv = -PTR_ERR(c_rx->mpa_crc_hd.tfm); 365 | crypto_free_shash(c_tx->mpa_crc_hd.tfm); 366 | } 367 | out: 368 | if (rv) 369 | dprint(DBG_ON, "(QP%d): Failed loading crc32c: error=%d.", 370 | QP_ID(qp), rv); 371 | else 372 | c_tx->crc_enabled = c_rx->crc_enabled = 1; 373 | 374 | return rv; 375 | } 376 | 377 | 378 | /* 379 | * caller holds qp->state_lock 380 | */ 381 | int 382 | siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs, 383 | enum siw_qp_attr_mask mask) 384 | { 385 | int drop_conn = 0, rv = 0; 386 | 387 | if (!mask) 388 | return 0; 389 | 390 | dprint(DBG_CM, "(QP%d)\n", QP_ID(qp)); 391 | 392 | if (mask != SIW_QP_ATTR_STATE) { 393 | /* 394 | * changes of qp attributes (maybe state, too) 395 | */ 396 | if (mask & SIW_QP_ATTR_ACCESS_FLAGS) { 397 | 398 | if (attrs->flags & SIW_RDMA_BIND_ENABLED) 399 | qp->attrs.flags |= SIW_RDMA_BIND_ENABLED; 400 | else 401 | qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED; 402 | 403 | if (attrs->flags & SIW_RDMA_WRITE_ENABLED) 404 | qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED; 405 | else 406 | 
qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED; 407 | 408 | if (attrs->flags & SIW_RDMA_READ_ENABLED) 409 | qp->attrs.flags |= SIW_RDMA_READ_ENABLED; 410 | else 411 | qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED; 412 | 413 | } 414 | /* 415 | * TODO: what else ?? 416 | */ 417 | } 418 | if (!(mask & SIW_QP_ATTR_STATE)) 419 | return 0; 420 | 421 | dprint(DBG_CM, "(QP%d): SIW QP state: %s => %s\n", QP_ID(qp), 422 | siw_qp_state_to_string[qp->attrs.state], 423 | siw_qp_state_to_string[attrs->state]); 424 | 425 | 426 | switch (qp->attrs.state) { 427 | 428 | case SIW_QP_STATE_IDLE: 429 | case SIW_QP_STATE_RTR: 430 | 431 | switch (attrs->state) { 432 | 433 | case SIW_QP_STATE_RTS: 434 | 435 | if (attrs->mpa.crc) { 436 | rv = siw_qp_enable_crc(qp); 437 | if (rv) 438 | break; 439 | } 440 | if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) { 441 | dprint(DBG_ON, "(QP%d): socket?\n", QP_ID(qp)); 442 | rv = -EINVAL; 443 | break; 444 | } 445 | if (!(mask & SIW_QP_ATTR_MPA)) { 446 | dprint(DBG_ON, "(QP%d): MPA?\n", QP_ID(qp)); 447 | rv = -EINVAL; 448 | break; 449 | } 450 | dprint(DBG_CM, "(QP%d): Enter RTS: " 451 | "peer 0x%08x, local 0x%08x\n", QP_ID(qp), 452 | qp->cep->llp.raddr.sin_addr.s_addr, 453 | qp->cep->llp.laddr.sin_addr.s_addr); 454 | /* 455 | * Initialize global iWARP TX state 456 | */ 457 | qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0; 458 | qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0; 459 | qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0; 460 | 461 | /* 462 | * Initialize global iWARP RX state 463 | */ 464 | qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1; 465 | qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1; 466 | qp->rx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1; 467 | 468 | /* 469 | * init IRD free queue, caller has already checked 470 | * limits. 471 | */ 472 | rv = siw_qp_readq_init(qp, attrs->irq_size, 473 | attrs->orq_size); 474 | if (rv) 475 | break; 476 | 477 | qp->attrs.mpa = attrs->mpa; 478 | /* 479 | * move socket rx and tx under qp's control 480 | */ 481 | siw_qp_socket_assoc(attrs->llp_stream_handle, qp); 482 | 483 | qp->attrs.state = SIW_QP_STATE_RTS; 484 | /* 485 | * set initial mss 486 | */ 487 | qp->tx_ctx.tcp_seglen = 488 | get_tcp_mss(attrs->llp_stream_handle->sk); 489 | 490 | break; 491 | 492 | case SIW_QP_STATE_ERROR: 493 | siw_rq_flush(qp); 494 | qp->attrs.state = SIW_QP_STATE_ERROR; 495 | if (qp->cep) { 496 | siw_cep_put(qp->cep); 497 | qp->cep = NULL; 498 | } 499 | break; 500 | 501 | case SIW_QP_STATE_RTR: 502 | /* ignore */ 503 | break; 504 | 505 | default: 506 | dprint(DBG_CM, 507 | " QP state transition undefined: %s => %s\n", 508 | siw_qp_state_to_string[qp->attrs.state], 509 | siw_qp_state_to_string[attrs->state]); 510 | break; 511 | } 512 | break; 513 | 514 | case SIW_QP_STATE_RTS: 515 | 516 | switch (attrs->state) { 517 | 518 | case SIW_QP_STATE_CLOSING: 519 | /* 520 | * Verbs: move to IDLE if SQ and ORQ are empty. 521 | * Move to ERROR otherwise. But first of all we must 522 | * close the connection. So we keep CLOSING or ERROR 523 | * as a transient state, schedule connection drop work 524 | * and wait for the socket state change upcall to 525 | * come back closed. 
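 * In short: with an idle TX WQE the QP parks in CLOSING until the
 * LLP close completes; otherwise it moves to ERROR right away and
 * the SQ is flushed.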
526 | */ 527 | if (tx_wqe(qp)->wr_status == SR_WR_IDLE) 528 | qp->attrs.state = SIW_QP_STATE_CLOSING; 529 | else { 530 | qp->attrs.state = SIW_QP_STATE_ERROR; 531 | siw_sq_flush(qp); 532 | } 533 | siw_rq_flush(qp); 534 | 535 | drop_conn = 1; 536 | break; 537 | 538 | case SIW_QP_STATE_TERMINATE: 539 | qp->attrs.state = SIW_QP_STATE_TERMINATE; 540 | siw_send_terminate(qp); 541 | drop_conn = 1; 542 | 543 | break; 544 | 545 | case SIW_QP_STATE_ERROR: 546 | /* 547 | * This is an emergency close. 548 | * 549 | * Any in progress transmit operation will get 550 | * cancelled. 551 | * This will likely result in a protocol failure, 552 | * if a TX operation is in transit. The caller 553 | * could unconditional wait to give the current 554 | * operation a chance to complete. 555 | * Esp., how to handle the non-empty IRQ case? 556 | * The peer was asking for data transfer at a valid 557 | * point in time. 558 | */ 559 | siw_sq_flush(qp); 560 | siw_rq_flush(qp); 561 | qp->attrs.state = SIW_QP_STATE_ERROR; 562 | drop_conn = 1; 563 | 564 | break; 565 | 566 | default: 567 | dprint(DBG_ON, 568 | " QP state transition undefined: %s => %s\n", 569 | siw_qp_state_to_string[qp->attrs.state], 570 | siw_qp_state_to_string[attrs->state]); 571 | break; 572 | } 573 | break; 574 | 575 | case SIW_QP_STATE_TERMINATE: 576 | 577 | switch (attrs->state) { 578 | 579 | case SIW_QP_STATE_ERROR: 580 | siw_rq_flush(qp); 581 | qp->attrs.state = SIW_QP_STATE_ERROR; 582 | 583 | if (tx_wqe(qp)->wr_status != SR_WR_IDLE) 584 | siw_sq_flush(qp); 585 | 586 | break; 587 | 588 | default: 589 | dprint(DBG_ON, 590 | " QP state transition undefined: %s => %s\n", 591 | siw_qp_state_to_string[qp->attrs.state], 592 | siw_qp_state_to_string[attrs->state]); 593 | } 594 | break; 595 | 596 | case SIW_QP_STATE_CLOSING: 597 | 598 | switch (attrs->state) { 599 | 600 | case SIW_QP_STATE_IDLE: 601 | BUG_ON(tx_wqe(qp)->wr_status != SR_WR_IDLE); 602 | qp->attrs.state = SIW_QP_STATE_IDLE; 603 | 604 | break; 605 | 606 | case SIW_QP_STATE_CLOSING: 607 | /* 608 | * The LLP may already moved the QP to closing 609 | * due to graceful peer close init 610 | */ 611 | break; 612 | 613 | case SIW_QP_STATE_ERROR: 614 | /* 615 | * QP was moved to CLOSING by LLP event 616 | * not yet seen by user. 617 | */ 618 | qp->attrs.state = SIW_QP_STATE_ERROR; 619 | 620 | if (tx_wqe(qp)->wr_status != SR_WR_IDLE) 621 | siw_sq_flush(qp); 622 | 623 | siw_rq_flush(qp); 624 | 625 | break; 626 | 627 | default: 628 | dprint(DBG_CM, 629 | " QP state transition undefined: %s => %s\n", 630 | siw_qp_state_to_string[qp->attrs.state], 631 | siw_qp_state_to_string[attrs->state]); 632 | return -ECONNABORTED; 633 | } 634 | break; 635 | 636 | default: 637 | dprint(DBG_CM, " NOP: State: %d\n", qp->attrs.state); 638 | break; 639 | } 640 | if (drop_conn) 641 | siw_qp_cm_drop(qp, 0); 642 | 643 | return rv; 644 | } 645 | 646 | struct ib_qp *siw_get_ofaqp(struct ib_device *ofa_dev, int id) 647 | { 648 | struct siw_qp *qp = siw_qp_id2obj(siw_dev_ofa2siw(ofa_dev), id); 649 | 650 | dprint(DBG_OBJ, ": dev_name: %s, OFA QPID: %d, QP: %p\n", 651 | ofa_dev->name, id, qp); 652 | if (qp) { 653 | /* 654 | * siw_qp_id2obj() increments object reference count 655 | */ 656 | siw_qp_put(qp); 657 | dprint(DBG_OBJ, " QPID: %d\n", QP_ID(qp)); 658 | return &qp->ofa_qp; 659 | } 660 | return (struct ib_qp *)NULL; 661 | } 662 | 663 | /* 664 | * siw_check_mem() 665 | * 666 | * Check protection domain, STAG state, access permissions and 667 | * address range for memory object. 
668 | * 669 | * @pd: Protection Domain memory should belong to 670 | * @mem: memory to be checked 671 | * @addr: starting addr of mem 672 | * @perms: requested access permissions 673 | * @len: len of memory interval to be checked 674 | * 675 | */ 676 | int siw_check_mem(struct siw_pd *pd, struct siw_mem *mem, u64 addr, 677 | enum siw_access_flags perms, int len) 678 | { 679 | if (siw_mem2mr(mem)->pd != pd) { 680 | dprint(DBG_WR|DBG_ON, "(PD%d): PD mismatch %p : %p\n", 681 | OBJ_ID(pd), 682 | siw_mem2mr(mem)->pd, pd); 683 | 684 | return -EINVAL; 685 | } 686 | if (mem->stag_state == STAG_INVALID) { 687 | dprint(DBG_WR|DBG_ON, "(PD%d): STAG 0x%08x invalid\n", 688 | OBJ_ID(pd), OBJ_ID(mem)); 689 | return -EPERM; 690 | } 691 | /* 692 | * check access permissions 693 | */ 694 | if ((mem->perms & perms) < perms) { 695 | dprint(DBG_WR|DBG_ON, "(PD%d): " 696 | "INSUFFICIENT permissions 0x%08x : 0x%08x\n", 697 | OBJ_ID(pd), mem->perms, perms); 698 | return -EPERM; 699 | } 700 | /* 701 | * Check address interval: we relax the check to allow memory shrunk 702 | * from the start address _after_ placing or fetching len bytes. 703 | * TODO: this relaxation is probably overdone 704 | */ 705 | if (addr < mem->va || addr + len > mem->va + mem->len) { 706 | dprint(DBG_WR|DBG_ON, "(PD%d): MEM interval len %d " 707 | "[0x%016llx, 0x%016llx) out of bounds " 708 | "[0x%016llx, 0x%016llx) for LKey=0x%08x\n", 709 | OBJ_ID(pd), len, (unsigned long long)addr, 710 | (unsigned long long)(addr + len), 711 | (unsigned long long)mem->va, 712 | (unsigned long long)(mem->va + mem->len), 713 | OBJ_ID(mem)); 714 | 715 | return -EINVAL; 716 | } 717 | return 0; 718 | } 719 | 720 | /* 721 | * siw_check_sge() 722 | * 723 | * Check SGE for access rights in the given interval 724 | * 725 | * @pd: Protection Domain memory should belong to 726 | * @sge: SGE to be checked 727 | * @mem: resulting memory reference if successful 728 | * @perms: requested access permissions 729 | * @off: starting offset in SGE 730 | * @len: len of memory interval to be checked 731 | * 732 | * NOTE: Function references SGE's memory object (mem->obj) 733 | * if not yet done. The new reference is kept if the check succeeds 734 | * and released if it fails. If mem->obj is already valid, no new 735 | * lookup is done and mem is not released if the check fails.
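 * A typical call (sketch; perms is a value from enum siw_access_flags)
 * resolves and checks the memory behind an SGE in one step:
 *   rv = siw_check_sge(pd, sge, mem, perms, 0, sge->length);
 * On success (rv == 0), mem->obj is valid and referenced.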
736 | */ 737 | int 738 | siw_check_sge(struct siw_pd *pd, struct siw_sge *sge, 739 | union siw_mem_resolved *mem, enum siw_access_flags perms, 740 | u32 off, int len) 741 | { 742 | struct siw_dev *sdev = pd->hdr.sdev; 743 | int new_ref = 0, rv = 0; 744 | 745 | if (len + off > sge->length) { 746 | rv = -EPERM; 747 | goto fail; 748 | } 749 | if (mem->obj == NULL) { 750 | mem->obj = siw_mem_id2obj(sdev, sge->lkey >> 8); 751 | if (mem->obj == NULL) { 752 | rv = -EINVAL; 753 | goto fail; 754 | } 755 | new_ref = 1; 756 | } 757 | 758 | rv = siw_check_mem(pd, mem->obj, sge->laddr + off, perms, len); 759 | if (rv) 760 | goto fail; 761 | 762 | return 0; 763 | 764 | fail: 765 | if (new_ref) { 766 | siw_mem_put(mem->obj); 767 | mem->obj = NULL; 768 | } 769 | return rv; 770 | } 771 | 772 | void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) 773 | { 774 | rreq->id = sqe->id; 775 | rreq->opcode = SIW_OP_READ; 776 | rreq->sge[0].laddr = sqe->sge[0].laddr; 777 | rreq->sge[0].length = sqe->sge[0].length; 778 | rreq->sge[0].lkey = sqe->sge[0].lkey; 779 | rreq->flags = sqe->flags | SIW_WQE_VALID; 780 | rreq->num_sge = 1; 781 | } 782 | 783 | 784 | /* 785 | * Must be called with SQ locked 786 | */ 787 | int siw_activate_tx(struct siw_qp *qp) 788 | { 789 | struct siw_sqe *sqe; 790 | struct siw_wqe *wqe = tx_wqe(qp); 791 | int rv = 1; 792 | 793 | if (unlikely(wqe->wr_status != SR_WR_IDLE)) { 794 | WARN_ON(1); 795 | return -1; 796 | } 797 | /* 798 | * This codes prefers pending READ Responses over SQ processing 799 | */ 800 | sqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; 801 | 802 | if (sqe->flags & SIW_WQE_VALID) { 803 | memset(wqe->mem, 0, sizeof *wqe->mem * SIW_MAX_SGE); 804 | wqe->wr_status = SR_WR_QUEUED; 805 | 806 | /* start READ RESPONSE */ 807 | wqe->sqe.opcode = SIW_OP_READ_RESPONSE; 808 | wqe->sqe.flags = 0; 809 | wqe->sqe.num_sge = 1; 810 | wqe->sqe.sge[0].length = sqe->sge[0].length; 811 | wqe->sqe.sge[0].laddr = sqe->sge[0].laddr; 812 | wqe->sqe.sge[0].lkey = sqe->sge[0].lkey; 813 | wqe->sqe.rkey = sqe->rkey; 814 | wqe->sqe.raddr = sqe->raddr; 815 | 816 | wqe->processed = 0; 817 | qp->irq_get++; 818 | smp_store_mb(sqe->flags, 0); 819 | 820 | goto out; 821 | } 822 | 823 | sqe = sq_get_next(qp); 824 | if (sqe) { 825 | unsigned long flags; 826 | 827 | memset(wqe->mem, 0, sizeof *wqe->mem * SIW_MAX_SGE); 828 | wqe->wr_status = SR_WR_QUEUED; 829 | 830 | /* First copy SQE to kernel private memory */ 831 | memcpy(&wqe->sqe, sqe, sizeof *sqe); 832 | 833 | if (wqe->sqe.opcode > SIW_OP_SEND) { 834 | rv = -EINVAL; 835 | goto out; 836 | } 837 | 838 | if (wqe->sqe.flags & SIW_WQE_INLINE) { 839 | if (wqe->sqe.opcode != SIW_OP_SEND && 840 | wqe->sqe.opcode != SIW_OP_WRITE) { 841 | rv = -EINVAL; 842 | goto out; 843 | } 844 | if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { 845 | rv = -EINVAL; 846 | goto out; 847 | } 848 | wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1]; 849 | wqe->sqe.sge[0].lkey = 0; 850 | wqe->sqe.num_sge = 1; 851 | } 852 | 853 | if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { 854 | /* Only WRITE and SEND can be READ fenced */ 855 | if (unlikely(wqe->sqe.opcode != SIW_OP_WRITE && 856 | wqe->sqe.opcode != SIW_OP_SEND)) { 857 | pr_info("QP[%d]: cannot fence %d\n", 858 | QP_ID(qp), wqe->sqe.opcode); 859 | rv = -EINVAL; 860 | goto out; 861 | } 862 | lock_orq_rxsave(qp, flags); 863 | 864 | if (!siw_orq_empty(qp)) { 865 | qp->tx_ctx.orq_fence = 1; 866 | rv = 0; 867 | } 868 | unlock_orq_rxsave(qp, flags); 869 | 870 | } else if (wqe->sqe.opcode == SIW_OP_READ) { 871 | struct siw_sqe *rreq; 872 | 873 
| wqe->sqe.num_sge = 1; 874 | 875 | lock_orq_rxsave(qp, flags); 876 | 877 | rreq = orq_get_free(qp); 878 | if (rreq) { 879 | /* 880 | * Make an immediate copy in ORQ to be ready 881 | * to process loopback READ reply 882 | */ 883 | siw_read_to_orq(rreq, &wqe->sqe); 884 | qp->orq_put++; 885 | } else { 886 | qp->tx_ctx.orq_fence = 1; 887 | rv = 0; 888 | } 889 | unlock_orq_rxsave(qp, flags); 890 | } 891 | 892 | /* Clear SQE, can be re-used by application */ 893 | smp_store_mb(sqe->flags, 0); 894 | qp->sq_get++; 895 | } else 896 | rv = 0; 897 | 898 | out: 899 | if (unlikely(rv < 0)) { 900 | pr_warn("QP[%d]: error %d in activate_tx\n", QP_ID(qp), rv); 901 | wqe->wr_status = SR_WR_IDLE; 902 | } 903 | return rv; 904 | } 905 | 906 | int siw_crc_array(struct shash_desc *desc, u8 *start, size_t len) 907 | { 908 | return crypto_shash_update(desc, start, len); 909 | } 910 | 911 | int siw_crc_page(struct shash_desc *desc, struct page *p, int off, int len) 912 | { 913 | int rv; 914 | struct scatterlist t_sg; 915 | 916 | sg_init_table(&t_sg, 1); 917 | sg_set_page(&t_sg, p, len, off); 918 | rv = crypto_shash_update(desc, sg_virt(&t_sg), len); 919 | 920 | return rv; 921 | } 922 | 923 | static void siw_cq_notify(struct siw_cq *cq, u32 flags) 924 | { 925 | u32 cq_notify; 926 | 927 | if (unlikely(!cq->ofa_cq.comp_handler)) 928 | return; 929 | 930 | cq_notify = _load_shared(*cq->notify); 931 | 932 | if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) || 933 | ((cq_notify & SIW_NOTIFY_SOLICITED) && 934 | (flags & SIW_WQE_SOLICITED))) { 935 | smp_store_mb(*cq->notify, SIW_NOTIFY_NOT); 936 | (*cq->ofa_cq.comp_handler)(&cq->ofa_cq, cq->ofa_cq.cq_context); 937 | } 938 | } 939 | 940 | int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes, 941 | enum siw_wc_status status) 942 | { 943 | struct siw_cq *cq = qp->scq; 944 | struct siw_cqe *cqe; 945 | unsigned long flags; 946 | u32 idx; 947 | int rv = 0; 948 | 949 | if (cq) { 950 | u32 sqe_flags = sqe->flags; 951 | 952 | lock_cq_rxsave(cq, flags); 953 | 954 | idx = cq->cq_put % cq->num_cqe; 955 | cqe = &cq->queue[idx]; 956 | 957 | if (!cqe->flags) { 958 | cqe->id = sqe->id; 959 | cqe->opcode = sqe->opcode; 960 | cqe->status = status; 961 | cqe->imm_data = 0; 962 | cqe->bytes = bytes; 963 | 964 | if (cq->kernel_verbs) { 965 | siw_qp_get(qp); 966 | cqe->qp = qp; 967 | } else 968 | cqe->qp_id = QP_ID(qp); 969 | 970 | smp_store_mb(cqe->flags, SIW_WQE_VALID); 971 | smp_store_mb(sqe->flags, 0); 972 | 973 | cq->cq_put++; 974 | unlock_cq_rxsave(cq, flags); 975 | siw_cq_notify(cq, sqe_flags); 976 | } else { 977 | unlock_cq_rxsave(cq, flags); 978 | rv = -ENOMEM; 979 | siw_cq_event(cq, IB_EVENT_CQ_ERR); 980 | } 981 | } else 982 | smp_store_mb(sqe->flags, 0); 983 | 984 | return rv; 985 | } 986 | 987 | int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, 988 | enum siw_wc_status status) 989 | { 990 | struct siw_cq *cq = qp->rcq; 991 | struct siw_cqe *cqe; 992 | unsigned long flags; 993 | u32 idx; 994 | int rv = 0; 995 | 996 | if (cq) { 997 | u32 rqe_flags = rqe->flags; 998 | 999 | lock_cq_rxsave(cq, flags); 1000 | 1001 | idx = cq->cq_put % cq->num_cqe; 1002 | cqe = &cq->queue[idx]; 1003 | 1004 | if (!cqe->flags) { 1005 | cqe->id = rqe->id; 1006 | cqe->opcode = SIW_OP_RECEIVE; 1007 | cqe->status = status; 1008 | cqe->imm_data = 0; 1009 | cqe->bytes = bytes; 1010 | 1011 | if (cq->kernel_verbs) { 1012 | siw_qp_get(qp); 1013 | cqe->qp = qp; 1014 | } else 1015 | cqe->qp_id = QP_ID(qp); 1016 | 1017 | smp_store_mb(cqe->flags, SIW_WQE_VALID); 1018 | 
smp_store_mb(rqe->flags, 0); 1019 | 1020 | cq->cq_put++; 1021 | unlock_cq_rxsave(cq, flags); 1022 | siw_cq_notify(cq, rqe_flags); 1023 | } else { 1024 | unlock_cq_rxsave(cq, flags); 1025 | rv = -ENOMEM; 1026 | siw_cq_event(cq, IB_EVENT_CQ_ERR); 1027 | } 1028 | } else 1029 | smp_store_mb(rqe->flags, 0); 1030 | 1031 | return rv; 1032 | } 1033 | 1034 | /* 1035 | * siw_sq_flush() 1036 | * 1037 | * Flush SQ and ORRQ entries to CQ. 1038 | * IRRQ entries are silently dropped. 1039 | * 1040 | * TODO: Add termination code for in-progress WQE. 1041 | * TODO: an in-progress WQE may have been partially 1042 | * processed. It should be enforced, that transmission 1043 | * of a started DDP segment must be completed if possible 1044 | * by any chance. 1045 | * 1046 | * Must be called with qp state write lock held. 1047 | * Therefore, SQ and ORQ lock must not be taken. 1048 | */ 1049 | void siw_sq_flush(struct siw_qp *qp) 1050 | { 1051 | struct siw_sqe *sqe; 1052 | struct siw_wqe *wqe = tx_wqe(qp); 1053 | unsigned long flags; 1054 | int async_event = 0; 1055 | 1056 | dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp)); 1057 | /* 1058 | * Start with completing any work currently on the ORQ 1059 | */ 1060 | lock_orq_rxsave(qp, flags); 1061 | 1062 | while (qp->attrs.orq_size) { 1063 | sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size]; 1064 | if (!sqe->flags) 1065 | break; 1066 | 1067 | if (siw_sqe_complete(qp, sqe, 0, 1068 | SIW_WC_WR_FLUSH_ERR) != 0) 1069 | break; 1070 | 1071 | qp->orq_get++; 1072 | } 1073 | unlock_orq_rxsave(qp, flags); 1074 | /* 1075 | * Flush the in-progress wqe, if there. 1076 | */ 1077 | if (wqe->wr_status != SR_WR_IDLE) { 1078 | /* 1079 | * TODO: Add iWARP Termination code 1080 | */ 1081 | dprint(DBG_WR, 1082 | " (QP%d): Flush current WQE %p, type %d, status %d\n", 1083 | QP_ID(qp), wqe, tx_type(wqe), wqe->wr_status); 1084 | 1085 | siw_wqe_put_mem(wqe, wqe->sqe.opcode); 1086 | 1087 | if (wqe->sqe.opcode != SIW_OP_READ_RESPONSE && 1088 | (wqe->sqe.opcode != SIW_OP_READ || 1089 | wqe->wr_status == SR_WR_QUEUED)) 1090 | /* 1091 | * An in-progress RREQUEST is already in 1092 | * the ORQ 1093 | */ 1094 | siw_sqe_complete(qp, &wqe->sqe, wqe->bytes, 1095 | SIW_WC_WR_FLUSH_ERR); 1096 | 1097 | wqe->wr_status = SR_WR_IDLE; 1098 | } 1099 | /* 1100 | * Flush the Send Queue 1101 | */ 1102 | while (qp->attrs.sq_size) { 1103 | sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size]; 1104 | if (!sqe->flags) 1105 | break; 1106 | 1107 | async_event = 1; 1108 | if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0) 1109 | /* Shall IB_EVENT_SQ_DRAINED be supressed ? */ 1110 | break; 1111 | 1112 | sqe->flags = 0; 1113 | qp->sq_get++; 1114 | } 1115 | if (async_event) 1116 | siw_qp_event(qp, IB_EVENT_SQ_DRAINED); 1117 | } 1118 | 1119 | /* 1120 | * siw_rq_flush() 1121 | * 1122 | * Flush recv queue entries to cq. An in-progress WQE may have some bytes 1123 | * processed (wqe->processed). 1124 | * 1125 | * Must be called with qp state write lock held. 1126 | * Therefore, RQ lock must not be taken. 
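 * All completions generated here carry SIW_WC_WR_FLUSH_ERR: the
 * in-progress WQE completes with wqe->bytes, queued RQEs with 0.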
1127 | */ 1128 | void siw_rq_flush(struct siw_qp *qp) 1129 | { 1130 | struct siw_wqe *wqe = rx_wqe(qp); 1131 | 1132 | dprint(DBG_OBJ|DBG_CM|DBG_WR, "(QP%d): Enter\n", QP_ID(qp)); 1133 | 1134 | /* 1135 | * Flush an in-progess WQE if present 1136 | */ 1137 | if (wqe->wr_status != SR_WR_IDLE) { 1138 | if (__rdmap_opcode(&qp->rx_ctx.hdr.ctrl) != RDMAP_RDMA_WRITE) { 1139 | siw_wqe_put_mem(wqe, SIW_OP_RECEIVE); 1140 | siw_rqe_complete(qp, &wqe->rqe, wqe->bytes, 1141 | SIW_WC_WR_FLUSH_ERR); 1142 | } else 1143 | siw_mem_put(rx_mem(qp)); 1144 | 1145 | wqe->wr_status = SR_WR_IDLE; 1146 | } 1147 | 1148 | while (qp->recvq && qp->attrs.rq_size) { 1149 | struct siw_rqe *rqe = 1150 | &qp->recvq[qp->rq_get % qp->attrs.rq_size]; 1151 | 1152 | if (!rqe->flags) 1153 | break; 1154 | 1155 | if (siw_rqe_complete(qp, rqe, 0, SIW_WC_WR_FLUSH_ERR) != 0) 1156 | break; 1157 | rqe->flags = 0; 1158 | 1159 | qp->rq_get++; 1160 | } 1161 | } 1162 | -------------------------------------------------------------------------------- /kernel/siw_verbs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP device driver for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 
37 | */ 38 | 39 | #ifndef _SIW_VERBS_H 40 | #define _SIW_VERBS_H 41 | 42 | #include <linux/version.h> 43 | 44 | #include <rdma/iw_cm.h> 45 | #include <rdma/ib_verbs.h> 46 | #include <rdma/ib_smi.h> 47 | #include <rdma/ib_user_verbs.h> 48 | 49 | #include "siw.h" 50 | #include "siw_cm.h" 51 | 52 | 53 | extern struct ib_ucontext *siw_alloc_ucontext(struct ib_device *, 54 | struct ib_udata *); 55 | extern int siw_dealloc_ucontext(struct ib_ucontext *); 56 | extern int siw_query_port(struct ib_device *, u8, struct ib_port_attr *); 57 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) || defined(IS_RH_7_2) 58 | extern int siw_get_port_immutable(struct ib_device *, u8, 59 | struct ib_port_immutable *); 60 | extern int siw_query_device(struct ib_device *, struct ib_device_attr *, 61 | struct ib_udata *); 62 | extern struct ib_cq *siw_create_cq(struct ib_device *, 63 | const struct ib_cq_init_attr *, 64 | struct ib_ucontext *, struct ib_udata *); 65 | int siw_no_mad(struct ib_device *, int, u8, const struct ib_wc *, 66 | const struct ib_grh *, const struct ib_mad_hdr *, size_t, 67 | struct ib_mad_hdr *, size_t *, u16 *); 68 | #else 69 | extern int siw_query_device(struct ib_device *, struct ib_device_attr *); 70 | extern struct ib_cq *siw_create_cq(struct ib_device *, int, int, 71 | struct ib_ucontext *, struct ib_udata *); 72 | int siw_no_mad(struct ib_device *, int, u8, struct ib_wc *, struct ib_grh *, 73 | struct ib_mad *, struct ib_mad *); 74 | #endif 75 | 76 | extern int siw_query_pkey(struct ib_device *, u8, u16, u16 *); 77 | extern int siw_query_gid(struct ib_device *, u8, int, union ib_gid *); 78 | 79 | extern struct ib_pd *siw_alloc_pd(struct ib_device *, struct ib_ucontext *, 80 | struct ib_udata *); 81 | extern int siw_dealloc_pd(struct ib_pd *); 82 | extern struct ib_ah *siw_create_ah(struct ib_pd *, struct ib_ah_attr *); 83 | extern int siw_destroy_ah(struct ib_ah *); 84 | extern struct ib_qp *siw_create_qp(struct ib_pd *, struct ib_qp_init_attr *, 85 | struct ib_udata *); 86 | extern int siw_query_qp(struct ib_qp *, struct ib_qp_attr *, int, 87 | struct ib_qp_init_attr *); 88 | extern int siw_ofed_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, 89 | struct ib_udata *); 90 | extern int siw_destroy_qp(struct ib_qp *); 91 | extern int siw_post_send(struct ib_qp *, struct ib_send_wr *, 92 | struct ib_send_wr **); 93 | extern int siw_post_receive(struct ib_qp *, struct ib_recv_wr *, 94 | struct ib_recv_wr **); 95 | extern int siw_destroy_cq(struct ib_cq *); 96 | extern int siw_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *); 97 | extern int siw_req_notify_cq(struct ib_cq *, enum ib_cq_notify_flags); 98 | extern struct ib_mr *siw_reg_user_mr(struct ib_pd *, u64, u64, u64, int, 99 | struct ib_udata *); 100 | extern struct ib_mr *siw_get_dma_mr(struct ib_pd *, int); 101 | extern int siw_dereg_mr(struct ib_mr *); 102 | extern struct ib_srq *siw_create_srq(struct ib_pd *, struct ib_srq_init_attr *, 103 | struct ib_udata *); 104 | extern int siw_modify_srq(struct ib_srq *, struct ib_srq_attr *, 105 | enum ib_srq_attr_mask, struct ib_udata *); 106 | extern int siw_query_srq(struct ib_srq *, struct ib_srq_attr *); 107 | extern int siw_destroy_srq(struct ib_srq *); 108 | extern int siw_post_srq_recv(struct ib_srq *, struct ib_recv_wr *, 109 | struct ib_recv_wr **); 110 | extern int siw_mmap(struct ib_ucontext *, struct vm_area_struct *); 111 | 112 | extern struct dma_map_ops siw_dma_generic_ops; 113 | extern struct ib_dma_mapping_ops siw_dma_mapping_ops; 114 | 115 | #endif 116 |
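The kernel-side flush logic above is exactly what an application observes through the standard verbs API: once a QP enters the error state, every pending work request completes with `SIW_WC_WR_FLUSH_ERR`, which libibverbs surfaces as `IBV_WC_WR_FLUSH_ERR`. Below is a minimal user-space sketch of draining such completions; the helper name `drain_flushed` and the pre-created `qp`/`cq` handles are illustrative assumptions, not code from this repo.

```c
#include <stdio.h>
#include <infiniband/verbs.h>

/* Illustrative only: move the QP to ERROR, then reap the flush
 * completions that the kernel-side siw_sq_flush()/siw_rq_flush()
 * generate for all posted-but-unfinished work requests. */
static int drain_flushed(struct ibv_qp *qp, struct ibv_cq *cq)
{
	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_ERR };
	struct ibv_wc wc;
	int n;

	if (ibv_modify_qp(qp, &attr, IBV_QP_STATE))
		return -1;

	/* Poll until the CQ is empty; each flushed WR surfaces once. */
	while ((n = ibv_poll_cq(cq, 1, &wc)) > 0)
		if (wc.status != IBV_WC_SUCCESS)
			printf("WR %llu: %s\n",
			       (unsigned long long)wc.wr_id,
			       ibv_wc_status_str(wc.status));

	return n;	/* 0 once drained, negative on poll error */
}
```

Since the flush runs under the QP state write lock during the state transition, the flush completions should already be available when `ibv_modify_qp()` returns.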
-------------------------------------------------------------------------------- /userlib/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | lib_LTLIBRARIES = src/libsiw.la 3 | 4 | AM_CFLAGS = -g -Wall -D_GNU_SOURCE 5 | AM_CFLAGS += -I../common 6 | 7 | if HAVE_LD_VERSION_SCRIPT 8 | siw_version_script = -Wl,--version-script=$(srcdir)/src/siw.map 9 | else 10 | siw_version_script = 11 | endif 12 | 13 | src_libsiw_la_SOURCES = src/siw_verbs.c src/siw.c src/siw_uverbs.c 14 | 15 | src_libsiw_la_LDFLAGS = -avoid-version -release @IBV_DEVICE_LIBRARY_EXTENSION@ \ 16 | $(siw_version_script) 17 | 18 | siwconfdir = $(sysconfdir)/libibverbs.d 19 | siwconf_DATA = siw.driver 20 | 21 | EXTRA_DIST = src/siw.h src/siw_abi.h src/siw.map libsiw.spec.in \ 22 | siw.driver 23 | 24 | dist-hook: libsiw.spec 25 | cp libsiw.spec $(distdir) 26 | -------------------------------------------------------------------------------- /userlib/autogen.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | set -x 4 | aclocal -I config 5 | libtoolize --force --copy 6 | autoheader 7 | automake --foreign --add-missing --copy 8 | autoconf 9 | -------------------------------------------------------------------------------- /userlib/configure.in: -------------------------------------------------------------------------------- 1 | dnl Process this file with autoconf to produce a configure script. 2 | 3 | AC_PREREQ(2.57) 4 | AC_INIT(libsiw, 0.9, bmt@zurich.ibm.com) 5 | AC_CONFIG_SRCDIR([src/siw.h]) 6 | AC_CONFIG_AUX_DIR(config) 7 | AM_CONFIG_HEADER(config.h) 8 | AM_INIT_AUTOMAKE(libsiw, 0.9) 9 | AM_PROG_LIBTOOL 10 | 11 | AC_ARG_ENABLE(libcheck, [ --disable-libcheck do not test for presence of ib libraries], 12 | [ if test x$enableval = xno ; then 13 | disable_libcheck=yes 14 | fi 15 | ]) 16 | 17 | dnl Checks for programs 18 | AC_PROG_CC 19 | AC_CHECK_SIZEOF(long) 20 | 21 | dnl Checks for libraries 22 | if test "$disable_libcheck" != "yes" 23 | then 24 | AC_CHECK_LIB(ibverbs, ibv_get_device_list, [], 25 | AC_MSG_ERROR([ibv_get_device_list() not found. libsiw requires libibverbs.])) 26 | fi 27 | 28 | dnl Checks for header files. 29 | AC_CHECK_HEADERS(sysfs/libsysfs.h) 30 | 31 | if test "$disable_libcheck" != "yes" 32 | then 33 | AC_CHECK_HEADER(infiniband/driver.h, [], 34 | AC_MSG_ERROR([<infiniband/driver.h> not found. Is libibverbs installed?])) 35 | AC_HEADER_STDC 36 | fi 37 | 38 | dnl Checks for typedefs, structures, and compiler characteristics. 39 | AC_C_CONST 40 | 41 | dnl Checks for library functions 42 | AC_CHECK_FUNCS(ibv_read_sysfs_file) 43 | 44 | dnl Now check for the libibverbs device library extension 45 | dummy=if$$ 46 | cat <<IBV_VERSION > $dummy.c 47 | #include <infiniband/driver.h> 48 | IBV_DEVICE_LIBRARY_EXTENSION 49 | IBV_VERSION 50 | IBV_DEVICE_LIBRARY_EXTENSION=`$CC $CPPFLAGS -E $dummy.c 2> /dev/null | tail -1` 51 | rm -f $dummy.c 52 | if test $IBV_DEVICE_LIBRARY_EXTENSION = IBV_DEVICE_LIBRARY_EXTENSION; then 53 | AC_MSG_ERROR([IBV_DEVICE_LIBRARY_EXTENSION not defined.
Is libibverbs new enough?]) 54 | fi 55 | AC_SUBST(IBV_DEVICE_LIBRARY_EXTENSION) 56 | 57 | AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script, 58 | if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then 59 | ac_cv_version_script=yes 60 | else 61 | ac_cv_version_script=no 62 | fi) 63 | 64 | AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes") 65 | 66 | AC_CONFIG_FILES([Makefile libsiw.spec]) 67 | AC_OUTPUT 68 | -------------------------------------------------------------------------------- /userlib/libsiw.spec.in: -------------------------------------------------------------------------------- 1 | %define ver @VERSION@ 2 | 3 | Name: libsiw 4 | Version: 0.9 5 | Release: 1%{?dist} 6 | Summary: Software iWARP user library 7 | 8 | Group: System Environment/Libraries 9 | License: GPL/BSD 10 | Url: https://github.com/zrlio/softiwarp 11 | Source: https://github.com/zrlio/softiwarp/tree/master/userlib 12 | BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) 13 | 14 | BuildRequires: libibverbs-devel 15 | 16 | %description 17 | libsiw provides a userspace driver for the Linux kernel SoftiWARP stack 18 | for use with the libibverbs and librdmacm libraries. 19 | 20 | %package devel 21 | Summary: Development files for the libsiw driver 22 | Group: System Environment/Libraries 23 | Requires: %{name} = %{version}-%{release} 24 | 25 | %description devel 26 | Static version of libsiw that may be linked directly to an 27 | application, which may be useful for debugging. 28 | 29 | %prep 30 | %setup -q -n %{name}-%{ver} 31 | 32 | %build 33 | %configure 34 | make %{?_smp_mflags} 35 | 36 | %install 37 | rm -rf $RPM_BUILD_ROOT 38 | %makeinstall 39 | # remove unpackaged files from the buildroot 40 | rm -f $RPM_BUILD_ROOT%{_libdir}/*.la 41 | 42 | %clean 43 | rm -rf $RPM_BUILD_ROOT 44 | 45 | %files 46 | %defattr(-,root,root,-) 47 | %{_libdir}/libsiw*.so 48 | %doc AUTHORS COPYING ChangeLog README 49 | %config %{_sysconfdir}/libibverbs.d/siw.driver 50 | 51 | %files devel 52 | %defattr(-,root,root,-) 53 | %{_libdir}/libsiw*.a 54 | 55 | %changelog 56 | -------------------------------------------------------------------------------- /userlib/siw.driver: -------------------------------------------------------------------------------- 1 | driver siw 2 | -------------------------------------------------------------------------------- /userlib/src/siw.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP library for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution.
24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #if HAVE_CONFIG_H 40 | # include 41 | #endif /* HAVE_CONFIG_H */ 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | 52 | #include "siw.h" 53 | #include "siw_abi.h" 54 | 55 | 56 | int rdma_db_nr = -1; 57 | extern const int siw_debug; 58 | 59 | static struct ibv_context_ops siw_context_ops = { 60 | .query_device = siw_query_device, 61 | .query_port = siw_query_port, 62 | .query_qp = siw_query_qp, 63 | .alloc_pd = siw_alloc_pd, 64 | .dealloc_pd = siw_free_pd, 65 | .reg_mr = siw_reg_mr, 66 | .dereg_mr = siw_dereg_mr, 67 | .create_cq = siw_create_cq, 68 | .resize_cq = siw_resize_cq, 69 | .destroy_cq = siw_destroy_cq, 70 | .create_srq = siw_create_srq, 71 | .modify_srq = siw_modify_srq, 72 | .destroy_srq = siw_destroy_srq, 73 | .create_qp = siw_create_qp, 74 | .modify_qp = siw_modify_qp, 75 | .destroy_qp = siw_destroy_qp, 76 | .create_ah = siw_create_ah, 77 | .destroy_ah = siw_destroy_ah, 78 | .attach_mcast = NULL, 79 | .detach_mcast = NULL, 80 | .req_notify_cq = siw_notify_cq, 81 | }; 82 | 83 | static struct ibv_context *siw_alloc_context(struct ibv_device *ofa_dev, int fd) 84 | { 85 | struct siw_context *context; 86 | struct ibv_get_context cmd; 87 | struct siw_alloc_ucontext_resp resp; 88 | struct siw_device *siw_dev = dev_ofa2siw(ofa_dev); 89 | 90 | context = malloc(sizeof *context); 91 | 92 | if (!context) 93 | return NULL; 94 | 95 | context->ofa_ctx.cmd_fd = fd; 96 | 97 | if (ibv_cmd_get_context(&context->ofa_ctx, &cmd, sizeof cmd, 98 | &resp.ofa, sizeof resp)) { 99 | free(context); 100 | return NULL; 101 | } 102 | context->ofa_ctx.device = ofa_dev; 103 | context->ofa_ctx.ops = siw_context_ops; 104 | context->dev_id = resp.siw.dev_id; 105 | rdma_db_nr = resp.siw.rdma_db_nr; 106 | 107 | /* 108 | * here we take the chance to put in two versions of fast path 109 | * operations: private or via OFED 110 | */ 111 | switch (siw_dev->if_type) { 112 | 113 | case SIW_IF_OFED: 114 | context->ofa_ctx.ops.async_event = siw_async_event; 115 | context->ofa_ctx.ops.post_send = siw_post_send_ofed; 116 | context->ofa_ctx.ops.post_recv = siw_post_recv_ofed; 117 | context->ofa_ctx.ops.post_srq_recv = siw_post_srq_recv_ofed; 118 | context->ofa_ctx.ops.poll_cq = siw_poll_cq_ofed; 119 | 120 | break; 121 | 122 | case SIW_IF_MAPPED: 123 | context->ofa_ctx.ops.async_event = siw_async_event; 124 | context->ofa_ctx.ops.post_send = siw_post_send_mapped; 125 | context->ofa_ctx.ops.post_recv = siw_post_recv_mapped; 126 | context->ofa_ctx.ops.post_srq_recv = siw_post_srq_recv_mapped; 127 | context->ofa_ctx.ops.poll_cq = siw_poll_cq_mapped; 128 | 129 | break; 130 | 131 | default: 132 | printf("SIW IF type %d not supported\n", siw_dev->if_type); 133 | free(context); 134 | return NULL; 135 | } 136 | 137 | return 
&context->ofa_ctx; 138 | } 139 | 140 | static void siw_free_context(struct ibv_context *ofa_ctx) 141 | { 142 | struct siw_context *ctx = ctx_ofa2siw(ofa_ctx); 143 | 144 | free(ctx); 145 | } 146 | 147 | static struct ibv_device_ops siw_dev_ops = { 148 | .alloc_context = siw_alloc_context, 149 | .free_context = siw_free_context 150 | }; 151 | 152 | static struct ibv_device *siw_driver_init(const char *uverbs_sys_path, 153 | int abi_version) 154 | { 155 | char value[IFNAMSIZ + sizeof(SIW_IBDEV_PREFIX)], 156 | siw_devpath[IBV_SYSFS_PATH_MAX], 157 | node_desc[24]; 158 | struct siw_device *dev; 159 | int version, if_type; 160 | 161 | /* 162 | * Software iWARP does not have its own PCI device or 163 | * vendor ID, so there is nothing to check in that respect. 164 | * We just check the kernel module code version 165 | * and the fast path interface type. 166 | */ 167 | if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev", 168 | value, sizeof value) < 0) 169 | return NULL; 170 | 171 | memset(siw_devpath, 0, IBV_SYSFS_PATH_MAX); 172 | 173 | snprintf(siw_devpath, IBV_SYSFS_PATH_MAX, "%s/class/infiniband/%s", 174 | ibv_get_sysfs_path(), value); 175 | 176 | if (ibv_read_sysfs_file(siw_devpath, "node_desc", 177 | node_desc, sizeof node_desc) < 0) 178 | return NULL; 179 | 180 | if (strncmp(SIW_NODE_DESC_COMMON, node_desc, 181 | strlen(SIW_NODE_DESC_COMMON))) 182 | return NULL; 183 | 184 | if (ibv_read_sysfs_file(siw_devpath, "sw_version", 185 | value, sizeof value) < 0) 186 | return NULL; 187 | 188 | sscanf(value, "%i", &version); 189 | 190 | if (ibv_read_sysfs_file(siw_devpath, "if_type", 191 | value, sizeof value) < 0) 192 | return NULL; 193 | 194 | sscanf(value, "%i", &if_type); 195 | 196 | if (version != VERSION_ID_SOFTIWARP || 197 | (if_type != SIW_IF_OFED && if_type != SIW_IF_MAPPED)) 198 | return NULL; 199 | 200 | dev = malloc(sizeof *dev); 201 | if (!dev) 202 | return NULL; 203 | 204 | pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE); 205 | dev->ofa_dev.ops = siw_dev_ops; 206 | dev->if_type = if_type; 207 | 208 | return &dev->ofa_dev; 209 | } 210 | 211 | static __attribute__((constructor)) void siw_register_driver(void) 212 | { 213 | ibv_register_driver("siw", siw_driver_init); 214 | } 215 | -------------------------------------------------------------------------------- /userlib/src/siw.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP library for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution.
24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #ifndef _SIW_H 40 | #define _SIW_H 41 | 42 | #include 43 | #include 44 | #include 45 | 46 | #include 47 | #include 48 | 49 | #define _NR_SYSCALL_DB 313 50 | 51 | 52 | enum siw_if_type { 53 | SIW_IF_OFED = 0, /* only via standard ofed syscall if */ 54 | SIW_IF_MAPPED = 1 /* private qp and cq mapping */ 55 | }; 56 | 57 | struct siw_device { 58 | struct ibv_device ofa_dev; 59 | enum siw_if_type if_type; /* private fast path or ofed generic */ 60 | pthread_spinlock_t lock; 61 | }; 62 | 63 | struct siw_pd { 64 | struct ibv_pd ofa_pd; 65 | }; 66 | 67 | struct siw_srq { 68 | struct ibv_srq ofa_srq; 69 | struct siw_rqe *recvq; 70 | uint32_t rq_put; 71 | uint32_t num_rqe; 72 | pthread_spinlock_t lock; 73 | }; 74 | 75 | struct siw_mr { 76 | struct ibv_mr ofa_mr; 77 | uint64_t fbo; 78 | uint32_t pbl_addr; 79 | uint32_t len; 80 | }; 81 | 82 | struct siw_qp { 83 | struct ibv_qp ofa_qp; 84 | struct siw_device *siw_dev; 85 | 86 | uint32_t id; 87 | uint32_t dev_id; 88 | 89 | uint32_t num_sqe; 90 | uint32_t sq_put; 91 | struct siw_sqe *sendq; 92 | pthread_spinlock_t sq_lock; 93 | 94 | uint32_t num_rqe; 95 | uint32_t rq_put; 96 | struct siw_rqe *recvq; 97 | pthread_spinlock_t rq_lock; 98 | 99 | struct siw_srq *srq; 100 | 101 | int sq_sig_all; 102 | }; 103 | 104 | struct siw_cq { 105 | struct ibv_cq ofa_cq; 106 | struct siw_device *siw_dev; 107 | uint32_t id; 108 | 109 | /* Points to kernel shared control 110 | * object at the end of CQE array */ 111 | struct siw_cq_ctrl *ctrl; 112 | 113 | int num_cqe; 114 | uint32_t cq_get; 115 | struct siw_cqe *queue; 116 | pthread_spinlock_t lock; 117 | }; 118 | 119 | 120 | struct siw_context { 121 | struct ibv_context ofa_ctx; 122 | uint32_t dev_id; 123 | }; 124 | 125 | 126 | #undef offsetof 127 | #ifdef __compiler_offsetof 128 | #define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) 129 | #else 130 | #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) 131 | #endif 132 | 133 | 134 | /* from */ 135 | #ifndef container_of 136 | #define container_of(ptr, type, member) ({ \ 137 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ 138 | (type *)( (char *)__mptr - offsetof(type,member) );}) 139 | #endif 140 | 141 | #define ctx_ofa2siw(ibv_ctx) container_of(ibv_ctx, struct siw_context, ofa_ctx) 142 | #define dev_ofa2siw(ibv_dev) container_of(ibv_dev, struct siw_device, ofa_dev) 143 | #define qp_ofa2siw(ibv_qp) container_of(ibv_qp, struct siw_qp, ofa_qp) 144 | #define cq_ofa2siw(ibv_cq) container_of(ibv_cq, struct siw_cq, ofa_cq) 145 | #define mr_ofa2siw(ibv_mr) container_of(ibv_mr, struct siw_mr, ofa_mr) 146 | #define srq_ofa2siw(ibv_srq) container_of(ibv_srq, struct siw_srq, ofa_srq) 147 | 148 | extern int siw_query_device(struct ibv_context *, struct ibv_device_attr *); 149 | extern int siw_query_port(struct ibv_context *, 
uint8_t, struct ibv_port_attr *); 150 | /* 151 | * atr: Adding support for ibv_query_qp 152 | */ 153 | extern int siw_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *, 154 | int attr_mask, struct ibv_qp_init_attr *); 155 | 156 | extern struct ibv_pd *siw_alloc_pd(struct ibv_context *); 157 | extern int siw_free_pd(struct ibv_pd *); 158 | extern struct ibv_mr *siw_reg_mr(struct ibv_pd *, void *, size_t, int); 159 | extern int siw_dereg_mr(struct ibv_mr *); 160 | extern struct ibv_cq *siw_create_cq(struct ibv_context *, int, 161 | struct ibv_comp_channel *, int); 162 | extern int siw_resize_cq(struct ibv_cq *, int); 163 | extern int siw_destroy_cq(struct ibv_cq *); 164 | extern int siw_notify_cq(struct ibv_cq *, int); 165 | extern int siw_poll_cq_ofed(struct ibv_cq *, int, struct ibv_wc *); 166 | extern int siw_poll_cq_mapped(struct ibv_cq *, int, struct ibv_wc *); 167 | 168 | extern struct ibv_srq *siw_create_srq(struct ibv_pd *, 169 | struct ibv_srq_init_attr *); 170 | extern int siw_modify_srq(struct ibv_srq *, struct ibv_srq_attr *, int); 171 | extern int siw_destroy_srq(struct ibv_srq *); 172 | 173 | extern int siw_post_srq_recv(struct ibv_srq *, struct ibv_recv_wr *, 174 | struct ibv_recv_wr **); 175 | extern struct ibv_qp *siw_create_qp(struct ibv_pd *, struct ibv_qp_init_attr *); 176 | extern int siw_modify_qp(struct ibv_qp *, struct ibv_qp_attr *, int); 177 | extern int siw_destroy_qp(struct ibv_qp *); 178 | 179 | extern int siw_post_send_ofed(struct ibv_qp *, struct ibv_send_wr *, 180 | struct ibv_send_wr **); 181 | extern int siw_post_send_mapped(struct ibv_qp *, struct ibv_send_wr *, 182 | struct ibv_send_wr **); 183 | extern int siw_post_recv_ofed(struct ibv_qp *, struct ibv_recv_wr *, 184 | struct ibv_recv_wr **); 185 | extern int siw_post_recv_mapped(struct ibv_qp *, struct ibv_recv_wr *, 186 | struct ibv_recv_wr **); 187 | extern int siw_post_srq_recv_ofed(struct ibv_srq *, struct ibv_recv_wr *, 188 | struct ibv_recv_wr **); 189 | extern int siw_post_srq_recv_mapped(struct ibv_srq *, struct ibv_recv_wr *, 190 | struct ibv_recv_wr **); 191 | 192 | extern struct ibv_ah *siw_create_ah(struct ibv_pd *, struct ibv_ah_attr *); 193 | extern int siw_destroy_ah(struct ibv_ah *); 194 | 195 | extern void siw_async_event(struct ibv_async_event *); 196 | 197 | 198 | #endif /* _SIW_H */ 199 | -------------------------------------------------------------------------------- /userlib/src/siw_abi.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP library for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 
24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #ifndef _SIW_ABI_H 40 | #define _SIW_ABI_H 41 | 42 | 43 | #include 44 | #include "siw_user.h" 45 | 46 | /* 47 | * response structures for resource allocation calls 48 | */ 49 | 50 | struct siw_alloc_pd { 51 | struct ibv_alloc_pd ofa; 52 | }; 53 | 54 | struct siw_alloc_pd_resp { 55 | struct ibv_alloc_pd_resp ofa; 56 | uint32_t pd_id; 57 | }; 58 | 59 | struct siw_alloc_ucontext_resp { 60 | struct ibv_get_context_resp ofa; 61 | struct siw_uresp_alloc_ctx siw; 62 | }; 63 | 64 | struct siw_cmd_reg_umr_req { 65 | struct ibv_reg_mr ofa; 66 | uint8_t stag_key; 67 | uint8_t reserved[3]; 68 | }; 69 | 70 | struct siw_cmd_reg_umr_resp { 71 | struct ibv_reg_mr_resp ofa; 72 | uint32_t stag; 73 | }; 74 | 75 | struct siw_cmd_create_cq { 76 | struct ibv_create_cq ofa; 77 | }; 78 | 79 | struct siw_cmd_create_cq_resp { 80 | struct ibv_create_cq_resp ofa; 81 | struct siw_uresp_create_cq siw; 82 | }; 83 | 84 | struct siw_cmd_create_qp { 85 | struct ibv_create_qp ofa; 86 | }; 87 | 88 | struct siw_cmd_create_qp_resp { 89 | struct ibv_create_qp_resp ofa; 90 | struct siw_uresp_create_qp siw; 91 | }; 92 | 93 | struct siw_cmd_create_srq { 94 | struct ibv_create_srq ofa; 95 | }; 96 | 97 | struct siw_cmd_create_srq_resp { 98 | struct ibv_create_srq_resp ofa; 99 | struct siw_uresp_create_srq siw; 100 | }; 101 | #endif /* _SIW_ABI_H */ 102 | -------------------------------------------------------------------------------- /userlib/src/siw_uverbs.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP library for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission. 
28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #if HAVE_CONFIG_H 40 | # include 41 | #endif /* HAVE_CONFIG_H */ 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | 51 | #include 52 | #include "siw.h" 53 | #include "siw_abi.h" 54 | 55 | 56 | #define _load_shared(a) (*(volatile typeof(a) *)&(a)) 57 | #define _store_shared(a, b) do { \ 58 | _load_shared(a) = (b); wmb(); \ 59 | } while (0) 60 | 61 | extern const int siw_debug; 62 | extern int rdma_db_nr; 63 | 64 | int siw_notify_cq(struct ibv_cq *ibcq, int solicited) 65 | { 66 | struct siw_cq *cq = cq_ofa2siw(ibcq); 67 | int rv = 0; 68 | 69 | if (cq->ctrl) { 70 | if (solicited) 71 | _store_shared(cq->ctrl->notify, SIW_NOTIFY_SOLICITED); 72 | else 73 | _store_shared(cq->ctrl->notify, SIW_NOTIFY_SOLICITED | 74 | SIW_NOTIFY_NEXT_COMPLETION); 75 | 76 | } else { 77 | pthread_spin_lock(&cq->lock); 78 | rv = ibv_cmd_req_notify_cq(ibcq, solicited); 79 | pthread_spin_unlock(&cq->lock); 80 | } 81 | return rv; 82 | } 83 | 84 | 85 | int siw_post_send_ofed(struct ibv_qp *ofa_qp, struct ibv_send_wr *wr, 86 | struct ibv_send_wr **bad_wr) 87 | { 88 | struct siw_qp *qp = qp_ofa2siw(ofa_qp); 89 | int rv; 90 | 91 | pthread_spin_lock(&qp->sq_lock); 92 | rv = ibv_cmd_post_send(ofa_qp, wr, bad_wr); 93 | pthread_spin_unlock(&qp->sq_lock); 94 | 95 | return rv; 96 | } 97 | 98 | int siw_post_recv_ofed(struct ibv_qp *ofa_qp, struct ibv_recv_wr *wr, 99 | struct ibv_recv_wr **bad_wr) 100 | { 101 | struct siw_qp *qp = qp_ofa2siw(ofa_qp); 102 | int rv; 103 | 104 | pthread_spin_lock(&qp->rq_lock); 105 | rv = ibv_cmd_post_recv(ofa_qp, wr, bad_wr); 106 | pthread_spin_unlock(&qp->rq_lock); 107 | 108 | return rv; 109 | } 110 | 111 | int siw_post_srq_recv_ofed(struct ibv_srq *ofa_srq, struct ibv_recv_wr *wr, 112 | struct ibv_recv_wr **bad_wr) 113 | { 114 | struct siw_srq *srq = srq_ofa2siw(ofa_srq); 115 | int rv; 116 | 117 | pthread_spin_lock(&srq->lock); 118 | rv = ibv_cmd_post_srq_recv(ofa_srq, wr, bad_wr); 119 | pthread_spin_unlock(&srq->lock); 120 | 121 | return rv; 122 | } 123 | 124 | int siw_poll_cq_ofed(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) 125 | { 126 | struct siw_cq *cq = cq_ofa2siw(ibcq); 127 | int rv; 128 | 129 | pthread_spin_lock(&cq->lock); 130 | rv = ibv_cmd_poll_cq(ibcq, num_entries, wc); 131 | pthread_spin_unlock(&cq->lock); 132 | 133 | return rv; 134 | } 135 | 136 | static enum siw_opcode map_send_opcode(enum ibv_wr_opcode ibv_op) 137 | { 138 | switch (ibv_op) { 139 | 140 | case IBV_WR_SEND: return SIW_OP_SEND; 141 | case IBV_WR_RDMA_WRITE: return SIW_OP_WRITE; 142 | case IBV_WR_RDMA_READ: return SIW_OP_READ; 143 | default: 144 | printf("op %d not supported\n", ibv_op); 145 | } 146 | return SIW_NUM_OPCODES + 1; 147 | } 148 | 149 | static inline uint16_t map_send_flags(int ibv_flags) 150 | { 151 | uint16_t flags = SIW_WQE_VALID; 152 | 153 | if (ibv_flags & IBV_SEND_SIGNALED) flags |= SIW_WQE_SIGNALLED; 154 | if (ibv_flags & IBV_SEND_SOLICITED) flags |= SIW_WQE_SOLICITED; 155 | if (ibv_flags & 
IBV_SEND_INLINE) flags |= SIW_WQE_INLINE; 156 | if (ibv_flags & IBV_SEND_FENCE) flags |= SIW_WQE_READ_FENCE; 157 | 158 | return flags; 159 | } 160 | 161 | static inline int push_send_wqe(struct ibv_send_wr *ofa_wr, 162 | struct siw_sqe *siw_sqe, int sig_all) 163 | { 164 | uint32_t flags = map_send_flags(ofa_wr->send_flags); 165 | 166 | siw_sqe->id = ofa_wr->wr_id; 167 | siw_sqe->num_sge = ofa_wr->num_sge; 168 | siw_sqe->raddr = ofa_wr->wr.rdma.remote_addr; 169 | siw_sqe->rkey = ofa_wr->wr.rdma.rkey; 170 | 171 | siw_sqe->opcode = map_send_opcode(ofa_wr->opcode); 172 | 173 | if (sig_all) 174 | flags |= SIW_WQE_SIGNALLED; 175 | 176 | if (flags & SIW_WQE_INLINE) { 177 | char *db = (char *)&siw_sqe->sge[1]; 178 | int bytes = 0, i = 0; 179 | 180 | if (ofa_wr->num_sge > SIW_MAX_SGE) { 181 | if (siw_debug) 182 | printf("too many SGEs: %d\n", ofa_wr->num_sge); 183 | return -EINVAL; 184 | } 185 | while (i < ofa_wr->num_sge) { 186 | 187 | bytes += ofa_wr->sg_list[i].length; 188 | if (bytes > (int)SIW_MAX_INLINE) { 189 | if (siw_debug) 190 | printf("inline data too long: %d:%d\n", 191 | bytes, (int)SIW_MAX_INLINE); 192 | return -EINVAL; 193 | } 194 | memcpy(db, (void *)ofa_wr->sg_list[i].addr, 195 | ofa_wr->sg_list[i].length); 196 | db += ofa_wr->sg_list[i++].length; 197 | } 198 | siw_sqe->sge[0].length = bytes; 199 | 200 | } else if (ofa_wr->num_sge == 1) { 201 | siw_sqe->sge[0].laddr = ofa_wr->sg_list[0].addr; 202 | siw_sqe->sge[0].length = ofa_wr->sg_list[0].length; 203 | siw_sqe->sge[0].lkey = ofa_wr->sg_list[0].lkey; 204 | } else if (ofa_wr->num_sge && ofa_wr->num_sge <= SIW_MAX_SGE) 205 | /* this assumes same layout of siw and ofa SGE */ 206 | memcpy(siw_sqe->sge, ofa_wr->sg_list, 207 | siw_sqe->num_sge * sizeof(struct ibv_sge)); 208 | else 209 | return 1; 210 | 211 | /* TODO: handle inline data */ 212 | 213 | if (siw_debug) 214 | printf("push SQ len %u, id %lx, op %d, num_sge %d, addr %lx\n", 215 | siw_sqe->sge[0].length, siw_sqe->id, siw_sqe->opcode, 216 | siw_sqe->num_sge, siw_sqe->sge[0].laddr); 217 | 218 | _store_shared(siw_sqe->flags, flags); 219 | 220 | return 0; 221 | } 222 | 223 | static int siw_db_ofa(struct ibv_qp *ofa_qp) 224 | { 225 | struct ibv_post_send req; 226 | struct ibv_post_send_resp resp; 227 | int rv; 228 | 229 | req.command = IB_USER_VERBS_CMD_POST_SEND; 230 | req.in_words = (sizeof req) / 4; 231 | req.out_words = (sizeof resp) / 4; 232 | req.response = (uintptr_t)&resp; 233 | req.qp_handle = ofa_qp->handle; 234 | req.wr_count = 0; 235 | req.sge_count = 0; 236 | req.wqe_size = sizeof(struct ibv_send_wr); 237 | 238 | rv = write(ofa_qp->context->cmd_fd, &req, sizeof req); 239 | if (rv == sizeof req) 240 | rv = 0; 241 | else 242 | perror("write: "); 243 | 244 | return rv; 245 | } 246 | 247 | int siw_post_send_mapped(struct ibv_qp *ofa_qp, struct ibv_send_wr *wr, 248 | struct ibv_send_wr **bad_wr) 249 | { 250 | struct siw_qp *qp = qp_ofa2siw(ofa_qp); 251 | uint32_t sq_put; 252 | int rv = 0; 253 | 254 | pthread_spin_lock(&qp->sq_lock); 255 | 256 | *bad_wr = NULL; 257 | sq_put = qp->sq_put; 258 | 259 | /* 260 | * push all work requests into mapped SQ and ring DB 261 | * via empty OFA call 262 | */ 263 | while (wr) { 264 | int idx = sq_put % qp->num_sqe; 265 | struct siw_sqe *sqe = &qp->sendq[idx]; 266 | uint16_t sqe_flags = _load_shared(sqe->flags); 267 | 268 | rmb(); 269 | 270 | if (!(sqe_flags & SIW_WQE_VALID)) { 271 | if (push_send_wqe(wr, sqe, qp->sq_sig_all)) { 272 | rv = -ENOMEM; 273 | *bad_wr = wr; 274 | break; 275 | } 276 | } else { 277 | if (siw_debug) 278 |
printf("QP[%d]: SQ overflow, idx %d\n", 279 | qp->id, idx); 280 | rv = -ENOMEM; 281 | *bad_wr = wr; 282 | break; 283 | } 284 | sq_put++; 285 | wr = wr->next; 286 | } 287 | if (sq_put != qp->sq_put) { 288 | if (rdma_db_nr > 0) 289 | rv = syscall(rdma_db_nr, SIW_DB_SQ, 290 | qp->dev_id, qp->id); 291 | else 292 | rv = siw_db_ofa(ofa_qp); 293 | if (rv) 294 | *bad_wr = wr; 295 | 296 | qp->sq_put = sq_put; 297 | } 298 | pthread_spin_unlock(&qp->sq_lock); 299 | 300 | return rv; 301 | } 302 | 303 | 304 | static inline int push_recv_wqe(struct ibv_recv_wr *ofa_wr, 305 | struct siw_rqe *siw_rqe) 306 | { 307 | siw_rqe->id = ofa_wr->wr_id; 308 | siw_rqe->num_sge = ofa_wr->num_sge; 309 | 310 | if (ofa_wr->num_sge == 1) { 311 | siw_rqe->sge[0].laddr = ofa_wr->sg_list[0].addr; 312 | siw_rqe->sge[0].length = ofa_wr->sg_list[0].length; 313 | siw_rqe->sge[0].lkey = ofa_wr->sg_list[0].lkey; 314 | } else if (ofa_wr->num_sge && ofa_wr->num_sge <= SIW_MAX_SGE) 315 | /* this assumes same layout of siw and ofa SGE */ 316 | memcpy(siw_rqe->sge, ofa_wr->sg_list, 317 | sizeof(struct ibv_sge) * ofa_wr->num_sge); 318 | else 319 | return 1; 320 | 321 | if (siw_debug) 322 | printf("push RQ len %u, id %lx, num_sge %d\n", 323 | siw_rqe->sge[0].length, siw_rqe->id, siw_rqe->num_sge); 324 | 325 | _store_shared(siw_rqe->flags, SIW_WQE_VALID); 326 | 327 | return 0; 328 | } 329 | 330 | int siw_post_recv_mapped(struct ibv_qp *ofa_qp, struct ibv_recv_wr *wr, 331 | struct ibv_recv_wr **bad_wr) 332 | { 333 | struct siw_qp *qp = qp_ofa2siw(ofa_qp); 334 | uint32_t rq_put; 335 | int rv = 0; 336 | 337 | pthread_spin_lock(&qp->rq_lock); 338 | 339 | rq_put = qp->rq_put; 340 | 341 | while (wr) { 342 | int idx = rq_put % qp->num_rqe; 343 | struct siw_rqe *rqe = &qp->recvq[idx]; 344 | uint32_t rqe_flags = _load_shared(rqe->flags); 345 | 346 | rmb(); 347 | 348 | if (!(rqe_flags & SIW_WQE_VALID)) { 349 | if (push_recv_wqe(wr, rqe)) { 350 | *bad_wr = wr; 351 | rv = -EINVAL; 352 | break; 353 | } 354 | } else { 355 | if (siw_debug) 356 | printf("QP[%d]: RQ overflow, idx %d\n", 357 | qp->id, idx); 358 | rv = -ENOMEM; 359 | *bad_wr = wr; 360 | break; 361 | } 362 | rq_put++; 363 | wr = wr->next; 364 | } 365 | qp->rq_put = rq_put; 366 | 367 | pthread_spin_unlock(&qp->rq_lock); 368 | 369 | return rv; 370 | } 371 | 372 | int siw_post_srq_recv_mapped(struct ibv_srq *ofa_srq, struct ibv_recv_wr *wr, 373 | struct ibv_recv_wr **bad_wr) 374 | { 375 | struct siw_srq *srq = srq_ofa2siw(ofa_srq); 376 | uint32_t srq_put; 377 | int rv = 0; 378 | 379 | pthread_spin_lock(&srq->lock); 380 | 381 | srq_put = srq->rq_put; 382 | 383 | while (wr) { 384 | int idx = srq_put % srq->num_rqe; 385 | struct siw_rqe *rqe = &srq->recvq[idx]; 386 | uint32_t rqe_flags = _load_shared(rqe->flags); 387 | 388 | rmb(); 389 | 390 | if (!(rqe_flags & SIW_WQE_VALID)) { 391 | if (push_recv_wqe(wr, rqe)) { 392 | *bad_wr = wr; 393 | rv = -EINVAL; 394 | break; 395 | } 396 | } else { 397 | if (siw_debug) 398 | printf("SRQ[%p]: SRQ overflow\n", srq); 399 | rv = -ENOMEM; 400 | *bad_wr = wr; 401 | break; 402 | } 403 | srq_put++; 404 | wr = wr->next; 405 | 406 | } 407 | srq->rq_put = srq_put; 408 | 409 | pthread_spin_unlock(&srq->lock); 410 | 411 | return rv; 412 | } 413 | 414 | 415 | static struct { 416 | enum siw_opcode siw; 417 | enum ibv_wc_opcode ofa; 418 | } map_cqe_opcode [SIW_NUM_OPCODES] = { 419 | {SIW_OP_WRITE, IBV_WC_RDMA_WRITE}, 420 | {SIW_OP_READ, IBV_WC_RDMA_READ}, 421 | {SIW_OP_SEND, IBV_WC_SEND}, 422 | {SIW_OP_SEND_WITH_IMM, -1}, 423 | /* Unsupported */ 424 | 
{SIW_OP_FETCH_AND_ADD, IBV_WC_FETCH_ADD}, 425 | {SIW_OP_COMP_AND_SWAP, IBV_WC_COMP_SWAP}, 426 | {SIW_OP_INVAL_STAG, -1}, 427 | {SIW_OP_RECEIVE, IBV_WC_RECV} 428 | }; 429 | 430 | static struct { 431 | enum siw_wc_status siw; 432 | enum ibv_wc_status ofa; 433 | } map_cqe_status [SIW_NUM_WC_STATUS] = { 434 | {SIW_WC_SUCCESS, IBV_WC_SUCCESS}, 435 | {SIW_WC_LOC_LEN_ERR, IBV_WC_LOC_LEN_ERR}, 436 | {SIW_WC_LOC_PROT_ERR, IBV_WC_LOC_PROT_ERR}, 437 | {SIW_WC_LOC_QP_OP_ERR, IBV_WC_LOC_QP_OP_ERR}, 438 | {SIW_WC_WR_FLUSH_ERR, IBV_WC_WR_FLUSH_ERR}, 439 | {SIW_WC_BAD_RESP_ERR, IBV_WC_BAD_RESP_ERR}, 440 | {SIW_WC_LOC_ACCESS_ERR, IBV_WC_LOC_ACCESS_ERR}, 441 | {SIW_WC_REM_ACCESS_ERR, IBV_WC_REM_ACCESS_ERR}, 442 | {SIW_WC_GENERAL_ERR, IBV_WC_GENERAL_ERR} 443 | }; 444 | 445 | static inline void copy_cqe(struct siw_cqe *cqe, struct ibv_wc *wc) 446 | { 447 | if (siw_debug) 448 | printf("report CQE len %u, id %lx, op %d, status %d, QP %u\n", 449 | cqe->bytes, cqe->id, cqe->opcode, cqe->status, 450 | (uint32_t)cqe->qp_id); 451 | 452 | wc->wr_id = cqe->id; 453 | wc->byte_len = cqe->bytes; 454 | 455 | /* No immediate data supported yet */ 456 | wc->wc_flags = 0; 457 | wc->imm_data = 0; 458 | 459 | wc->vendor_err = 0; 460 | wc->opcode = map_cqe_opcode[cqe->opcode].ofa; 461 | wc->status = map_cqe_status[cqe->status].ofa; 462 | wc->qp_num = (uint32_t)cqe->qp_id; 463 | 464 | wmb(); 465 | _store_shared(cqe->flags, 0); 466 | } 467 | 468 | int siw_poll_cq_mapped(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) 469 | { 470 | struct siw_cq *cq = cq_ofa2siw(ibcq); 471 | int new = 0; 472 | 473 | 474 | for (; num_entries--; wc++) { 475 | struct siw_cqe *cqe; 476 | 477 | pthread_spin_lock(&cq->lock); 478 | 479 | cqe = &cq->queue[cq->cq_get % cq->num_cqe]; 480 | 481 | if (_load_shared(cqe->flags) & SIW_WQE_VALID) { 482 | copy_cqe(cqe, wc); 483 | ++cq->cq_get; 484 | pthread_spin_unlock(&cq->lock); 485 | } else { 486 | pthread_spin_unlock(&cq->lock); 487 | break; 488 | } 489 | new++; 490 | } 491 | return new; 492 | } 493 | 494 | -------------------------------------------------------------------------------- /userlib/src/siw_verbs.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Software iWARP library for Linux 3 | * 4 | * Authors: Bernard Metzler 5 | * 6 | * Copyright (c) 2008-2016, IBM Corporation 7 | * 8 | * This software is available to you under a choice of one of two 9 | * licenses. You may choose to be licensed under the terms of the GNU 10 | * General Public License (GPL) Version 2, available from the file 11 | * COPYING in the main directory of this source tree, or the 12 | * BSD license below: 13 | * 14 | * Redistribution and use in source and binary forms, with or 15 | * without modification, are permitted provided that the following 16 | * conditions are met: 17 | * 18 | * - Redistributions of source code must retain the above copyright notice, 19 | * this list of conditions and the following disclaimer. 20 | * 21 | * - Redistributions in binary form must reproduce the above copyright 22 | * notice, this list of conditions and the following disclaimer in the 23 | * documentation and/or other materials provided with the distribution. 24 | * 25 | * - Neither the name of IBM nor the names of its contributors may be 26 | * used to endorse or promote products derived from this software without 27 | * specific prior written permission.
28 | * 29 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 | * SOFTWARE. 37 | */ 38 | 39 | #if HAVE_CONFIG_H 40 | # include 41 | #endif 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | 51 | #include 52 | #include "siw.h" 53 | #include "siw_abi.h" 54 | 55 | const int siw_debug = 0; 56 | 57 | int siw_query_device(struct ibv_context *ctx, struct ibv_device_attr *attr) 58 | { 59 | struct ibv_query_device cmd; 60 | uint64_t raw_fw_ver; 61 | unsigned major, minor, sub_minor; 62 | int rv; 63 | 64 | rv = ibv_cmd_query_device(ctx, attr, &raw_fw_ver, &cmd, 65 | sizeof cmd); 66 | if (rv) 67 | return rv; 68 | 69 | major = (raw_fw_ver >> 32) & 0xffff; 70 | minor = (raw_fw_ver >> 16) & 0xffff; 71 | sub_minor = raw_fw_ver & 0xffff; 72 | 73 | snprintf(attr->fw_ver, sizeof attr->fw_ver, 74 | "%d.%d.%d", major, minor, sub_minor); 75 | 76 | return 0; 77 | } 78 | 79 | int siw_query_port(struct ibv_context *ctx, uint8_t port, 80 | struct ibv_port_attr *attr) 81 | { 82 | struct ibv_query_port cmd; 83 | 84 | return ibv_cmd_query_port(ctx, port, attr, &cmd, sizeof cmd); 85 | } 86 | 87 | 88 | int siw_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, 89 | int attr_mask, struct ibv_qp_init_attr *init_attr) 90 | { 91 | struct ibv_query_qp cmd; 92 | 93 | return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); 94 | } 95 | 96 | 97 | struct ibv_pd *siw_alloc_pd(struct ibv_context *ctx) 98 | { 99 | struct ibv_alloc_pd cmd; 100 | struct siw_alloc_pd_resp resp; 101 | struct siw_pd *pd; 102 | 103 | pd = malloc(sizeof *pd); 104 | if (!pd) 105 | return NULL; 106 | 107 | if (ibv_cmd_alloc_pd(ctx, &pd->ofa_pd, &cmd, sizeof cmd, 108 | &resp.ofa, sizeof resp)) { 109 | free(pd); 110 | return NULL; 111 | } 112 | return &pd->ofa_pd; 113 | } 114 | 115 | int siw_free_pd(struct ibv_pd *pd) 116 | { 117 | int rv; 118 | 119 | rv = ibv_cmd_dealloc_pd(pd); 120 | if (rv) 121 | return rv; 122 | 123 | free(pd); 124 | return 0; 125 | } 126 | 127 | 128 | struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, 129 | size_t len, int access) 130 | { 131 | struct siw_mr *mr; 132 | struct siw_cmd_reg_umr_req req; 133 | struct siw_cmd_reg_umr_resp resp; 134 | 135 | int rv; 136 | 137 | mr = malloc(sizeof *mr); 138 | 139 | if (!mr) 140 | return NULL; 141 | 142 | rv = ibv_cmd_reg_mr(pd, addr, len, (uintptr_t)addr, access, &mr->ofa_mr, 143 | &req.ofa, sizeof req, &resp.ofa, sizeof resp); 144 | 145 | if (rv) { 146 | free(mr); 147 | return NULL; 148 | } 149 | return &mr->ofa_mr; 150 | } 151 | 152 | int siw_dereg_mr(struct ibv_mr *ofa_mr) 153 | { 154 | struct siw_mr *mr = mr_ofa2siw(ofa_mr); 155 | int rv; 156 | 157 | rv = ibv_cmd_dereg_mr(ofa_mr); 158 | 159 | if (rv) 160 | return rv; 161 | 162 | free(mr); 163 | 164 | return 0; 165 | } 166 | 167 | struct ibv_cq *siw_create_cq(struct ibv_context *ctx, int num_cqe, 168 | struct ibv_comp_channel *channel, int comp_vector) 169 | { 170 | struct siw_cq *cq; 171 | struct siw_cmd_create_cq cmd; 172 | struct siw_cmd_create_cq_resp resp; 173 | int rv; 174 | 175 | cq = calloc(1, sizeof *cq); 176 
| if (!cq) 177 | return NULL; 178 | 179 | rv = ibv_cmd_create_cq(ctx, num_cqe, channel, comp_vector, 180 | &cq->ofa_cq, &cmd.ofa, sizeof cmd, 181 | &resp.ofa, sizeof resp); 182 | if (rv) 183 | goto fail; 184 | 185 | pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE); 186 | 187 | cq->id = resp.siw.cq_id; 188 | cq->num_cqe = resp.siw.num_cqe; 189 | 190 | if (resp.siw.cq_key <= SIW_MAX_UOBJ_KEY) { 191 | int cq_size = resp.siw.num_cqe * sizeof(struct siw_cqe) 192 | + sizeof(struct siw_cq_ctrl); 193 | 194 | if (siw_debug) 195 | printf("CQ mapping: %d elements, size %d\n", 196 | resp.siw.num_cqe, cq_size); 197 | 198 | cq->queue = mmap(NULL, cq_size, 199 | PROT_READ|PROT_WRITE, MAP_SHARED, 200 | ctx->cmd_fd, resp.siw.cq_key); 201 | 202 | if (cq->queue == MAP_FAILED) 203 | goto fail; 204 | 205 | cq->ctrl = (struct siw_cq_ctrl *)&cq->queue[cq->num_cqe]; 206 | cq->ctrl->notify = SIW_NOTIFY_NOT; 207 | } else 208 | goto fail; 209 | 210 | return &cq->ofa_cq; 211 | 212 | fail: 213 | if (siw_debug) 214 | printf("CQ mapping failed: %d", resp.siw.num_cqe); 215 | 216 | free (cq); 217 | 218 | return (struct ibv_cq *) NULL; 219 | } 220 | 221 | int siw_resize_cq(struct ibv_cq *ofa_cq, int num_cqe) 222 | { 223 | return -ENOSYS; 224 | } 225 | 226 | int siw_destroy_cq(struct ibv_cq *ofacq) 227 | { 228 | struct siw_cq *cq = cq_ofa2siw(ofacq); 229 | int rv; 230 | 231 | if (siw_debug) 232 | printf("destroy CQ[%d]\n", cq->id); 233 | 234 | pthread_spin_lock(&cq->lock); 235 | 236 | if (cq->queue) 237 | munmap(cq->queue, cq->num_cqe * sizeof(struct siw_cqe) 238 | + sizeof (struct siw_cq_ctrl)); 239 | 240 | rv = ibv_cmd_destroy_cq(ofacq); 241 | if (rv) { 242 | pthread_spin_unlock(&cq->lock); 243 | return rv; 244 | } 245 | pthread_spin_unlock(&cq->lock); 246 | 247 | free(cq); 248 | 249 | return 0; 250 | } 251 | 252 | struct ibv_srq *siw_create_srq(struct ibv_pd *pd, 253 | struct ibv_srq_init_attr *attr) 254 | { 255 | struct siw_cmd_create_srq cmd; 256 | struct siw_cmd_create_srq_resp resp; 257 | struct siw_srq *srq = calloc(1, sizeof *srq); 258 | 259 | if (!srq) 260 | return NULL; 261 | 262 | if (ibv_cmd_create_srq(pd, &srq->ofa_srq, attr, &cmd.ofa, 263 | sizeof cmd, &resp.ofa, sizeof resp)) { 264 | free(srq); 265 | return NULL; 266 | } 267 | pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE); 268 | 269 | if (resp.siw.srq_key <= SIW_MAX_UOBJ_KEY) { 270 | struct ibv_context *ctx = pd->context; 271 | int rq_size = resp.siw.num_rqe * sizeof(struct siw_rqe); 272 | 273 | srq->num_rqe = resp.siw.num_rqe; 274 | 275 | if (siw_debug) 276 | printf("SRQ mapping: %d\n", srq->num_rqe); 277 | 278 | srq->recvq = mmap(NULL, rq_size, PROT_READ|PROT_WRITE, 279 | MAP_SHARED, ctx->cmd_fd, resp.siw.srq_key); 280 | 281 | if (srq->recvq == MAP_FAILED) { 282 | if (siw_debug) 283 | printf("SRQ mapping failed: %d", 284 | resp.siw.num_rqe); 285 | srq->recvq = NULL; 286 | } 287 | } 288 | return &srq->ofa_srq; 289 | } 290 | 291 | int siw_modify_srq(struct ibv_srq *ofa_srq, struct ibv_srq_attr *attr, 292 | int attr_mask) 293 | { 294 | struct siw_srq *srq = srq_ofa2siw(ofa_srq); 295 | struct ibv_modify_srq cmd; 296 | int rv; 297 | 298 | pthread_spin_lock(&srq->lock); 299 | rv = ibv_cmd_modify_srq(ofa_srq, attr, attr_mask, &cmd, sizeof cmd); 300 | pthread_spin_unlock(&srq->lock); 301 | 302 | return rv; 303 | } 304 | 305 | int siw_destroy_srq(struct ibv_srq *ofa_srq) 306 | { 307 | struct siw_srq *srq = srq_ofa2siw(ofa_srq); 308 | 309 | pthread_spin_lock(&srq->lock); 310 | ibv_cmd_destroy_srq(ofa_srq); 311 | pthread_spin_unlock(&srq->lock); 312 | 
313 | if (srq->recvq) 314 | munmap(srq->recvq, srq->num_rqe * sizeof(struct siw_rqe)); 315 | 316 | free(srq); 317 | 318 | return 0; 319 | } 320 | 321 | struct ibv_qp *siw_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) 322 | { 323 | struct siw_cmd_create_qp cmd; 324 | struct siw_cmd_create_qp_resp resp; 325 | struct siw_qp *qp; 326 | struct ibv_context *ofa_ctx = pd->context; 327 | struct siw_context *ctx = ctx_ofa2siw(ofa_ctx); 328 | 329 | int rv; 330 | 331 | qp = calloc(1, sizeof *qp); 332 | if (!qp) 333 | return NULL; 334 | 335 | qp->dev_id = ctx->dev_id; 336 | 337 | rv = ibv_cmd_create_qp(pd, &qp->ofa_qp, attr, &cmd.ofa, 338 | sizeof cmd, &resp.ofa, sizeof resp); 339 | if (rv) 340 | goto fail; 341 | 342 | qp->id = resp.siw.qp_id; 343 | qp->num_sqe = resp.siw.num_sqe; 344 | qp->num_rqe = resp.siw.num_rqe; 345 | qp->sq_sig_all = attr->sq_sig_all; 346 | 347 | pthread_spin_init(&qp->sq_lock, PTHREAD_PROCESS_PRIVATE); 348 | pthread_spin_init(&qp->rq_lock, PTHREAD_PROCESS_PRIVATE); 349 | 350 | if (resp.siw.sq_key <= SIW_MAX_UOBJ_KEY) { 351 | int sq_size = resp.siw.num_sqe * sizeof(struct siw_sqe); 352 | 353 | if (siw_debug) 354 | printf("SQ mapping: %d\n", resp.siw.num_sqe); 355 | 356 | qp->sendq = mmap(NULL, sq_size, 357 | PROT_READ|PROT_WRITE, MAP_SHARED, 358 | ofa_ctx->cmd_fd, resp.siw.sq_key); 359 | 360 | if (qp->sendq == MAP_FAILED) { 361 | printf("SQ mapping failed: %d", resp.siw.num_sqe); 362 | qp->sendq = NULL; 363 | goto fail; 364 | } 365 | } else { 366 | if (siw_debug) 367 | printf("SQ mapping failed\n"); 368 | goto fail; 369 | } 370 | if (attr->srq) { 371 | struct siw_srq *srq = srq_ofa2siw(attr->srq); 372 | qp->srq = srq; 373 | } else if (resp.siw.rq_key <= SIW_MAX_UOBJ_KEY) { 374 | int rq_size = resp.siw.num_rqe * sizeof(struct siw_rqe); 375 | 376 | if (siw_debug) 377 | printf("RQ mapping: %d\n", resp.siw.num_rqe); 378 | 379 | qp->recvq = mmap(NULL, rq_size, 380 | PROT_READ|PROT_WRITE, MAP_SHARED, 381 | ofa_ctx->cmd_fd, resp.siw.rq_key); 382 | 383 | if (qp->recvq == MAP_FAILED) { 384 | if (siw_debug) 385 | printf("RQ mapping failed: %d\n", 386 | resp.siw.num_rqe); 387 | qp->recvq = NULL; 388 | goto fail; 389 | } 390 | } else { 391 | if (siw_debug) 392 | printf("RQ mapping failed\n"); 393 | goto fail; 394 | } 395 | return &qp->ofa_qp; 396 | 397 | fail: 398 | if (qp->sendq) 399 | munmap(qp->sendq, qp->num_sqe * sizeof(struct siw_sqe)); 400 | if (qp->recvq) 401 | munmap(qp->recvq, qp->num_rqe * sizeof(struct siw_rqe)); 402 | 403 | free(qp); 404 | 405 | return (struct ibv_qp *) NULL; 406 | } 407 | 408 | int siw_modify_qp(struct ibv_qp *ofaqp, struct ibv_qp_attr *attr, 409 | int attr_mask) 410 | { 411 | struct siw_qp *qp = qp_ofa2siw(ofaqp); 412 | struct ibv_modify_qp cmd; 413 | int rv; 414 | 415 | if (siw_debug) 416 | printf("modify QP[%d]\n", qp->id); 417 | 418 | pthread_spin_lock(&qp->sq_lock); 419 | pthread_spin_lock(&qp->rq_lock); 420 | 421 | rv = ibv_cmd_modify_qp(ofaqp, attr, attr_mask, &cmd, sizeof cmd); 422 | 423 | pthread_spin_unlock(&qp->rq_lock); 424 | pthread_spin_unlock(&qp->sq_lock); 425 | 426 | return rv; 427 | } 428 | 429 | int siw_destroy_qp(struct ibv_qp *ofaqp) 430 | { 431 | struct siw_qp *qp = qp_ofa2siw(ofaqp); 432 | int rv; 433 | 434 | if (siw_debug) 435 | printf("destroy QP[%d]\n", qp->id); 436 | 437 | pthread_spin_lock(&qp->sq_lock); 438 | pthread_spin_lock(&qp->rq_lock); 439 | 440 | if (qp->sendq) 441 | munmap(qp->sendq, qp->num_sqe * sizeof(struct siw_sqe)); 442 | if (qp->recvq) 443 | munmap(qp->recvq, qp->num_rqe * sizeof(struct siw_rqe)); 444 
| 445 | rv = ibv_cmd_destroy_qp(ofaqp); 446 | if (rv) { 447 | pthread_spin_unlock(&qp->rq_lock); 448 | pthread_spin_unlock(&qp->sq_lock); 449 | return rv; 450 | } 451 | pthread_spin_unlock(&qp->rq_lock); 452 | pthread_spin_unlock(&qp->sq_lock); 453 | 454 | free(qp); 455 | 456 | return 0; 457 | } 458 | 459 | struct ibv_ah *siw_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) 460 | { 461 | return NULL; 462 | } 463 | 464 | int siw_destroy_ah(struct ibv_ah *ah) 465 | { 466 | return -ENOSYS; 467 | } 468 | 469 | void siw_async_event(struct ibv_async_event *event) 470 | { 471 | 472 | switch (event->event_type) { 473 | 474 | case IBV_EVENT_CQ_ERR: 475 | break; 476 | 477 | case IBV_EVENT_QP_FATAL: 478 | case IBV_EVENT_QP_REQ_ERR: 479 | case IBV_EVENT_QP_ACCESS_ERR: 480 | /* TODO: flush qp */ 481 | break; 482 | 483 | case IBV_EVENT_SQ_DRAINED: 484 | case IBV_EVENT_COMM_EST: 485 | case IBV_EVENT_QP_LAST_WQE_REACHED: 486 | break; 487 | 488 | default: 489 | break; 490 | } 491 | } 492 | --------------------------------------------------------------------------------
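Taken together, `siw_alloc_context()` installs either the mapped fast-path handlers or the generic OFED ones, so an application exercises all of the above through the unchanged libibverbs entry points. A minimal sketch of posting one signalled SEND and reaping its completion follows; the helper `send_one` and its pre-created `qp`, `cq`, `mr` and `buf` arguments are illustrative assumptions, not part of this repo.

```c
#include <stdint.h>
#include <stdio.h>
#include <infiniband/verbs.h>

/* Illustrative only: whether libsiw serves these calls via the mmap'ed
 * SQ/CQ (SIW_IF_MAPPED) or the generic OFED command path (SIW_IF_OFED)
 * was decided in siw_alloc_context() and is invisible at this level. */
static int send_one(struct ibv_qp *qp, struct ibv_cq *cq,
		    struct ibv_mr *mr, void *buf, uint32_t len)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t)buf,
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_send_wr wr = {
		.wr_id      = 7,
		.sg_list    = &sge,
		.num_sge    = 1,
		.opcode     = IBV_WR_SEND,		/* -> SIW_OP_SEND */
		.send_flags = IBV_SEND_SIGNALED,	/* -> SIW_WQE_SIGNALLED */
	};
	struct ibv_send_wr *bad_wr;
	struct ibv_wc wc;
	int n;

	if (ibv_post_send(qp, &wr, &bad_wr))	/* siw_post_send_{mapped,ofed} */
		return -1;

	while ((n = ibv_poll_cq(cq, 1, &wc)) == 0)
		;				/* busy-poll for the CQE */

	if (n < 0 || wc.status != IBV_WC_SUCCESS) {
		fprintf(stderr, "SEND failed: %s\n",
			n < 0 ? "poll error" : ibv_wc_status_str(wc.status));
		return -1;
	}
	return 0;
}
```

The QP, CQ and MR setup elided here is the usual librdmacm/libibverbs bootstrap; nothing siw-specific is required of the application.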