# Copyright 2011 Obsidian Research Corp. GPLv2, see COPYING.
#
# This is filecopy/filexfer_test.py from the "Python RDMA examples"
# repository.  Repository layout:
#
#   README.rst
#   filecopy/filexfer_test.py
#
# README.rst reads:
#
#   Python RDMA examples
#   ====================
#
#   These are example scripts utilizing Python RDMA
#   (https://github.com/jgunthorpe/python-rdma).
#
#   Scripts
#   -------
#
#   * filecopy: Fast file copy remotely using RDMA.
"""Copy a file to a remote host using RDMA WRITE.

Usage:
    server:  filexfer_test.py outputfilename
    client:  filexfer_test.py ipaddr inputfile

The two sides exchange pickled ``infotype`` records over a TCP socket on
``ip_port`` and then the client RDMA-writes its memory-mapped input file
into the server's memory-mapped output file.
"""
import pickle
import socket
import contextlib
import os
import sys
import time
from mmap import mmap
from collections import namedtuple
import rdma.ibverbs as ibv
from rdma.tools import clock_monotonic
import rdma.path
import rdma.vtools

# TCP port used for the out-of-band exchange of connection information.
ip_port = 4444
# Send/receive queue depth of the QP; the CQ is sized at twice this value.
tx_depth = 100
# Not referenced anywhere in this file; kept for compatibility.
memsize = 16*1024*1024

# Record exchanged between client and server over the TCP socket.
infotype = namedtuple('infotype', 'path addr rkey size iters')


class Endpoint(object):
    """Verbs resources for one side of the transfer.

    Holds the verbs context, completion channel/queue, protection domain,
    an RC queue pair and a memory region registered over an mmap of the
    file being transferred.  Usable as a context manager; leaving the
    ``with`` block calls :meth:`close`.

    Callers are expected to assign ``path`` before calling :meth:`connect`.
    """
    ctx = None
    pd = None
    cq = None
    qp = None
    mr = None
    mem = None
    path = None
    peerinfo = None

    def __init__(self, fp, sz, dev):
        """Create the verbs objects and register the file mapping.

        fp  -- file descriptor of the file to memory-map
        sz  -- number of bytes to map (and the size used for rate reporting)
        dev -- device/end port handed to ``rdma.get_verbs``
        """
        ok = False
        try:
            self.ctx = rdma.get_verbs(dev)
            self.cc = self.ctx.comp_channel()
            self.cq = self.ctx.cq(2*tx_depth, self.cc)
            self.poller = rdma.vtools.CQPoller(self.cq)
            self.pd = self.ctx.pd()
            self.qp = self.pd.qp(ibv.IBV_QPT_RC, tx_depth, self.cq,
                                 tx_depth, self.cq)
            self.mem = mmap(fp, sz)
            self.mr = self.pd.mr(
                self.mem,
                ibv.IBV_ACCESS_LOCAL_WRITE | ibv.IBV_ACCESS_REMOTE_WRITE)
            self.size = sz
            ok = True
        finally:
            # __exit__ never runs if the constructor raises, so release
            # whatever was already acquired here.
            if not ok:
                self.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        return self.close()

    def close(self):
        """Release the verbs context and the file mapping.

        Safe to call more than once.
        """
        print("Endpoint:close")
        if self.ctx is not None:
            self.ctx.close()
            self.ctx = None
        # Unmap only after the context is closed.
        # NOTE(review): assumes ctx.close() also releases the MR that
        # references this mapping - confirm against python-rdma.
        if self.mem is not None:
            self.mem.close()
            self.mem = None

    def connect(self, peerinfo):
        """Remember the peer's info and establish the QP over ``self.path``."""
        self.peerinfo = peerinfo
        self.qp.establish(self.path, ibv.IBV_ACCESS_REMOTE_WRITE)

    def rdma(self):
        """RDMA-write the registered region to the peer and print the rate.

        Posts signaled RDMA WRITE work requests targeting the peer's
        ``addr``/``rkey`` and waits for their completions.

        Raises ibv.WCError if a completion reports a non-success status and
        rdma.RDMAError if the completion queue poll times out.
        """
        swr = ibv.send_wr(wr_id=0,
                          remote_addr=self.peerinfo.addr,
                          rkey=self.peerinfo.rkey,
                          sg_list=self.mr.sge(),
                          opcode=ibv.IBV_WR_RDMA_WRITE,
                          send_flags=ibv.IBV_SEND_SIGNALED)

        # Number of writes to perform; the file is sent exactly once.
        n = 1
        depth = min(tx_depth, n, self.qp.max_send_wr)

        tpost = clock_monotonic()
        for _ in range(depth):
            self.qp.post_send(swr)

        completions = 0
        posts = depth
        for wc in self.poller.iterwc(timeout=3):
            if wc.status != ibv.IBV_WC_SUCCESS:
                raise ibv.WCError(wc, self.cq, obj=self.qp)
            completions += 1
            if posts < n:
                # Keep the send queue full until all n writes are posted.
                self.qp.post_send(swr)
                posts += 1
                self.poller.wakeat = clock_monotonic() + 1
            if completions == n:
                break
        else:
            # The iterator ran out without seeing all completions.
            raise rdma.RDMAError("CQ timed out")

        tcomp = clock_monotonic()

        rate = self.size/1e6/(tcomp-tpost)
        print("%.1f MB/sec" % rate)


def client_mode(hostname, infilename, dev):
    """Send ``infilename`` to the server listening on ``hostname``.

    hostname   -- server host name or address
    infilename -- path of the file to send (opened read/write for mmap)
    dev        -- end port used for the RDMA connection
    """
    # The with-statement guarantees the input file is closed on every path.
    with open(infilename, "r+") as f:
        sz = os.path.getsize(infilename)
        with Endpoint(f.fileno(), sz, dev) as end:
            ret = socket.getaddrinfo(hostname, str(ip_port), 0,
                                     socket.SOCK_STREAM)
            ret = ret[0]
            with contextlib.closing(socket.socket(ret[0], ret[1])) as sock:
                sock.connect(ret[4])

                path = rdma.path.IBPath(
                    dev, SGID=end.ctx.end_port.default_gid)
                rdma.path.fill_path(end.qp, path, max_rd_atomic=0)
                path.reverse(for_reply=False)

                # sendall: a plain send() may transmit only part of the data.
                sock.sendall(pickle.dumps(infotype(path=path,
                                                   addr=end.mr.addr,
                                                   rkey=end.mr.rkey,
                                                   size=end.mem.size(),
                                                   iters=1)))
                # NOTE: a single recv(1024) assumes the whole pickled reply
                # arrives at once and fits in 1024 bytes; there is no framing.
                buf = sock.recv(1024)
                # SECURITY: pickle.loads on network data can execute
                # arbitrary code; only run this against a trusted peer.
                peerinfo = pickle.loads(buf)

                end.path = peerinfo.path
                end.path.reverse(for_reply=False)
                end.path.end_port = end.ctx.end_port

                print("path to peer %r\nMR peer raddr=%x peer rkey=%x" % (
                    end.path, peerinfo.addr, peerinfo.rkey))

                end.connect(peerinfo)
                # Synchronize the transition to RTS
                sock.sendall("Ready")
                sock.recv(1024)
                startTime = time.time()
                end.rdma()
                endTime = time.time()
                print("-- rmda end: elapsed time = %f " % (endTime - startTime))

                # Tell the server we are done, then wait for it to close.
                sock.shutdown(socket.SHUT_WR)
                sock.recv(1024)

                print("---client end")
            print("---sock close")
        print("--- endpoint close")


def server_mode(outfilename, dev):
    """Accept one client and receive its file into ``outfilename``.

    outfilename -- path of the file to create and receive into
    dev         -- end port used for the RDMA connection
    """
    ret = socket.getaddrinfo(None, str(ip_port), 0,
                             socket.SOCK_STREAM, 0,
                             socket.AI_PASSIVE)
    ret = ret[0]
    with contextlib.closing(socket.socket(ret[0], ret[1])) as sock:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind(ret[4])
        sock.listen(1)

        print("Listening port...")
        s, addr = sock.accept()
        with contextlib.closing(s):
            totalStartTime = time.time()

            peerInfoStartTime = time.time()
            # NOTE: a single recv(1024) assumes the whole pickled record
            # arrives at once and fits in 1024 bytes; there is no framing.
            buf = s.recv(1024)
            # SECURITY: pickle.loads on network data can execute arbitrary
            # code; only accept connections from trusted peers.
            peerinfo = pickle.loads(buf)
            peerInfoEndTime = time.time()
            print("sz =  %s" % (peerinfo.size,))
            print("--peerinfo: elapsed = %f secs" % (
                peerInfoEndTime - peerInfoStartTime))

            fseekStartTime = time.time()
            # The with-statement closes the output file even if the
            # transfer fails part way through.
            with open(outfilename, "w+") as f:
                # Extend the file to its final size so it can be mmap'ed.
                f.seek(peerinfo.size - 1)
                f.write("\0")
                f.flush()
                f.seek(0)
                fseekEndTime = time.time()
                print("--fseek : elapsed = %f secs" % (
                    fseekEndTime - fseekStartTime))

                endPointStartTime = time.time()
                with Endpoint(f.fileno(), peerinfo.size, dev) as end:
                    with rdma.get_gmp_mad(end.ctx.end_port,
                                          verbs=end.ctx) as umad:
                        end.path = peerinfo.path
                        end.path.end_port = end.ctx.end_port
                        rdma.path.fill_path(end.qp, end.path)
                        rdma.path.resolve_path(umad, end.path)

                    s.sendall(pickle.dumps(infotype(path=end.path,
                                                    addr=end.mr.addr,
                                                    rkey=end.mr.rkey,
                                                    size=None,
                                                    iters=None)))

                    print("path to peer %r\nMR peer raddr=%x peer rkey=%x" % (
                        end.path, peerinfo.addr, peerinfo.rkey))

                    end.connect(peerinfo)
                    endPointEndTime = time.time()
                    print("--endpoint: elapsed = %f secs" % (
                        endPointEndTime - endPointStartTime))

                    startTime = time.time()

                    # Synchronize the transition to RTS
                    s.sendall("ready")
                    s.recv(1024)

                    # Wait for the client to shut down its side, which it
                    # does after its RDMA write has completed.
                    s.shutdown(socket.SHUT_WR)
                    s.recv(1024)

                    endTime = time.time()

                    print("--xfer end: elapsed = %f secs" % (
                        endTime - startTime))
                    print("--total : elapsed = %f secs" % (
                        endTime - totalStartTime))


def main():
    """Run as server (one argument) or client (two arguments)."""
    if len(sys.argv) < 2:
        print("Usage: [server] %s outputfilename" % sys.argv[0])
        print(" [client] %s ipaddr inputfile" % sys.argv[0])
        sys.exit(1)

    if len(sys.argv) == 3:
        client_mode(sys.argv[1], sys.argv[2], rdma.get_end_port())
    else:
        server_mode(sys.argv[1], rdma.get_end_port())
    return True


if __name__ == "__main__":
    main()