├── .gitignore ├── helloworld.py ├── memcached1.py ├── memcached2.py ├── memcached_client.py ├── memcached_client2.py ├── memcached_client3.py ├── memcached_client4.py ├── memcached_client5.py ├── memcached_server.py ├── proxy1.py ├── proxy2.py ├── proxy3.py ├── proxy4.py ├── proxy5.py ├── proxy6.py ├── timingclient.py ├── tutorial.txt └── upperserver.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /helloworld.py: -------------------------------------------------------------------------------- 1 | from twisted.internet import protocol 2 | 3 | class HelloWorldProtocol(protocol.Protocol): 4 | def dataReceived(self, data): 5 | print data 6 | 7 | class HelloWorldFactory(protocol.ServerFactory): 8 | protocol = HelloWorldProtocol 9 | 10 | 11 | from twisted.internet import reactor, endpoints 12 | 13 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 14 | factory = HelloWorldFactory() 15 | endpoint.listen(factory) 16 | 17 | reactor.run() 18 | -------------------------------------------------------------------------------- /memcached1.py: -------------------------------------------------------------------------------- 1 | class Memcached(object): 2 | def __init__(self): 3 | self.store = {} 4 | 5 | def get(self, key): 6 | return self.store[key] 7 | 8 | def set(self, key, value, flags, timeout=0): 9 | self.store[key] = (value, flags) 10 | return key 11 | 12 | def delete(self, key): 13 | del self.store[key] 14 | 15 | -------------------------------------------------------------------------------- /memcached2.py: -------------------------------------------------------------------------------- 1 | class Memcached(object): 2 | def __init__(self, reactor=None): 3 | if reactor is None: 4 | from twisted.internet import reactor 5 | self.reactor = reactor 6 | self.store = {} 7 | self.timeouts = {} 8 | 9 | def get(self, key): 10 | return 
self.store[key] 11 | 12 | def set(self, key, value, flags, timeout=0): 13 | self.cancelTimeout(key) 14 | self.store[key] = (value, flags) 15 | if timeout > 0: 16 | timeoutCall = self.reactor.callLater(timeout, self.delete, key) 17 | self.timeouts[key] = timeoutCall 18 | return key 19 | 20 | def delete(self, key): 21 | self.cancelTimeout(key) 22 | del self.store[key] 23 | 24 | def cancelTimeout(self, key): 25 | dc = self.timeouts.get(key) 26 | if dc and dc.active(): 27 | dc.cancel() 28 | 29 | 30 | -------------------------------------------------------------------------------- /memcached_client.py: -------------------------------------------------------------------------------- 1 | from twisted.protocols import basic 2 | 3 | class MemcachedGetProtocol(basic.LineReceiver): 4 | def get(self, key): 5 | self.key = key 6 | self.value = None 7 | self.flags = None 8 | self.sendLine('get %s' % self.key) 9 | 10 | def lineReceived(self, line): 11 | if line.startswith('VALUE'): 12 | _, key, flags, length = line.split() 13 | self.flags = int(flags) 14 | self.length = int(length) 15 | self.buffer = [] 16 | self.setRawMode() 17 | elif line.startswith('END'): 18 | print 'GOT VALUE', self.value, self.flags 19 | self.transport.loseConnection() 20 | 21 | def rawDataReceived(self, data): 22 | self.buffer.append(data) 23 | raw = ''.join(self.buffer) 24 | if len(raw) >= self.length + len('\r\n'): 25 | self.value = raw[:self.length] 26 | rest = raw[self.length + len('\r\n'):] 27 | self.setLineMode(rest) 28 | 29 | from twisted.internet import protocol 30 | class MemcachedGetFactory(protocol.Factory): 31 | protocol = MemcachedGetProtocol 32 | 33 | from twisted.internet import reactor, endpoints 34 | 35 | endpoint = endpoints.TCP4ClientEndpoint(reactor, '127.0.0.1', 11211) 36 | factory = MemcachedGetFactory() 37 | d = endpoint.connect(factory) 38 | def got_protocol(p): 39 | p.get('a') 40 | d.addCallback(got_protocol) 41 | 42 | reactor.run() 43 | 
-------------------------------------------------------------------------------- /memcached_client2.py: -------------------------------------------------------------------------------- 1 | from twisted.protocols import basic 2 | from twisted.internet import defer 3 | 4 | class MemcachedGetProtocol(basic.LineReceiver): 5 | def get(self, key): 6 | self.key = key 7 | self.value = None 8 | self.flags = None 9 | self.deferred = defer.Deferred() 10 | self.sendLine('get %s' % self.key) 11 | return self.deferred 12 | 13 | def lineReceived(self, line): 14 | if line.startswith('VALUE'): 15 | _, key, flags, length = line.split() 16 | self.flags = int(flags) 17 | self.length = int(length) 18 | self.buffer = [] 19 | self.setRawMode() 20 | elif line.startswith('END'): 21 | self.deferred.callback((self.value, self.flags)) 22 | self.transport.loseConnection() 23 | 24 | def rawDataReceived(self, data): 25 | self.buffer.append(data) 26 | raw = ''.join(self.buffer) 27 | if len(raw) >= self.length + len('\r\n'): 28 | self.value = raw[:self.length] 29 | rest = raw[self.length + len('\r\n'):] 30 | self.setLineMode(rest) 31 | 32 | from twisted.internet import protocol 33 | class MemcachedGetFactory(protocol.Factory): 34 | protocol = MemcachedGetProtocol 35 | 36 | if __name__ == '__main__': 37 | from twisted.internet import reactor, endpoints 38 | 39 | endpoint = endpoints.TCP4ClientEndpoint(reactor, '127.0.0.1', 11211) 40 | factory = MemcachedGetFactory() 41 | d = endpoint.connect(factory) 42 | def got_protocol(p): 43 | deferredValue = p.get('a') 44 | def got_v((value, flags)): 45 | print 'VALUE', value, flags 46 | reactor.stop() 47 | deferredValue.addCallback(got_v) 48 | 49 | d.addCallback(got_protocol) 50 | 51 | reactor.run() 52 | -------------------------------------------------------------------------------- /memcached_client3.py: -------------------------------------------------------------------------------- 1 | from twisted.protocols import basic 2 | from twisted.internet import 
defer 3 | 4 | class MemcachedSetProtocol(basic.LineReceiver): 5 | def set(self, key, value, flags, timeout=0): 6 | self.deferred = defer.Deferred() 7 | length = len(value) 8 | self.sendLine('set %s %d %d %d' % (key, flags, timeout, length)) 9 | self.transport.write(value) 10 | self.sendLine('') 11 | return self.deferred 12 | 13 | def lineReceived(self, line): 14 | if line == 'STORED': 15 | self.deferred.callback(None) 16 | 17 | 18 | if __name__ == '__main__': 19 | from twisted.internet import reactor, endpoints, protocol 20 | 21 | endpoint = endpoints.TCP4ClientEndpoint(reactor, '127.0.0.1', 11211) 22 | factory = protocol.Factory() 23 | factory.protocol = MemcachedSetProtocol 24 | d = endpoint.connect(factory) 25 | def got_protocol(p): 26 | deferredValue = p.set('a', '123', 12, 0) 27 | def set_v(_): 28 | print 'STORED' 29 | reactor.stop() 30 | deferredValue.addCallback(set_v) 31 | 32 | d.addCallback(got_protocol) 33 | 34 | reactor.run() 35 | -------------------------------------------------------------------------------- /memcached_client4.py: -------------------------------------------------------------------------------- 1 | from twisted.internet import protocol, defer 2 | from memcached_client2 import MemcachedGetProtocol 3 | from memcached_client3 import MemcachedSetProtocol 4 | 5 | getFactory = protocol.Factory() 6 | getFactory.protocol = MemcachedGetProtocol 7 | 8 | setFactory = protocol.Factory() 9 | setFactory.protocol = MemcachedSetProtocol 10 | 11 | class MemcachedClient(object): 12 | def __init__(self, endpoint): 13 | self.endpoint = endpoint 14 | 15 | def get(self, key): 16 | deferredValue = defer.Deferred() 17 | d = self.endpoint.connect(getFactory) 18 | def got_protocol(p): 19 | d2 = p.get(key) 20 | def got_response(r): 21 | deferredValue.callback(r) 22 | d2.addCallback(got_response) 23 | d.addCallback(got_protocol) 24 | return deferredValue 25 | 26 | def set(self, key, value, flags, timeout=0): 27 | deferredValue = defer.Deferred() 28 | d = 
self.endpoint.connect(setFactory) 29 | def got_protocol(p): 30 | d2 = p.set(key, value, flags, timeout) 31 | def got_response(r): 32 | deferredValue.callback(r) 33 | d2.addCallback(got_response) 34 | d.addCallback(got_protocol) 35 | return deferredValue 36 | 37 | if __name__ == '__main__': 38 | 39 | from twisted.internet import reactor, endpoints 40 | endpoint = endpoints.TCP4ClientEndpoint(reactor, '127.0.0.1', 11211) 41 | 42 | client = MemcachedClient(endpoint) 43 | d = client.set('a', 'asdf', 0) 44 | def set(_): 45 | print 'SET' 46 | reactor.stop() 47 | 48 | d.addCallback(set) 49 | reactor.run() 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /memcached_client5.py: -------------------------------------------------------------------------------- 1 | from twisted.internet import protocol 2 | from memcached_client2 import MemcachedGetProtocol 3 | from memcached_client3 import MemcachedSetProtocol 4 | 5 | getFactory = protocol.Factory() 6 | getFactory.protocol = MemcachedGetProtocol 7 | 8 | setFactory = protocol.Factory() 9 | setFactory.protocol = MemcachedSetProtocol 10 | 11 | class MemcachedClient(object): 12 | def __init__(self, endpoint): 13 | self.endpoint = endpoint 14 | 15 | def get(self, key): 16 | d = self.endpoint.connect(getFactory) 17 | def got_protocol(p): 18 | return p.get(key) 19 | d.addCallback(got_protocol) 20 | return d 21 | 22 | def set(self, key, value, flags, timeout=0): 23 | d = self.endpoint.connect(setFactory) 24 | def got_protocol(p): 25 | return p.set(key, value, flags, timeout) 26 | d.addCallback(got_protocol) 27 | return d 28 | 29 | if __name__ == '__main__': 30 | from twisted.internet import reactor, endpoints 31 | endpoint = endpoints.TCP4ClientEndpoint(reactor, '127.0.0.1', 11211) 32 | client = MemcachedClient(endpoint) 33 | d = client.get('a') 34 | def got_value(r): 35 | print 'GOT', r 36 | reactor.stop() 37 | 38 | d.addCallback(got_value) 39 | reactor.run() 40 | 41 | 42 | 
43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /memcached_server.py: -------------------------------------------------------------------------------- 1 | from twisted.protocols import basic 2 | 3 | class MemcachedServerProtocol(basic.LineReceiver): 4 | def __init__(self, store): 5 | self.store = store 6 | 7 | def lineReceived(self, line): 8 | if line.startswith('get'): 9 | _, key = line.split() 10 | self.handle_get(key) 11 | 12 | elif line.startswith('set'): 13 | _, key, flags, timeout, length = line.split() 14 | self.handle_set(key, int(flags), int(timeout), int(length)) 15 | 16 | def handle_get(self, key): 17 | try: 18 | value, flags = self.store.get(key) 19 | self.sendLine('VALUE %s %d %d' % (key, flags, len(value))) 20 | self.transport.write(value) 21 | self.sendLine('') 22 | except KeyError: 23 | pass 24 | finally: 25 | self.sendLine('END') 26 | 27 | def handle_set(self, key, flags, timeout, length): 28 | self.buffer = [] 29 | self.key = key 30 | self.flags = flags 31 | self.timeout = timeout 32 | self.length = length 33 | self.setRawMode() 34 | 35 | def rawDataReceived(self, data): 36 | self.buffer.append(data) 37 | raw = ''.join(self.buffer) 38 | if len(raw) >= self.length + len('\r\n'): 39 | value = raw[:self.length] 40 | rest = raw[self.length + len('\r\n'):] 41 | self.store.set(self.key, value, self.flags, self.timeout) 42 | self.sendLine('STORED') 43 | self.setLineMode(rest) 44 | 45 | 46 | 47 | from memcached1 import Memcached 48 | from twisted.internet import protocol 49 | 50 | class MemcachedServerFactory(protocol.ServerFactory): 51 | def __init__(self): 52 | self.store = Memcached() 53 | 54 | def buildProtocol(self, addr): 55 | p = MemcachedServerProtocol(self.store) 56 | return p 57 | 58 | from twisted.internet import reactor, endpoints 59 | 60 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 11211) 61 | factory = MemcachedServerFactory() 62 | endpoint.listen(factory) 63 | 64 | 
reactor.run() 65 | -------------------------------------------------------------------------------- /proxy1.py: -------------------------------------------------------------------------------- 1 | import time 2 | import urllib2 3 | 4 | from twisted.protocols import basic 5 | 6 | class ProxyProtocol(basic.LineReceiver): 7 | def lineReceived(self, url): 8 | if not url.startswith('http://'): 9 | return 10 | start = time.time() 11 | print 'fetching', url 12 | connection = urllib2.urlopen(url) 13 | data = connection.read() 14 | print 'fetched', url, 15 | self.transport.write(data) 16 | self.transport.loseConnection() 17 | print 'in', time.time() - start 18 | 19 | from twisted.internet import protocol 20 | class ProxyFactory(protocol.ServerFactory): 21 | protocol = ProxyProtocol 22 | 23 | from twisted.internet import reactor, endpoints 24 | 25 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 26 | factory = ProxyFactory() 27 | endpoint.listen(factory) 28 | 29 | reactor.run() 30 | -------------------------------------------------------------------------------- /proxy2.py: -------------------------------------------------------------------------------- 1 | import time 2 | from twisted.web import client 3 | 4 | from twisted.protocols import basic 5 | 6 | class ProxyProtocol(basic.LineReceiver): 7 | 8 | def lineReceived(self, url): 9 | if not url.startswith('http://'): 10 | return 11 | start = time.time() 12 | print 'fetching', url 13 | deferredData = client.getPage(url) 14 | 15 | def urlFetched(data): 16 | self.transport.write(data) 17 | self.transport.loseConnection() 18 | print 'fetched', url, 19 | print 'in', time.time() - start 20 | 21 | deferredData.addCallback(urlFetched) 22 | 23 | from twisted.internet import protocol 24 | class ProxyFactory(protocol.ServerFactory): 25 | protocol = ProxyProtocol 26 | 27 | from twisted.internet import reactor, endpoints 28 | 29 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 30 | factory = ProxyFactory() 31 | 
endpoint.listen(factory) 32 | 33 | reactor.run() 34 | -------------------------------------------------------------------------------- /proxy3.py: -------------------------------------------------------------------------------- 1 | import time 2 | from twisted.web import client 3 | 4 | from twisted.protocols import basic 5 | 6 | class ProxyProtocol(basic.LineReceiver): 7 | 8 | def urlFetched(self, data, url, start): 9 | self.transport.write(data) 10 | self.transport.loseConnection() 11 | print 'fetched', url, 12 | print 'in', time.time() - start 13 | 14 | def lineReceived(self, url): 15 | if not url.startswith('http://'): 16 | return 17 | start = time.time() 18 | print 'fetching', url 19 | deferredData = client.getPage(url) 20 | deferredData.addCallback(self.urlFetched, url, start) 21 | 22 | from twisted.internet import protocol 23 | class ProxyFactory(protocol.ServerFactory): 24 | protocol = ProxyProtocol 25 | 26 | from twisted.internet import reactor, endpoints 27 | 28 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 29 | factory = ProxyFactory() 30 | endpoint.listen(factory) 31 | 32 | reactor.run() 33 | -------------------------------------------------------------------------------- /proxy4.py: -------------------------------------------------------------------------------- 1 | import time 2 | from twisted.web import client 3 | 4 | from twisted.protocols import basic 5 | 6 | class CachingProxyProtocol(basic.LineReceiver): 7 | def __init__(self, cache): 8 | self.cache = cache 9 | 10 | def urlFetched(self, data, url, start): 11 | self.transport.write(data) 12 | self.transport.loseConnection() 13 | print 'fetched', url, 14 | print 'in', time.time() - start 15 | 16 | def storeInCache(self, data, url): 17 | self.cache[url] = data 18 | return data 19 | 20 | def lineReceived(self, url): 21 | if not url.startswith('http://'): 22 | return 23 | start = time.time() 24 | print 'fetching', url 25 | if url in self.cache: 26 | data = self.cache[url] 27 | 
self.urlFetched(data, url, start) 28 | else: 29 | deferredData = client.getPage(url) 30 | deferredData.addCallback(self.storeInCache, url) 31 | deferredData.addCallback(self.urlFetched, url, start) 32 | 33 | from twisted.internet import protocol 34 | class ProxyFactory(protocol.ServerFactory): 35 | def __init__(self): 36 | self.cache = {} 37 | 38 | def buildProtocol(self, addr): 39 | p = CachingProxyProtocol(self.cache) 40 | return p 41 | 42 | from twisted.internet import reactor, endpoints 43 | 44 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 45 | factory = ProxyFactory() 46 | endpoint.listen(factory) 47 | 48 | reactor.run() 49 | -------------------------------------------------------------------------------- /proxy5.py: -------------------------------------------------------------------------------- 1 | import time 2 | from twisted.web import client 3 | from twisted.protocols import basic 4 | 5 | from twisted.internet import defer 6 | 7 | def getCachedPage(url, cache): 8 | if url in cache: 9 | data = cache[url] 10 | return defer.succeed(data) 11 | else: 12 | def _storeInCache(data, url): 13 | cache[url] = data 14 | return data 15 | d = client.getPage(url) 16 | d.addCallback(_storeInCache, url) 17 | return d 18 | 19 | class CachingProxyProtocol(basic.LineReceiver): 20 | def __init__(self, cache): 21 | self.cache = cache 22 | 23 | def lineReceived(self, url): 24 | if not url.startswith('http://'): 25 | return 26 | start = time.time() 27 | print 'fetching', url 28 | deferredData = getCachedPage(url, self.cache) 29 | deferredData.addCallback(self.urlFetched, url, start) 30 | 31 | def urlFetched(self, data, url, start): 32 | self.transport.write(data) 33 | self.transport.loseConnection() 34 | print 'fetched', url, 35 | print 'in', time.time() - start 36 | 37 | from twisted.internet import protocol 38 | class ProxyFactory(protocol.ServerFactory): 39 | def __init__(self): 40 | self.cache = {} 41 | 42 | def buildProtocol(self, addr): 43 | p = 
CachingProxyProtocol(self.cache) 44 | return p 45 | 46 | from twisted.internet import reactor, endpoints 47 | 48 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 49 | factory = ProxyFactory() 50 | endpoint.listen(factory) 51 | 52 | reactor.run() 53 | -------------------------------------------------------------------------------- /proxy6.py: -------------------------------------------------------------------------------- 1 | import time 2 | from twisted.web import client 3 | from twisted.protocols import basic 4 | 5 | def getMemcachedPage(url, memcacheClient, timeout=0): 6 | d = memcacheClient.get(url) 7 | def got((v, f)): 8 | if v is not None: 9 | return v 10 | else: 11 | d = client.getPage(url) 12 | def _storeInCache(data, url): 13 | d = memcacheClient.set(url, data, 0, timeout) 14 | d.addCallback(lambda _: data) 15 | return d 16 | d.addCallback(_storeInCache, url) 17 | return d 18 | d.addCallback(got) 19 | return d 20 | 21 | class CachingProxyProtocol(basic.LineReceiver): 22 | def __init__(self, cache): 23 | self.cache = cache 24 | 25 | def lineReceived(self, url): 26 | if not url.startswith('http://'): 27 | return 28 | start = time.time() 29 | print 'fetching', url 30 | deferredData = getMemcachedPage(url, self.cache) 31 | deferredData.addCallback(self.urlFetched, url, start) 32 | 33 | def urlFetched(self, data, url, start): 34 | self.transport.write(data) 35 | self.transport.loseConnection() 36 | print 'fetched', url, 37 | print 'in', time.time() - start 38 | 39 | from twisted.internet import protocol 40 | from memcached_client5 import MemcachedClient 41 | class ProxyFactory(protocol.ServerFactory): 42 | def __init__(self): 43 | self.cache = MemcachedClient( 44 | endpoints.TCP4ClientEndpoint(reactor, '127.0.0.1', 11211)) 45 | 46 | def buildProtocol(self, addr): 47 | p = CachingProxyProtocol(self.cache) 48 | return p 49 | 50 | from twisted.internet import reactor, endpoints 51 | 52 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 53 | factory = 
ProxyFactory() 54 | endpoint.listen(factory) 55 | 56 | reactor.run() 57 | -------------------------------------------------------------------------------- /timingclient.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | import socket 4 | 5 | def make_connection(host, port, data_to_send): 6 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 7 | s.connect((host, port)) 8 | s.send(data_to_send) 9 | s.send('\r\n') 10 | b = [] 11 | while True: 12 | data = s.recv(1024) 13 | if data: 14 | b.append(data) 15 | else: 16 | break 17 | 18 | return ''.join(b) 19 | 20 | 21 | def t_connection(host, port, d): 22 | start = time.time() 23 | make_connection(host, port, d) 24 | print d, 'took', time.time() - start 25 | 26 | if __name__ == '__main__': 27 | 28 | import sys 29 | host, port = sys.argv[1].split(':') 30 | data_to_send = sys.argv[2:] 31 | 32 | threads = [] 33 | overallstart = time.time() 34 | for d in data_to_send: 35 | t = threading.Thread(target=t_connection, args=(host, int(port), d)) 36 | t.start() 37 | threads.append(t) 38 | 39 | for t in threads: 40 | t.join() 41 | 42 | print 'FINISHED in', time.time() - overallstart 43 | 44 | -------------------------------------------------------------------------------- /tutorial.txt: -------------------------------------------------------------------------------- 1 | ================ 2 | Twisted Tutorial 3 | ================ 4 | 5 | Network I/O 101 6 | =============== 7 | 8 | Twisted is a library that can help you easily build networked applications in Python. While Twisted manages to abstract away most of the low-level network details, having at least a high-level understanding of network I/O will be very helpful. 9 | 10 | At the very basic level, when you connect two processes using a mechanism that allows them to exchange data, you have networked them. 
It helps to imagine two physical machines connected via a piece of wire (FOOTNOTE: this metaphor doesn't hold for some mechanisms like UDP, but let's ignore that for now). 11 | 12 | Once two machines are connected via a piece of wire, they can start using it. A single machine can write data at its end of the wire, and after some time it will arrive at the other end of the wire. Also, at any time data may arrive on the same end of the wire. 13 | 14 | Event-driven programming 15 | ------------------------ 16 | 17 | We've already found one complication of network programming - data may arrive at any time. So some kind of mechanism must be in place in order to read the data and forward it to interested parties. 18 | 19 | This is a form of event-driven programming, similar to GUI programming. GUI programs wait for a user to generate events such as "mouse clicked" or "key pressed" and will run specific methods to handle those events. When doing network programming, the events are streams of bytes arriving at your end of the wire. 20 | 21 | The usual way to deal with events arriving at any possible time is to have a constantly running loop that will periodically check if there is a new event and forward it to whoever might be listening. In GUI programs the OS maintains a queue of events and your program will have a UI loop that pulls events off this queue. 22 | 23 | Blocking I/O 24 | ------------ 25 | 26 | Unfortunately, network programs have one extra bit of complication. If you try to read data off a socket (that's the end of the wire) that has no data in it, your program will suspend execution until some data arrives. For small programs that just need to do one thing, this is not a problem. However, when you have a long-running process, such as a web server, that needs to stay responsive to other clients, this becomes an issue.
27 | 28 | The usual way of avoiding suspending execution, or "blocking", is to put independent socket operations in different threads or even processes. This way, one can have a central service that just handles new connections, but the actual data back-and-forth happens in parallel. 29 | 30 | This approach, while completely correct and proven over the years, has some drawbacks: 31 | 32 | * Every thread or process launched will consume some amount of memory. This means that the number of concurrent connections is limited by the available memory size. 33 | * Writing large multi-threaded programs is hard. Avoiding starvation and deadlocks, guarding access to shared data with locks, synchronisation, etc. are very hard to get right and even harder to debug. 34 | * Writing multi-process programs means that you have to use some kind of inter-process communication to pass data back and forth, which has a certain overhead. 35 | 36 | Non-blocking I/O 37 | ---------------- 38 | 39 | The alternative approach is to open the socket in a non-blocking mode. This way, before you commit to reading from or writing to a socket, you can check to see if it is ready. If it is, the OS guarantees that the operation will not block. If it isn't, you just check again a bit later. 40 | 41 | This sounds like a very good solution, and indeed you can implement it yourself using a good socket library. Unfortunately, there are a lot of things that need to be done just in the right way to ensure that no network operations will ever block, and if you need to support multiple platforms then there are subtle edge cases you need to worry about. 42 | 43 | Fortunately, Twisted has solved all those problems and will give you non-blocking network I/O on most major platforms for free, leaving you free to focus on your actual application instead of worrying about low-level network details (FOOTNOTE: All abstractions are leaky. An understanding of the basics of networks can be very beneficial).
Twisted provides an event loop that will forward data arriving from the network to interested handlers, and will also take care of sending data over the network to remote machines. This event loop all runs in a single thread, removing any reason to worry about synchronising data access or any other threading issues. 44 | 45 | My first server 46 | =============== 47 | 48 | Let's see an actual example of this. We'll write a server that listens on a TCP port, allows multiple clients to connect at the same time, and will print all data to its standard output. 49 | 50 | First we need to set up the event loop. In Twisted we do this by using a global object called "reactor": 51 | 52 | ..code: 53 | from twisted.internet import reactor 54 | reactor.run() 55 | 56 | When you run the snippet above, you will see that your program goes into an infinite loop, because the reactor loop has taken over. You will need to kill your process with ctrl-c to get your shell back. 57 | 58 | Defining behaviour 59 | ------------------ 60 | 61 | The next step is to install a network event handler. The first event we care about is someone connecting to our server. When this happens, we should create a separate instance of a class for each connection, to ensure client-specific state is separated (if we were doing this with threads, we'd use a thread-local object. However, Twisted is single-threaded, so we can use plain Object Oriented Programming approaches). The classes that we'll be creating must implement twisted.internet.interfaces.IProtocol. 62 | 63 | The design pattern of something that creates instances is called a Factory, so our "connection made" handler will be a twisted.internet.interfaces.IProtocolFactory. 64 | 65 | Twisted fortunately has concrete implementations of those interfaces. So we will be using twisted.internet.protocol.Factory and we'll subclass twisted.internet.protocol.Protocol to provide our custom behaviour.
66 | 67 | (FOOTNOTE: While Twisted has default implementations for those interfaces, you are free to implement your own should you ever need to. Just make sure you conform to the interface. It usually is a better idea to subclass BaseProtocol than to start from scratch.) 68 | 69 | .. code: helloworld.py 1:10 70 | 71 | So, first we do the necessary imports. Then we define a very simple protocol that will print whatever data arrives to its standard output. Then we define a factory that on every connection will create instances of the HelloWorldProtocol. Note how none of this knows or cares where the data comes from. Twisted will deal with creating instances of the protocol as new connections arrive, and will call 'dataReceived' whenever data becomes available. 72 | 73 | (SIDENOTE: `dataReceived` will be called with arbitrary amounts of data. The protocol class is required to buffer incoming data and try to divide it into meaningful messages. The best way is to assume you will receive data one byte at a time. That said, Twisted has a lot of useful protocol subclasses that help with usual message formats such as line-separated, length-prefixed, etc. See `twisted.protocols` for a complete list.) 74 | 75 | Listening on a port 76 | ------------------- 77 | 78 | The final piece is installing the factory. Since we want a TCP server, we will attach it to a specific TCP port. To do this, we need to use something called an Endpoint. 79 | 80 | Endpoints are a bit like our end of the wire. You create an endpoint of a specific type - in our case a TCP endpoint - with transport-specific parameters - in our case, a port number. Then you attach a factory instance to it. 81 | 82 | .. code: helloworld.py 11:17 83 | 84 | So, we start again with necessary imports. We then create an endpoint instance and a factory instance and bind them together. Finally we start the reactor loop.
85 | 86 | (SIDENOTE: While the reactor is used like a global object, new code is advised to accept reactor as a parameter. This allows for much better testing, passing special-behavior reactors, and in the future will allow for multiple reactors running together.) 87 | 88 | (SIDENOTE: Endpoints are a fairly recent (in Twisted time) API. If you have an older version of Twisted, you will need to use the equivalent but less flexible approach of doing `reactor.listenTCP(8000, factory)`.) 89 | 90 | Trying it out 91 | ------------- 92 | 93 | Try running the complete listing on a shell (watch out for firewalls blocking access!) and from another shell do `telnet 127.0.0.1 8000`. Start typing data into telnet (telnet will only send it across the wire when you hit enter - do not rely on this behaviour!). Kill the telnet process by sending the escape character (usually `ctrl-]`) followed by `ctrl-c`. Try starting multiple parallel telnet sessions. You'll find that the server quite happily accepts multiple connections. 94 | 95 | 96 | Sending data back to the client 97 | =============================== 98 | 99 | We've already made a huge step - we wrote a non-blocking server that accepts data from multiple clients in only a few lines of code. While this may be useful if you want to do things like logging, in most cases you'd want to send something back to the client. 100 | 101 | The way to send data to the client is by using the `transport` attribute on our protocol instance. This is an implementation of `twisted.internet.interfaces.ITransport`; we can think of it as the end of the wire you write data into. This happens by using `write` or `writeSequence`. 102 | 103 | So, let's extend our server to make it send every piece of data it receives back to the client, but uppercased! We do this in our `dataReceived` handler. 104 | 105 | ..code: upperserver.py 3:9 106 | 107 | So just like that we can send data back to the client.
108 | 109 | Something a bit more useful 110 | =========================== 111 | 112 | So far we've covered the very basics of writing a server. However, even with this little knowledge we can go very, very far. We will now write an almost-compliant memcached server that you can use on your webserver with any compatible memcached client. 113 | 114 | The basic stuff 115 | --------------- 116 | 117 | At its core, memcached is a key-value store. So our implementation will be based on a plain Python dictionary. 118 | 119 | .. code: memcached1.py 120 | 121 | Note we are ignoring the timeout value for simplicity. By the end of this tutorial we'll have built a truly compliant memcached server. 122 | 123 | Sharing state 124 | ------------- 125 | 126 | Memcached is useful because multiple clients can connect to it and share the same data. Therefore, we will need to create a single instance of `Memcached` and share it between different protocol instances. We do this by passing a reference to it when we build a protocol instance in our factory. 127 | 128 | .. code: memcached_server.py 53:63 129 | 130 | Our Factory now has an `__init__` method, and we also override the `buildProtocol` method to create a protocol instance with specific arguments. 131 | 132 | (SIDENOTE: You will find a lot of Twisted examples or code that uses a slightly different approach of relying on the default implementation of `twisted.internet.protocol.Factory` that sets a `factory` attribute on all protocol instances it creates, pointing to itself. This way a protocol instance can access shared state by doing `self.factory.store`. While this is normal Python usage, newcomers tend to be confused and assume some magic is going on. It is recommended that a Protocol class accepts any dependencies as arguments for code clarity and of course testability.) 133 | 134 | Specification 135 | ------------- 136 | 137 | This factory will not change throughout the tutorial. Let's see the protocol next.
We should start at the memcached protocol specification (actually, a subset of it - get and set commands). 138 | 139 | A client sends a 'get' command by sending the following string of bytes: `get <key>\r\n` where the key is a string. A 'set' command is initiated by first sending `set <key> <flags> <timeout> <length>\r\n` where flags is an integer that can be retrieved later, timeout is the number of seconds this entry will stay around and length is the length of the data that will represent the value. Then the client sends `<length>` number of bytes, followed by `\r\n`. Keys may not contain spaces or newlines, and must be less than 250 characters. 140 | 141 | Example: 142 | 143 | .. pre: 144 | 145 | set example_key 0 3600 13\r\n 146 | example_value\r\n 147 | 148 | (line break added for clarity) 149 | 150 | Decoding messages 151 | ----------------- 152 | 153 | Remember how `dataReceived` is called with any data that is available and it is the protocol's job to decode that into actual messages? In this case, it seems that splitting at lines is important. We could do it ourselves by buffering the data and looking for new line delimiters, but this is such a common requirement that Twisted already has support for it, in the form of `twisted.protocols.basic.LineReceiver`. This is a Protocol subclass that instead of dataReceived exposes a `lineReceived` method that will only be called with complete lines (without the delimiter). 154 | 155 | .. code: memcached_server.py 1:15 156 | 157 | Much easier - now lineReceived just parses the arguments and delegates to other methods to do the actual work. 158 | 159 | The spec for responding to a `get` request is straightforward. If the requested key is in the store, we respond with `VALUE <key> <flags> <length>\r\n`, then the raw data followed by '\r\n' then `END\r\n`. If the key is not in the store, just send `END\r\n`. 160 | 161 | .. code: memcached_server.py 16:25 162 | 163 | So, this is exactly what we do.
We use the `sendLine` convenience method of `basic.LineReceiver` when we need to have `\r\n` delimiters, and `self.transport.write` when we need to send raw data. 164 | 165 | The spec for responding to a `set` request is a bit more involved. This is because the client will send `set <key> <flags> <timeout> <length>\r\n` followed by `length` bytes of data followed by `\r\n`. Since the raw data *can* contain newlines or indeed any binary data, we cannot use `lineReceived` as it will try to parse them. Fortunately `basic.LineReceiver` has a way to deal with exactly this, called `setRawMode`. Calling this will deactivate the line parsing behaviour and data will be delivered instead using the `rawDataReceived` method. When we want to switch the line parsing mode on again, we'll call `setLineMode` passing in any superfluous data we might have received. 166 | 167 | When the server has managed to store the value, it will send `STORED` back. 168 | 169 | (SIDENOTE: This kind of behaviour is very usual when designing or implementing protocols. Fortunately Twisted already has support for a lot of protocols in `twisted.protocols` and a lot of utility classes ready for subclassing in `twisted.protocols.basic`.) 170 | 171 | (TODO: find if the client is allowed to send new requests if a `STORED` is pending) 172 | 173 | ..code: memcached_server.py 27:43 174 | 175 | So, when `handle_set` is called the first thing it does is initialise some instance variables that will be passed on to the actual store later on (if this seems a bit unsafe, remember that this protocol instance will handle data only from a single client connection), and then goes into raw data mode. 176 | 177 | `rawDataReceived` is then called repeatedly, buffering the received data. Once `<length>` bytes have been received, plus the newline required by the spec, we store everything in the store, reply with 'STORED\r\n' and switch line mode back on.
Since we might have received more data than we can handle (for example, the start of a new command) we need to pass that in `setLineMode` so that the line parsing mechanism can pass them into the next call of `lineReceived`. 178 | 179 | (SIDENOTE: Notice that so far everything we have been doing for implementing the protocol is not Twisted specific and with a bit of API massaging it could be reused with any similar networking library. In fact, there is a PEP being written currently that hopefully be in Python 3.4 that will provide a library-agnostic API that various protocols can provide so that they can be used with any networking library that supports the Python Async API.) 180 | 181 | ..code: memcached_server.py 58:64 182 | 183 | 184 | (TODO: make sure we haven't done anything stupid and very incompatible in the above code) 185 | 186 | (TODO: why oh why aren't those classes new-style objects? How is someone supposed to know if something has an __init__ or not?) 187 | 188 | Finally we just instantiate an endpoint and a factory and bind them together. We are using the memcached default port, 11211. Run this example and have a play with it using any memcached-compatible client and see it behaves quite as you expected (at least when using only the get and set commands). (WARNING: This implementation is not suitable for production systems and certainly not supposed to be exposed to untrusted clients) 189 | 190 | (TODO: perhaps we should point people to an actual production implementation here?) 191 | 192 | Adding timeouts 193 | --------------- 194 | 195 | The implementation, apart from its gaping security holes, has another problem - it ignores the timeout setting completely. This will mean that at some point your server will run out of memory and even worse, clients would expect things to be expiring and they would receive incorrect results. Let's fix that. 
196 | 197 | If you have a function that needs to be called some time in the future, `reactor.callLater` does this for you. It works like so: 198 | 199 | ..code: 200 | 201 | from twisted.internet import reactor 202 | 203 | def print_it(s): 204 | print s 205 | reactor.callLater(5, print_it, "5 seconds passed") 206 | reactor.callLater(10, print_it, "10 seconds passed") 207 | reactor.callLater(11, reactor.stop) 208 | 209 | reactor.run() 210 | 211 | If you run the above snippet you'll see that it will print two lines after 5 and 10 seconds respectively, and after 11 seconds it will stop and exit to the command line. 212 | 213 | (SIDENOTE: Careful readers may have read that reactor also has a `listenTCP` method, and we now also show `callLater`. The global reactor instance actually implements numerous interfaces, all found in `twisted.internet.reactor`) 214 | 215 | When scheduling a call into the future, it is useful to be able to retain a reference to it so you can cancel it or reschedule it. `callLater` returns an `IDelayedCall` instance that you can call `cancel`, `reset` or `delay` on. 216 | 217 | ..code: 218 | 219 | def print_it(p): 220 | print p 221 | 222 | def abort(call): 223 | if call.active(): 224 | print 'CANCELLING' 225 | call.cancel() 226 | 227 | delayedCall = reactor.callLater(5, print_it, 'HI') 228 | reactor.callLater(4, abort, delayedCall) 229 | reactor.callLater(6, reactor.stop) 230 | 231 | reactor.run() 232 | 233 | Running the above snippet you will see how 'HI' is never printed because the call was cancelled. Note also that you can only call `cancel` on a call that is still active. Calling `cancel` on a call that has either been called or has been cancelled will raise an Exception. 
234 | 235 | So, in our case, we will change our memcached store implementation to look like this: 236 | 237 | ..code: memcached2.py 238 | 239 | When dealing with calls that will arrive later, we must take care that a left-over timeout does not expire a key prematurely, so we cancel any timeouts that may or may not be pending before we schedule any new ones. 240 | 241 | All we need to change to support timeouts is to import `Memcached` from `memcached2` instead of `memcached1`. 242 | 243 | A summary 244 | --------- 245 | 246 | So, in bullet form, here are the key points covered so far: 247 | 248 | * To do anything in Twisted, you need to start a reactor loop by doing `reactor.run()` 249 | * To find a piece of wire to listen on, you need to instantiate an appropriate endpoint. 250 | * To handle new connections, you need a protocol factory that will build protocol instances. 251 | * Protocol instances only deal with a single connection and only live as long as that. 252 | * It is your responsibility to deal with shared state by injecting it into the protocol instances you create. Subclass a server factory. 253 | * Data will arrive in a protocol instance in arbitrary chunks. Test it with a byte at a time to make sure you are not making any false assumptions. 254 | * Implementing protocols correctly is hard, try to reuse as much code as possible. 255 | * To schedule a function call later in time, use `reactor.callLater` and keep a reference to the return value if you want to do something with it before it's called. 256 | 257 | Something even cooler - a proxy server 258 | ====================================== 259 | 260 | A memcached server is cool, but let's dive a bit deeper. Let's write a proxy server that will receive a url followed by a newline, it will fetch the url (following any redirects) and return it to the client. After it has finished sending all the data, it must close the connection to the client. Of course it has to support multiple simultaneous connections.
Let's also make it print various information messages on its stdout, such as the time taken for each request. 261 | 262 | (SIDENOTE: For reasons that will become apparent soon, the listing below is an example of what you must not do in Twisted code. If you're looking for an actually working proxy server example code, see `proxy2.py`. If you're looking for an actual HTTP proxy server, see `twisted.web.proxy`) 263 | 264 | ..code: proxy1.py 6:17 265 | 266 | The interesting bit here is the ProxyProtocol. Since we are expecting a url followed by a newline, we are inheriting from `basic.LineReceiver`. For simplicity, we ignore lines not beginning with 'http://', record the start time, use `urllib2.urlopen` to get a file-like object back, then read from it, write the data to the transport, then call `loseConnection`. Twisted will make sure that all pending data writes will be finished before the connection is closed. We finally print the time it took to fetch the url. 267 | 268 | Test it with a simple telnet connection, and you will see it works fine. Let's try more than one simultaneous client. We would expect that clients will receive a response at roughly the time it takes the server to fetch it, perhaps with a little bit of overhead. 269 | 270 | If you're following this at home, download `timingclient.py` and run it like so: `python timingclient.py 127.0.0.1`. Here are some actual results: 271 | 272 | 273 | Site Server Client 274 | ===== ====== ====== 275 | A 0.771 3.817 276 | B 0.567 2.026 277 | C 1.457 2.026 278 | D 1.019 3.815 279 | ----- ------ ------ 280 | Total: 3.813 3.817 281 | 282 | Wait a minute, this looks wrong. We expected the server and the client to take roughly the same amount of time, but it looks very off. If we look closely, it looks like some connections had to wait until other connections were finished, and in fact two of them had to wait for the total time it took the server to finish *all* requests.
283 | 284 | Eagle-eyed readers will have noticed the previous sidenote - our code is wrong. The culprit is the `urllib2.urlopen` call. When diving into the actual implementation of that call, we'll find that at its basic level it opens a plain socket to a remote machine and reads data from there. However, as we said at the very start, you cannot have single-threaded programs making blocking network connections, because the reactor loop will stall and will not have the chance to route unrelated data to other handlers. One potential way to deal with this would be to make the call in a different thread, but this would introduce the usual threading issues and would defeat the purpose of using an asynchronous I/O library in the first place! 285 | 286 | (SIDENOTE: In some cases, like database interactions that cannot be done asynchronously because of the need to support atomic transactions, using threads is the correct approach. For actual database support look at `twisted.enterprise.adbapi`. For general threading support, use `twisted.internet.threads.deferToThread`. More about deferreds soon). 287 | 288 | So we have finally hit a snag - there are *a lot* of libraries and tools out there that use blocking sockets by default. This means we cannot use them in Twisted code (unless we do it in a thread - see sidenote). Fortunately Twisted comes with a lot of batteries built-in, and there's a wealth of Twisted-compatible alternatives to choose from. Look for any library that has the `tx` prefix. For a community-maintained listing, look at http://launchpad.net/tx . 289 | 290 | In this particular case we need a library that can do HTTP requests without using blocking sockets. Twisted has this built-in in `twisted.web.client.getPage`. This is a utility function that will take a url, connect to it, and return the data some time in the future. 291 | 292 | Wait, what? 293 | ----------- 294 | 295 | Don't panic, we're still using Python and there's no magic way to receive data in the future.
What we'll do instead is install another network event handler. Similar to how we have a factory handling "connection made" events, and protocol instances handling "data received" calls, we'll install a handler for a "requested url fetch finished" event. After all, we're still receiving data over the network and we don't know when it'll arrive. 296 | 297 | In fact, `getPage` is implemented by using factories and protocols. A connection is made to a server, a new protocol instance is created that asks the server for a specific resource, and buffers data passed into its `dataReceived` method. When the server signals it's finished sending data, the protocol can then fire the "requested url fetch finished" event to anyone interested, passing the collected data along. 298 | 299 | However, while the 'connection made' and 'data received' events are common network events, it'd be madness to have a separate API for every possible event that someone can think of. What we have instead is a way to allow defining custom events and attaching arbitrary handlers to them. 300 | 301 | So, what the call to `getPage` will return is an instance of a `twisted.internet.defer.Deferred`. A Deferred instance is a specific event that will happen in the future. In this particular case, the event is 'the url you've requested has finished loading', so the event carries some data as well (similar to how `dataReceived` carries data as well, but more high-level as it will return *all* the data and not chunks). 302 | 303 | So now that we have an event, we should attach an event handler function to it. In our particular case, the event handler function might look like this: 304 | 305 | ..code: proxy2.py 15:19 306 | 307 | We attach it to the deferred instance by using `addCallback`.
308 | 309 | And in context: 310 | 311 | ..code: proxy2.py 8:21 312 | 313 | If you run `proxy2.py` and make the same requests as before, you will get results that look something like this: 314 | 315 | Site Server Client 316 | ===== ====== ====== 317 | A 0.850 0.853 318 | B 0.486 0.488 319 | C 1.582 1.584 320 | D 0.999 1.000 321 | ----- ------ ------ 322 | Total: 3.918 1.585 323 | 324 | So, each request arrived at the client at roughly the same time it took the server to fetch it, and all requests executed in parallel. So the client finished when the longest request did. This means all the requests executed in parallel and any network I/O happened in a non-blocking way. 325 | 326 | So, to recap - `getPage` will return a `Deferred` instance that represents the 'url fetched' event. We define a function named `urlFetched` that accepts a single argument, `data`. We register this function by calling `deferredData.addCallback(urlFetched)`. When all the data from the url has been fetched, `urlFetched` will be called with the fetched data, and it will write the data to the transport and close the connection. Note that since we are defining this function inside the scope of `lineReceived`, we can access the `url` and `start` variables. In Twisted-speak, `urlFetched` is called a "callback function", hence the `addCallback` method name. 327 | 328 | Defining nested functions can become confusing very soon, so `addCallback` can accept arbitrary arguments (both positional and keyword) that will be passed to the callback function. The first argument will always be the data the Deferred carries (even if it is None). 329 | 330 | In this case, the two extra arguments `urlFetched` requires are `url` and `start`, so let's make it accept them and pass them via the `addCallback` call: 331 | 332 | ..code: proxy3.py 8:20 333 | 334 | A caching proxy server 335 | ---------------------- 336 | 337 | So, we have a reasonable implementation of a proxy server.
Let's add another feature, the proxy server should cache responses and return subsequent requests to the same address from the cache. 338 | 339 | Let's start the simple way, by having just an in-memory dict that will store an address as the key and the response as the value. Since the cache must be shared, we will inject it to each protocol instance we create by subclassing the factory and defining our own `buildProtocol` method. 340 | 341 | (SIDENOTE: You might have expected we'd use the key-value store we implemented at the start of this tutorial - have patience, we need to cover a few important topics first to avoid potential confusion) 342 | 343 | Let's first handle the case where we do have the data in the cache: 344 | 345 | ..code: 346 | 347 | if url in self.cache: 348 | data = self.cache[url] 349 | self.urlFetched(data, url, start) 350 | 351 | Note how can just call `urlFetched` with the cached data. It is, after all, just a simple python method. Since it implements a well-defined piece of functionality, we can just reuse it. 352 | 353 | In the case where we don't have the data in the cache, we need to do three things: 354 | 355 | a) call `client.getPage` to initiate the request 356 | b) store the data in the cache 357 | c) call `urlFetched` with the data 358 | 359 | We know how to go from a) to b) - just define a `storeInCache` method and attach it as a callback. Going from b) to c) is a bit more tricky. We could just call `urlFetched` from `storeInCache` but it would look messy: 360 | 361 | ..code: 362 | 363 | def storeInCache(self, data, url, start): 364 | self.cache[url] = data 365 | self.urlFetched(data, url, start) 366 | 367 | We are passing in extra arguments (`start`) that will just be forwarded to `urlFetched`. A nicer thing to do would be to allow callbacks to be chained together so that one will run after another. 
This way every callback function can do just one thing (and be reused the same way we reused `urlFetched` when we already had the data available). 368 | 369 | Fortunately this is such a common pattern that `Deferred` supports it natively - just do another `addCallback`. To allow for more interesting behaviour, callbacks are required to return a result that will get passed to the next callback and so on. In our particular case, `storeInCache` will need to return the raw data so that `urlFetched` can access them. Of course, if you forget to return something in your callback functions, the next callback will receive `None`. 370 | 371 | ..code: proxy4.py 16:31 372 | 373 | That's better! We have two plain methods that do one specific thing, and we chain them together. There's another little thing we can do to improve this code though - eliminate the duplication of typing `urlFetched` twice. What we should do instead is have a utility function like `getCachedPage(url, cache)` that will contain all the cache logic and will return just a deferred we'll attach `urlFetched` to. 374 | 375 | ..code: proxy5.py 5:29 376 | 377 | That's better! `getCachedPage` is now a reusable function that can be used in other places as well (and of course, being simple and standalone, it can be easily tested as well). Client code doesn't care how storing is done (or even if it will be done at all) - client code just cares about the 'url fetched' event. 378 | 379 | You might be wondering what the `defer.succeed` is doing there. Since the `getCachedPage` function needs to return a `Deferred` so that client code can attach callbacks to it without caring where the data came from, we need to wrap actual data in a `Deferred` instance. `defer.succeed` does just that - it's a way to convert an already existing result into a deferred that will trigger its callback chain immediately.
380 | 381 | Writing clients 382 | =============== 383 | 384 | Time to put the memcached server we wrote to good use - we'll use that as our cache layer for the proxy server. This way, we can share the same cache between multiple proxy servers. 385 | 386 | In order to do that, we need to somehow connect to the cache server, and speak the expected protocol. Our final aim is to have a `getMemcachedPage` that will do all the background cache checking and return a `Deferred` instance that will carry the data. 387 | 388 | For simplicity, we'll implement the different commands as separate protocol classes, and we'll close the connection to the server once we get a response. Let's start with the `get` command protocol. 389 | 390 | (SIDENOTE: Memcached actually encourages you to maintain a single persistent connection to the server for as long as you can, in order to save the connect/teardown overhead associated with every new connection. We'll get to that a bit later. Meanwhile, if you're actually looking for a real memcached client implementation, `twisted.protocols.memcache` has what you need) 391 | 392 | ..code: memcached_client.py 1:31 393 | 394 | The first interesting observation is that this protocol has an actual method named `get` that we can call once we are connected. `rawDataReceived` and `lineReceived` look a lot like the server implementation, with one difference - when we receive the `END` line, we know the server has finished responding so we print whatever value we received (may be None) and we close the connection. The factory is very simple as well. 395 | 396 | ..code: memcached_client.py 33:43 397 | 398 | In order to actually talk to a server, we need to create an endpoint. Since this is the client side, we use a `TCP4ClientEndpoint` that takes a reactor, a host name and a port. Then instead of `listen` we call `connect` passing in our factory instance. Notice that this call returns a `Deferred`, since the connection will happen in the future.
When a connection has been made, a protocol instance will be created, and passed to our callback function, which will call `get`. 399 | 400 | Note how the server version of listening didn't return any Deferred - this is because when listening we accept multiple connections on the same port, and multiple protocol instances will be created. However for clients, every `connect` call corresponds to exactly one protocol instance, so we can get a reference to it and make any interaction we want. A factory is still used to create those instances, of course. 401 | 402 | (SIDENOTE: Some older, mostly equivalent APIs are `reactor.connectTCP` and `ClientCreator`. The endpoint API is the preferred one though). 403 | 404 | Creating Deferreds 405 | ------------------ 406 | 407 | In the above implementation, we print the value to stdout when we receive it. This is not very useful - we need to give this value to the code that called our `get` method. But, when the `get` method is called we have no value - the value will come in the future. By now you should be guessing that `get` should return a `Deferred` instance that we'll trigger once the server finishes responding. 408 | 409 | ..code: memcached_client2.py 1:22 410 | 411 | So, what `get` does is create a `Deferred` instance (it is of course a plain python class!), keep a reference to it and return it to anyone interested. When we receive a final response, we call `self.deferred.callback`, passing in a tuple of the value and the flags. Calling `callback` will start running the callback chain, passing in the result. Remember that callback functions must accept the result as a single first argument, so we need to pass just a single argument to `callback`, hence the need to wrap it in a tuple. 412 | 413 | ..code: memcached_client2.py 36:51 414 | 415 | Finally, now that `get` returns a `Deferred`, we can add a callback to it to get the value and flags. Unfortunately there's a bit of nesting going on - we'll fix that in a while.
416 | 417 | Note also that we started to do the proper thing and moved any connection specific stuff to happen only when you run the module, because we're going to be importing this module very very soon. 418 | 419 | Let's finish things off by implementing the `set` command and making it a bit easier to use. 420 | 421 | ..code: memcached_client3.py 1:15 422 | 423 | This protocol is much simpler - just write out the correct line, the raw data and finish with a new line, then wait for the 'STORED' response. 424 | 425 | ..code: memcached_client3.py 18:34 426 | 427 | Using it is also straightforward - create an endpoint, connect, get the protocol instance, call 'set', wait for the confirmation that it was actually set. Notice that we ignore the returned value (using the common underscore name to indicate we will not do anything with the argument), since the callback passes None anyway. We must accept one argument (and call `callback` with one too) in order to conform to the `Deferred` API. 428 | 429 | Notice how we don't even bother to subclass `Factory`, and we just set the protocol class directly onto the instance. This is a very common idiom in Twisted when we don't really need the factory to do anything else than building protocol instances with the default behaviour. 430 | 431 | Create an API 432 | ------------- 433 | 434 | So far we're using those protocols from the command line, but it's time to actually embed them into our proxy server. For that, we'll create a simple utility class that will do the connection for us. Since it's the callers concern where the connection should be made, we'll expect an endpoint as a parameter. 435 | 436 | (SIDENOTE: People familiar with the previous Twisted APIs will realise this is a very nice improvement. By passing in different endpoints, we can connect to a server over SSL or Unix sockets or anything that endpoints support, without changing a single line of code.) 
437 | 438 | ..code: memcached_client4.py 1:13 439 | 440 | We import the protocols, create a couple of factories (outside the class, since they don't have any kind of state) and define the constructor of our client. 441 | 442 | ..code: memcached_client4.py 15:35 443 | 444 | Now this looks ugly! There are 3 levels of nesting there, and the reason is that we have a chain of callbacks that all have potential "pauses" in them because they are accessing the network, so we need to unravel the whole chain to get to the final value. 445 | 446 | It turns out that this is again such a common requirement that Deferred has built-in support for it. All you need to do is return a *new* Deferred instance from a callback function. When this happens, the chain of callbacks is suspended until the result of that new Deferred comes back. When it does, it is passed to the next callback. This will look easier in code: 447 | 448 | ..code: memcached_client5.py 15:27 449 | 450 | This looks much cleaner. Let's see what's going on. First, we connect to the server, getting a deferred back. When that deferred fires, it will give us an instance of the relevant protocol, connected to the server. We can then call `get` or `set` on the protocol. Those calls will also return deferreds, which we just return. This is all transparent to the client: 451 | 452 | ..code: memcached_client5.py 29:39 453 | 454 | Let's walk the execution chain, just as the connection happens: 455 | 456 | * connection is established 457 | * `got_protocol` is executed 458 | * `p.get` is executed, returning a deferred 459 | * Since a callback returned a deferred, the execution pauses here, instead of proceeding to the `got_value` callback the client attached. 460 | * The server finishes responding 461 | * The deferred that paused the execution is called back with the returned value from the server 462 | * `got_value` is executed with the value of the previous callback, which is the return value from the server.
463 | 464 | The point to keep is that whenever you need to access the network in one of your callbacks, just return the deferred you get and everything will proceed normally when that is finished. 465 | 466 | Let's bring it all together 467 | --------------------------- 468 | 469 | Remember our proxy server? Let's integrate it with the key-value store. We'll need to change it a bit so that instead of a cache instance protocols will share a Memcached client instance. 470 | 471 | We don't need to change very much in our Protocol and Factory - instead of using `getCachedPage` we use `getMemcachedPage`, and instead of creating a dictionary cache, we use a `MemcachedClient`: 472 | 473 | ..code: proxy6.py 21:48 474 | 475 | Other than that, the client is exactly the same. 476 | 477 | The `getMemcachedPage` function is a bit more complicated, albeit more interesting: 478 | 479 | ..code: proxy6.py 5:19 480 | 481 | Everything starts by looking into the cache for the request value. As this returns a deferred, since we now have to go to the network, we need to specify what happens when we receive a response in the `got` callback. 482 | 483 | If we have a value in the cache, just return it. Otherwise fetch the page. When the page is fetched, store it in the cache. Since the `set` doesn't return any data, add a lambda callback that will return it to the final client code. 484 | 485 | (TODO: I think this desperately needs a diagram showing the flow. Perhaps there's a clearer way to write it too?) 486 | 487 | TUTORIAL PLAN 488 | ============= 489 | 490 | * errbacks 491 | * persistent connections 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | Asynchronous Programming 101 506 | ============================ 507 | 508 | So, we've found a way to deal with data coming from a socket in a non-blocking fashion, so we can use an event loop that will forward them to our handlers. 
However, since 509 | 510 | 511 | 512 | 513 | 514 | -------------------------------------------------------------------------------- /upperserver.py: -------------------------------------------------------------------------------- 1 | from twisted.internet import protocol 2 | 3 | class UpperProtocol(protocol.Protocol): 4 | def dataReceived(self, data): 5 | print data 6 | self.transport.write(data.upper()) 7 | 8 | class UpperFactory(protocol.ServerFactory): 9 | protocol = UpperProtocol 10 | 11 | 12 | from twisted.internet import reactor, endpoints 13 | 14 | endpoint = endpoints.TCP4ServerEndpoint(reactor, 8000) 15 | factory = UpperFactory() 16 | endpoint.listen(factory) 17 | 18 | reactor.run() 19 | --------------------------------------------------------------------------------