├── ampoule ├── test │ ├── __init__.py │ ├── test_proxy.py │ └── test_process.py ├── _version.py ├── commands.py ├── __init__.py ├── iampoule.py ├── util.py ├── child.py ├── service.py ├── rpool.py ├── main.py └── pool.py ├── .gitignore ├── .bzrignore ├── Manifest.in ├── tox.ini ├── examples ├── mapreduce │ ├── books │ │ ├── i_promes.txt │ │ └── la_divin.txt │ ├── mapreduce.py │ └── client.py ├── basic.py └── pid.py ├── setup.cfg ├── INSTALL.txt ├── .travis.yml ├── README.md ├── COPYING.txt ├── setup.py ├── twisted └── plugins │ └── ampoule_plugin.py └── NEWS.txt /ampoule/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | /build 3 | *.pyc 4 | .tox 5 | -------------------------------------------------------------------------------- /.bzrignore: -------------------------------------------------------------------------------- 1 | ./_trial_temp 2 | twisted/plugins/dropin.cache 3 | -------------------------------------------------------------------------------- /Manifest.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | recursive-include examples * 3 | recursive-include twisted * -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py36 3 | 4 | [testenv] 5 | commands = python -m twisted.trial ampoule 6 | -------------------------------------------------------------------------------- /examples/mapreduce/books/i_promes.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zendesk/ampoule/master/examples/mapreduce/books/i_promes.txt 
-------------------------------------------------------------------------------- /examples/mapreduce/books/la_divin.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zendesk/ampoule/master/examples/mapreduce/books/la_divin.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [install] 2 | optimize = 1 3 | 4 | [aliases] 5 | release = egg_info -RDb '' 6 | 7 | [bdist_wheel] 8 | universal = 1 9 | -------------------------------------------------------------------------------- /INSTALL.txt: -------------------------------------------------------------------------------- 1 | ampoule is packaged using setuptools and thus can be installed 2 | by either simply running: 3 | 4 | python setup.py develop 5 | 6 | or in the standard python way: 7 | 8 | python setup.py install 9 | 10 | Alternatively use: 11 | 12 | easy_install ampoule 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | language: python 3 | 4 | branches: 5 | only: 6 | - master 7 | 8 | script: 9 | - tox -e $TOX_ENV 10 | 11 | install: 12 | - pip install tox 13 | 14 | matrix: 15 | include: 16 | - python: 2.7 17 | env: TOX_ENV=py27 18 | - python: 3.6 19 | env: TOX_ENV=py36 20 | 21 | -------------------------------------------------------------------------------- /ampoule/_version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides ampoule version information. 3 | """ 4 | 5 | # This file is auto-generated! Do not edit! 6 | # Use `python -m incremental.update ampoule` to change this file. 
class Shutdown(amp.Command):
    """
    Command asking a pool child to shut itself down.
    """
    # The reply is delivered as an amp.QuitBox instead of a regular
    # response box.
    responseType = amp.QuitBox

class Ping(amp.Command):
    """
    Command without arguments whose answer carries one string field,
    C{response}.
    """
    response = [(b'response', amp.String())]

class Echo(amp.Command):
    """
    Command carrying one string field, C{data}, whose answer carries one
    string field, C{response}.
    """
    arguments = [(b'data', amp.String())]
    response = [(b'response', amp.String())]
class Pid(amp.Command):
    """
    Command asking a child process for its operating-system process id.
    """
    # AMP wire names have to be bytes: a native string only works on
    # Python 2. This matches the b'...' names used by ampoule.commands.
    response = [(b"pid", amp.Integer())]


class MyChild(child.AMPChild):
    """
    Pool child that, on top of the stock AMPChild commands, answers L{Pid}.
    """

    @Pid.responder
    def pid(self):
        """
        Responder for L{Pid}.

        @return: a dict whose C{"pid"} entry is the PID of the process
            running this responder. The key is the Python-side identifier
            and therefore stays a native string.
        """
        import os
        return {"pid": os.getpid()}
class IStarter(Interface):
    """
    Interface of the objects that start the child processes.
    """

    def startAMPProcess(ampChild, ampParent=None):
        """
        Start a child process that speaks AMP.

        @param ampChild: The AMP protocol spoken by the created child.
        @type ampChild: L{twisted.protocols.amp.AMP}

        @param ampParent: The AMP protocol spoken by the parent.
        @type ampParent: L{twisted.protocols.amp.AMP}
        """

    def startPythonProcess(prot, *args):
        """
        Start a Python child process driven by the given process protocol.

        @param prot: a L{protocol.ProcessProtocol} subclass
        @type prot: L{protocol.ProcessProtocol}

        @param args: a tuple of arguments that will be passed to the
                    child process.

        @return: a tuple of the child process and the deferred finished.
                 finished triggers when the subprocess dies for any reason.
        """
class Function(amp.Argument):
    """
    AMP argument that transfers a Python callable by its fully qualified
    name instead of by value.
    """

    def toString(self, inObject):
        """
        Serialize C{inObject} as its fully qualified dotted name.

        @return: the name as bytes, which is what AMP puts on the wire.
        """
        name = reflect.qual(inObject)
        # reflect.qual returns a native string; on Python 3 that is text
        # and has to be encoded before it can be placed in an AMP box.
        if not isinstance(name, bytes):
            name = name.encode("utf-8")
        return name

    def fromString(self, inString):
        """
        Resolve a dotted name received from the wire back into an object.

        @param inString: the fully qualified name, as received (bytes).
        @return: whatever object the name designates.
        """
        # namedAny works on native strings, so decode on Python 3 only
        # (on Python 2 bytes already is the native string type).
        if not isinstance(inString, str):
            inString = inString.decode("utf-8")
        # NOTE(review): this imports and returns any name the peer sends,
        # and the responders below call it. Only expose this command to
        # trusted clients.
        return reflect.namedAny(inString)


class Map(amp.Command):
    """
    Apply C{mapper} to C{filename}, producing a result file in C{outdir}.
    """
    # Wire names are bytes so the command also works on Python 3.
    arguments = [(b'mapper', Function()),
                 (b'filename', amp.Path()),
                 (b'outdir', amp.Path())]

    response = [(b'result', amp.Path())]


class Reduce(amp.Command):
    """
    Apply C{reducer} to the C{*.map} files found in C{directory}.
    """
    arguments = [(b'reducer', Function()),
                 (b'directory', amp.Path())]

    response = [(b'result', amp.Path())]


class MapReducer(child.AMPChild):
    """
    Pool child implementing the L{Map} and L{Reduce} commands.
    """

    def _call(self, fun, in_, out):
        """
        Run C{fun(in_, out)} and answer with the output path.

        @return: a Deferred firing with C{{'result': out}} once C{fun}
            has completed (C{fun} may itself return a Deferred).
        """
        d = defer.maybeDeferred(fun, in_, out)
        return d.addCallback(lambda _: {'result': out})

    @Map.responder
    def map(self, mapper, filename, outdir):
        """
        Responder for L{Map}: the output goes to C{outdir}, in a file
        named after the input file with a C{.map} extension.
        """
        in_ = filename
        out = outdir.child(filename.basename()).siblingExtension('.map')
        return self._call(mapper, in_, out)

    @Reduce.responder
    def reduce(self, reducer, directory):
        """
        Responder for L{Reduce}: feeds every C{*.map} child of
        C{directory} to the reducer, writing to C{reduced.red} inside it.
        """
        in_ = directory.globChildren('*.map')
        out = directory.child('reduced.red')
        return self._call(reducer, in_, out)
def findPackagePath(modulePath):
    """
    Walk upwards from C{modulePath} for as long as the current location
    sits next to an C{__init__} file, collecting the package names met on
    the way.

    @param modulePath: path object of the module file (it must provide
        C{basename}, C{parent}, C{sibling} and C{exists}).
    @return: a tuple of the directory that contains the outermost package
        (the candidate sys.path entry) and the dotted module name.
    """
    initSuffixes = ('py', 'pyc', 'pyo', 'pyd', 'dll')
    current = modulePath
    nameParts = [current.basename().split(".")[0]]
    while current.parent() != current:
        insidePackage = any(
            current.sibling("__init__." + suffix).exists()
            for suffix in initSuffixes
        )
        if not insidePackage:
            # No __init__ next to us: the parent is the import root.
            return current.parent(), '.'.join(nameParts)
        current = current.parent()
        nameParts.insert(0, current.basename())


def mainpoint(function):
    """
    Decorator marking C{function} as the entry point of its module.

    When the decorated function lives in the script being executed, the
    script is re-imported under its real dotted name, the function is
    called with C{sys.argv} and the interpreter exits with the returned
    code (C{None} counts as 0). Otherwise the function is returned as is.
    """
    if function.__module__ != '__main__':
        return function

    # Running as a script: work out the real module name and make sure
    # its import root is importable.
    scriptPath = FilePath(__main__.__file__)
    importRoot, moduleName = findPackagePath(scriptPath)
    rootName = importRoot.path
    if rootName not in map(os.path.abspath, sys.path):
        sys.path.insert(0, rootName)
    entry = namedAny(moduleName + '.' + function.__name__)
    exitcode = entry(sys.argv)
    sys.exit(0 if exitcode is None else exitcode)
class AMPChild(amp.AMP):
    """
    Base AMP protocol run inside every pool child. It implements the
    L{Shutdown}, L{Ping} and L{Echo} commands and stops the child's
    reactor when the connection with the parent goes away.
    """

    def __init__(self):
        super(AMPChild, self).__init__(self)
        # NOTE(review): this flag shares its name with the shutdown()
        # responder below and hides it on instances. The responder is
        # registered on the class via Shutdown.responder, so dispatch
        # presumably does not go through the instance attribute.
        self.shutdown = False

    def connectionLost(self, reason):
        """
        Stop the reactor, and exit with an error code unless the parent
        asked for the shutdown explicitly.
        """
        amp.AMP.connectionLost(self, reason)
        from twisted.internet import reactor
        try:
            reactor.stop()
        except error.ReactorNotRunning:
            # woa, this means that something bad happened,
            # most probably we received a SIGINT. Now this is only
            # a problem when you use Ctrl+C to stop the main process
            # because it would send the SIGINT to child processes too.
            # In all other cases receiving a SIGINT here would be an
            # error condition and correctly restarted. maybe we should
            # use sigprocmask?
            pass
        if not self.shutdown:
            # if the shutdown wasn't explicit we presume that it's an
            # error condition and thus we return a -1 error returncode.
            import os
            os._exit(-1)

    def shutdown(self):
        """
        This method is needed to shutdown the child gently without
        generating an exception.
        """
        log.info(u'Shutdown message received, goodbye.')
        self.shutdown = True
        return {}
    Shutdown.responder(shutdown)

    def ping(self):
        """
        Ping the child and return an answer
        """
        # amp.String values must be bytes; a native string is rejected
        # when the answer box is serialized on Python 3.
        return {'response': b"pong"}
    Ping.responder(ping)

    def echo(self, data):
        """
        Echo some data through the child.
        """
        return {'response': data}
    Echo.responder(echo)
22 | 23 | Inspiration comes from twisted.test.test_amp 24 | """ 25 | self.pp = pool.ProcessPool() 26 | self.svc = service.AMPouleService(self.pp, child.AMPChild, 0, "") 27 | self.svc.startService() 28 | self.proxy_port = self.svc.server.getHost().port 29 | self.clientFactory = ClientFactory() 30 | self.clientFactory.protocol = ClientAMP 31 | d = self.clientFactory.onMade = defer.Deferred() 32 | self.clientConn = reactor.connectTCP("127.0.0.1", 33 | self.proxy_port, 34 | self.clientFactory) 35 | self.addCleanup(self.clientConn.disconnect) 36 | self.addCleanup(self.svc.stopService) 37 | def setClient(_): 38 | self.client = self.clientFactory.theProto 39 | return d.addCallback(setClient) 40 | 41 | def test_forwardCall(self): 42 | """ 43 | Test that a call made from a client is correctly forwarded to 44 | the process pool and the result is correctly reported. 45 | """ 46 | DATA = b"hello" 47 | return self.client.callRemote(Echo, data=DATA).addCallback( 48 | self.assertEquals, {'response': DATA} 49 | ) 50 | -------------------------------------------------------------------------------- /ampoule/service.py: -------------------------------------------------------------------------------- 1 | from twisted.application import service 2 | from twisted.internet.protocol import ServerFactory 3 | from ampoule import rpool 4 | 5 | def makeService(options): 6 | """ 7 | Create the service for the application 8 | """ 9 | ms = service.MultiService() 10 | 11 | from ampoule.pool import ProcessPool 12 | from ampoule.main import ProcessStarter 13 | name = options['name'] 14 | ampport = options['ampport'] 15 | ampinterface = options['ampinterface'] 16 | child = options['child'] 17 | parent = options['parent'] 18 | min = options['min'] 19 | max = options['max'] 20 | maxIdle = options['max_idle'] 21 | recycle = options['recycle'] 22 | childReactor = options['reactor'] 23 | timeout = options['timeout'] 24 | 25 | starter = ProcessStarter(packages=("twisted", "ampoule"), 
class AMPouleService(service.Service):
    """
    Service exposing a process pool over TCP through an AMP proxy.
    """

    def __init__(self, pool, child, port, interface):
        """
        @param pool: the process pool that executes the forwarded calls.
        @param child: the AMP protocol class run by the pool children.
        @param port: TCP port to listen on.
        @param interface: interface to bind the listening port to.
        """
        self.pool = pool
        self.port = port
        self.child = child
        self.interface = interface
        # Listening port, set by startService once listenTCP succeeded.
        self.server = None

    def startService(self):
        """
        Before reactor.run() is called we setup the system.
        """
        service.Service.startService(self)
        from twisted.internet import reactor

        try:
            factory = ServerFactory()
            factory.protocol = lambda: rpool.AMPProxy(wrapped=self.pool.doWork,
                                                      child=self.child)
            self.server = reactor.listenTCP(self.port,
                                            factory,
                                            interface=self.interface)
            # this is synchronous when it's the startup, even though
            # it returns a deferred. But we need to run it after the
            # first cycle in order to wait for signal handlers to be
            # installed.
            reactor.callLater(0, self.pool.start)
        except Exception:
            # Startup stays best-effort (the problem is reported, not
            # raised), but SystemExit and KeyboardInterrupt are no longer
            # swallowed as they were with a bare except.
            import traceback
            print(traceback.format_exc())

    def stopService(self):
        """
        Stop listening (when a port was opened) and stop the pool.

        @return: whatever C{pool.stop()} returns.
        """
        service.Service.stopService(self)
        if self.server is not None:
            self.server.stopListening()
        return self.pool.stop()
@provider(IPlugin, IServiceMaker)
class AMPoulePlugin(object):
    """
    This plugin provides ways to create a process pool service in your
    system listening on a given port and interface and answering to a
    given set of commands.
    """

    tapname = "ampoule"
    description = "Run an AMPoule process pool"

    class options(Options):
        """
        Command line options of the plugin.
        """
        from twisted.application import reactors
        optParameters = [
            ["ampport", "p", 8901, "Listening port for the AMP service", int],
            ["ampinterface", "i", "0.0.0.0", "Listening interface for the AMP service"],
            ["child", "c", "ampoule.child.AMPChild", "Full module path to the children AMP class"],
            ["parent", "s", None, "Full module path to the parent process AMP class"],
            ["min", "l", 5, "Minimum number of processes in the pool", int],
            ["max", "u", 20, "Maximum number of processes in the pool", int],
            ["name", "n", None, "Optional process pool name"],
            ["max_idle", "d", 20, "Maximum number of idle seconds before killing a child", int],
            ["recycle", "r", 500, "Maximum number of calls before recycling a child", int],
            ["reactor", "R", "select", "Select the reactor for child processes"],
            ["timeout", "t", None, "Specify a timeout value for ProcessPool calls", int]
        ]

        def postOptions(self):
            """
            Check and finalize the value of the arguments.
            """
            self['child'] = reflect.namedAny(self['child'])
            if self['parent'] is not None:
                # Resolve the parent from its own option. The previous
                # code looked up self['child'], which at this point is
                # already a class and not a dotted name.
                self['parent'] = reflect.namedAny(self['parent'])
            name = self['name']
            # Only byte strings need decoding: on Python 3 the command
            # line value is already text and has no decode() method.
            if name and isinstance(name, bytes):
                self['name'] = name.decode('utf-8')

        def opt_help_reactors(self):
            """Display a list of available reactors"""
            from twisted.application import reactors
            for r in reactors.getReactorTypes():
                sys.stdout.write(' %-4s\t%s\n' %
                                 (r.shortName, r.description))
            raise SystemExit(0)

    @classmethod
    def makeService(cls, options):
        """
        Create an L{IService} for the parameters and return it
        """
        from ampoule import service
        return service.makeService(options)
21 | @type child: L{amp.AMP} 22 | """ 23 | amp.AMP.__init__(self) 24 | self.wrapped = wrapped 25 | self.child = child 26 | 27 | localCd = set(self._commandDispatch.keys()) 28 | childCd = set(self.child._commandDispatch.keys()) 29 | assert localCd.intersection(childCd) == set([b"StartTLS"]), \ 30 | "Illegal method overriding in Proxy" 31 | 32 | def locateResponder(self, name): 33 | """ 34 | This is a custom locator to forward calls to the children 35 | processes while keeping the ProcessPool a transparent MITM. 36 | 37 | This way of working has a few limitations, the first of which 38 | is the fact that children won't be able to take advantage of 39 | any dynamic locator except for the default L{CommandLocator} 40 | that is based on the _commandDispatch attribute added by the 41 | metaclass. This limitation might be lifted in the future. 42 | """ 43 | if name == "StartTLS": 44 | # This is a special case where the proxy takes precedence 45 | return amp.AMP.locateResponder(self, "StartTLS") 46 | 47 | # Get the dict of commands from the child AMP implementation. 48 | cd = self.child._commandDispatch 49 | if name in cd: 50 | # If the command is there, then we forward stuff to it. 51 | commandClass, _responderFunc = cd[name] 52 | # We need to wrap the doWork function because the wrapping 53 | # call doesn't pass the command as first argument since it 54 | # thinks that we are the actual receivers and callable is 55 | # already the responder while it isn't. 56 | doWork = lambda **kw: self.wrapped(commandClass, **kw) 57 | # Now let's call the right function and wrap the result 58 | # dictionary. 59 | return self._wrapWithSerialization(doWork, commandClass) 60 | # of course if the name of the command is not in the child it 61 | # means that it might be in this class, so fallback to the 62 | # default behavior of this module. 
63 | return amp.AMP.locateResponder(self, name) 64 | 65 | -------------------------------------------------------------------------------- /examples/mapreduce/client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Before using me you need to startup an ampoule process that uses the 4 | # mapreduce child, like this: 5 | # twistd -no ampoule --child=mapreduce.MapReducer 6 | 7 | from twisted.internet.protocol import ClientFactory 8 | from twisted.python import filepath as fp 9 | from twisted.internet import reactor, defer 10 | from twisted.protocols import amp 11 | 12 | from mapreduce import Map, Reduce 13 | from ampoule.util import mainpoint 14 | 15 | import collections 16 | 17 | def mymap(in_, out): 18 | """ 19 | I'm the mapping function executed in the pool. 20 | """ 21 | aggregation = collections.defaultdict(lambda : 0) 22 | 23 | for line in in_.getContent().splitlines(): 24 | for word in line.split(): 25 | word.strip(",:;.'\"[]{}()*&^%$#@!`~><\\+=-_") 26 | aggregation[word] += 1 27 | 28 | f = out.open('wb') 29 | f.write("\n".join("%s %s" % word_frequency 30 | for word_frequency in aggregation.items())) 31 | f.close() 32 | 33 | 34 | def myreduce(files, out): 35 | """ 36 | I'm the reducer function applied to a directory in the pool. 
def map_step(pool, directory, resultdir):
    """
    First step: create the output directory for C{directory} inside
    C{resultdir} and submit one L{Map} call per child of C{directory}.

    @return: a Deferred that fires with the output directory once every
        call has completed; that value is what the next step receives
        as its first argument.
    """
    outputdir = resultdir.child(directory.basename())
    outputdir.createDirectory()

    pending = [
        pool.callRemote(Map, mapper=mymap, filename=entry, outdir=outputdir)
        for entry in directory.children()
    ]

    def passOutputDir(_results):
        return outputdir

    allDone = defer.DeferredList(pending)
    return allDone.addCallback(passOutputDir)


def reduce_step(outputdir, pool, resultdir):
    """
    Second step: submit a single L{Reduce} call for C{outputdir}, which
    is expected to have been completely processed by the map step.
    """
    reduction = pool.callRemote(Reduce, reducer=myreduce, directory=outputdir)
    return reduction
def process(dirs, resultdir, steps, host="127.0.0.1", port=8901):
    """
    setup twisted and run it with the known parameters.

    @param dirs: the input directories to process.
    @param resultdir: the directory that receives the results.
    @param steps: the step callables, in execution order.
    @param host: address of the ampoule service (defaults to the value
        that used to be hard-coded).
    @param port: TCP port of the ampoule service (same remark).
    """
    c = AMPFactory(dirs, resultdir, steps)
    c.protocol = ClientAMP
    reactor.connectTCP(host, port, c)
    reactor.run()

@mainpoint
def main(args):
    """
    The mainpoint, the decorator makes this module resolvable by
    reflect.qual() used by our Function argument type. Here we
    define the steps of our processing and we execute them.

    @param args: the command line: program name, results directory and
        then the input directories.
    """
    if len(args) < 2:
        # Fail with a readable message instead of an IndexError.
        raise SystemExit("usage: %s RESULTS_DIR [INPUT_DIR ...]" % (args[0],))
    directory_names = [fp.FilePath(name) for name in args[2:]]
    results = fp.FilePath(args[1])
    # Start from an empty results directory. On a first run there is
    # nothing to remove, and remove() fails on a missing path.
    if results.exists():
        results.remove()
    results.makedirs()
    steps = [map_step, reduce_step]
    process(directory_names, results, steps)
20 | 21 | Ampoule 0.2.0 (2010-02-02) 22 | ========================== 23 | 24 | Changes 25 | -------- 26 | - Fixed bug #317287: Twisted was required by setup.py due to import 27 | of application code to get the version number. The setup.py now 28 | tries to do the same thing but if it fails it uses a separate 29 | hardcoded version. 30 | - Fixed bug #317077: Ampoule didn't work on windows xp due to childFD 31 | customization, now on windows ampoule uses the standard 0,1,2 fds. 32 | - Added pyOpenSSL as an explicit dependency since Twisted doesn't 33 | require it but AMP does. 34 | - Greatly simplify setup.py and support plain distutils too. 35 | - Bootstrap code now supports context managers 36 | - Support for execution deadline on child calls 37 | - Parametrize the timeout signal to send to the child 38 | - Pass arguments directly from the ProcessPool object for the child 39 | process creation. 40 | 41 | 42 | Ampoule 0.1 (2008-11-15) 43 | ========================== 44 | 45 | Changes 46 | -------- 47 | 48 | - Fixed bug #276841: Add timeout for subprocess calls. 49 | The ProcessPool constructor now supports an additional timeout 50 | argument that specifies a pool level timeout, in seconds, before 51 | which every call ever made should return. Alternatively there is 52 | also a per-call mechanism. Currently this second per-call system 53 | is not available when using the remote pool service, so be sure 54 | to set the timeout command line parameter at a high enough level 55 | to satisfy all the possible commands. If a command doesn't require 56 | an answer the timeout doesn't apply, of course. 57 | 58 | The error returned when a call is timed out is 'error.ProcessTerminated'. 59 | On *nix systems the process is terminated with signal 9, on windows 60 | ampoule uses SIGTERM and returns error code 255. 
61 | 62 | Ampoule 0.0.5 (2008-11-12) 63 | ========================== 64 | 65 | Changes 66 | -------- 67 | - Fixed bug #259264, this fix introduces a number of changes in the 68 | architecture of the project: 69 | 1. Removed childReactor argument from the process pool and added 70 | a starter argument. 71 | 2. Introduced the concept of a starter object whose role is to 72 | start subprocesses with given parameters. This makes it easier 73 | to specify particular parameters (e.g. new env variables) to the 74 | child processes without needing to override many methods in the 75 | pool using closures. 76 | 3. main.py is completely changed and now provides the ProcessStarter 77 | object which is a default implementation of IStarter. 78 | 4. IStarter interface currently documents only 2 methods: 79 | startAMPProcess 80 | startPythonProcess 81 | in the future it's possible that we will add an additional: 82 | startProcess 83 | that starts whichever process we want without requiring python, 84 | also this might end up with the separation of ProcessPool in at 85 | least 2 logical levels: the ProcessPool and a dispatcher that 86 | talks with the children, in this way it would be possible to 87 | create custom ProcessPools without changing much code or requiring 88 | any special requirement on the children. 89 | 90 | - Introduced a callRemote method on the ProcessPool that is basically 91 | the same as doWork. Introduced for symmetry between all the RPC libraries 92 | in Twisted. 93 | 94 | - reactor short name and ampoule child class are now passed as the 2 95 | last arguments rather than the first 2. So if you have written any 96 | custom bootstrap code be sure to change sys.argv[1] and sys.argv[2] 97 | into sys.argv[-2] and sys.argv[-1] respectively. 
98 | 99 | 100 | Ampoule 0.0.4 (2008-11-03) 101 | ========================== 102 | 103 | Changes 104 | -------- 105 | - Ampoule parent process and child processes now talk using FDs 3 (in) 106 | and 4 (out) in order to avoid problems with libraries that mistakenly 107 | send error lines to stdout (yes, I'm looking at you gtk+) 108 | 109 | Ampoule 0.0.3 (2008-10-01) 110 | ========================== 111 | 112 | Features 113 | -------- 114 | - Added a Twisted Matrix plugin to run a process pool service that 115 | can be used over the network using an AMP client. 116 | 117 | - Added ability to change the reactor used to run the children 118 | 119 | Changes 120 | ------- 121 | - If you wrote your own bootstrap code for the pool you should change 122 | the way it works because now it takes 2 arguments, sys.argv[1] is 123 | the reactor short name while sys.argv[2] is still the AMPChild path. 124 | If you don't use startAMPProcess you probably have nothing to worry 125 | about. 126 | 127 | Fixes 128 | ----- 129 | - Now the process pool doesn't raise an error when you use a command 130 | that doesn't require an answer. 131 | 132 | Ampoule 0.0.2 (2008-09-26) 133 | ========================== 134 | 135 | Features 136 | -------- 137 | - Support process recycling after predefined number of calls. 138 | 139 | Changes 140 | ------- 141 | - ProcessPool argument max_idle is now maxIdle to comply with Twisted 142 | style guidelines. 
143 | 144 | - ProcessPool.startAWorker is now a synchronous call 145 | 146 | - removed ampoule.runner module because it was essentially useless, 147 | if you want to change subprocess startup behavior pass it as an 148 | argument by overriding the processFactory with a closure like this: 149 | 150 | from ampoule.main import startAMPProcess 151 | from ampoule.pool import ProcessPool 152 | pp = ProcessPool() 153 | def myProcessFactory(*args, **kwargs): 154 | kwargs['bootstrap'] = myBootstrapCode 155 | return startAMPProcess(*args, **kwargs) 156 | pp.processFactory = staticmethod(myProcessFactory) 157 | -------------------------------------------------------------------------------- /ampoule/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import imp 4 | import itertools 5 | 6 | from zope.interface import implementer 7 | 8 | from twisted import logger 9 | from twisted.internet import reactor, protocol, defer, error 10 | from twisted.python import reflect 11 | from twisted.protocols import amp 12 | from twisted.python import runtime 13 | from twisted.python.compat import set 14 | 15 | from ampoule import iampoule 16 | 17 | 18 | 19 | log = logger.Logger() 20 | 21 | 22 | gen = itertools.count() 23 | 24 | if runtime.platform.isWindows(): 25 | IS_WINDOWS = True 26 | TO_CHILD = 0 27 | FROM_CHILD = 1 28 | else: 29 | IS_WINDOWS = False 30 | TO_CHILD = 3 31 | FROM_CHILD = 4 32 | 33 | class AMPConnector(protocol.ProcessProtocol): 34 | """ 35 | A L{ProcessProtocol} subclass that can understand and speak AMP. 36 | 37 | @ivar amp: the children AMP process 38 | @type amp: L{amp.AMP} 39 | 40 | @ivar finished: a deferred triggered when the process dies. 41 | @type finished: L{defer.Deferred} 42 | 43 | @ivar name: Unique name for the connector, much like a pid. 
44 | @type name: int 45 | """ 46 | 47 | def __init__(self, proto, name=None): 48 | """ 49 | @param proto: An instance or subclass of L{amp.AMP} 50 | @type proto: L{amp.AMP} 51 | 52 | @param name: optional name of the subprocess. 53 | @type name: int 54 | """ 55 | self.finished = defer.Deferred() 56 | self.amp = proto 57 | self.name = name 58 | if name is None: 59 | self.name = next(gen) 60 | 61 | def signalProcess(self, signalID): 62 | """ 63 | Send the signal signalID to the child process 64 | 65 | @param signalID: The signal ID that you want to send to the 66 | corresponding child 67 | @type signalID: C{str} or C{int} 68 | """ 69 | return self.transport.signalProcess(signalID) 70 | 71 | def connectionMade(self): 72 | log.info(u'Subprocess {n} started.', n=self.name) 73 | self.amp.makeConnection(self) 74 | 75 | # Transport 76 | disconnecting = False 77 | 78 | def write(self, data): 79 | if IS_WINDOWS: 80 | self.transport.write(data) 81 | else: 82 | self.transport.writeToChild(TO_CHILD, data) 83 | 84 | def loseConnection(self): 85 | self.transport.closeChildFD(TO_CHILD) 86 | self.transport.closeChildFD(FROM_CHILD) 87 | self.transport.loseConnection() 88 | 89 | def getPeer(self): 90 | return ('subprocess',) 91 | 92 | def getHost(self): 93 | return ('no host',) 94 | 95 | def childDataReceived(self, childFD, data): 96 | if childFD == FROM_CHILD: 97 | self.amp.dataReceived(data) 98 | return 99 | self.errReceived(data) 100 | 101 | def errReceived(self, data): 102 | for line in data.strip().splitlines(): 103 | log.error(u'FROM {n}: {l}', n=self.name, l=line) 104 | 105 | def processEnded(self, status): 106 | log.info(u'Process: {n} ended', n=self.name) 107 | self.amp.connectionLost(status) 108 | if status.check(error.ProcessDone): 109 | self.finished.callback('') 110 | return 111 | self.finished.errback(status) 112 | 113 | BOOTSTRAP = """\ 114 | import sys 115 | 116 | def main(reactor, ampChildPath): 117 | from twisted.application import reactors 118 | 
reactors.installReactor(reactor) 119 | 120 | from twisted import logger 121 | observer = logger.textFileLogObserver(sys.stderr) 122 | logLevelPredicate = logger.LogLevelFilterPredicate( 123 | defaultLogLevel=logger.LogLevel.info 124 | ) 125 | filteringObserver = logger.FilteringLogObserver( 126 | observer, [logLevelPredicate] 127 | ) 128 | logger.globalLogBeginner.beginLoggingTo([filteringObserver]) 129 | 130 | from twisted.internet import reactor, stdio 131 | from twisted.python import reflect, runtime 132 | 133 | ampChild = reflect.namedAny(ampChildPath) 134 | ampChildInstance = ampChild(*sys.argv[1:-2]) 135 | if runtime.platform.isWindows(): 136 | stdio.StandardIO(ampChildInstance) 137 | else: 138 | stdio.StandardIO(ampChildInstance, %s, %s) 139 | enter = getattr(ampChildInstance, '__enter__', None) 140 | if enter is not None: 141 | enter() 142 | try: 143 | reactor.run() 144 | except: 145 | if enter is not None: 146 | info = sys.exc_info() 147 | if not ampChildInstance.__exit__(*info): 148 | raise 149 | else: 150 | raise 151 | else: 152 | if enter is not None: 153 | ampChildInstance.__exit__(None, None, None) 154 | 155 | main(sys.argv[-2], sys.argv[-1]) 156 | """ % (TO_CHILD, FROM_CHILD) 157 | 158 | @implementer(iampoule.IStarter) 159 | class ProcessStarter(object): 160 | 161 | connectorFactory = AMPConnector 162 | def __init__(self, bootstrap=BOOTSTRAP, args=(), env={}, 163 | path=None, uid=None, gid=None, usePTY=0, 164 | packages=(), childReactor="select"): 165 | """ 166 | @param bootstrap: Startup code for the child process 167 | @type bootstrap: C{str} 168 | 169 | @param args: Arguments that should be supplied to every child 170 | created. 171 | @type args: C{tuple} of C{str} 172 | 173 | @param env: Environment variables that should be present in the 174 | child environment 175 | @type env: C{dict} 176 | 177 | @param path: Path in which to run the child 178 | @type path: C{str} 179 | 180 | @param uid: if defined, the uid used to run the new process. 
181 | @type uid: C{int} 182 | 183 | @param gid: if defined, the gid used to run the new process. 184 | @type gid: C{int} 185 | 186 | @param usePTY: Should the child processes use PTY processes 187 | @type usePTY: 0 or 1 188 | 189 | @param packages: A tuple of packages that should be guaranteed 190 | to be importable in the child processes 191 | @type packages: C{tuple} of C{str} 192 | 193 | @param childReactor: a string that sets the reactor for child 194 | processes 195 | @type childReactor: C{str} 196 | """ 197 | self.bootstrap = bootstrap 198 | self.args = args 199 | self.env = env 200 | self.path = path 201 | self.uid = uid 202 | self.gid = gid 203 | self.usePTY = usePTY 204 | self.packages = ("ampoule",) + packages 205 | self.childReactor = childReactor 206 | 207 | def __repr__(self): 208 | """ 209 | Represent the ProcessStarter with a string. 210 | """ 211 | return """ProcessStarter(bootstrap=%r, 212 | args=%r, 213 | env=%r, 214 | path=%r, 215 | uid=%r, 216 | gid=%r, 217 | usePTY=%r, 218 | packages=%r, 219 | childReactor=%r)""" % (self.bootstrap, 220 | self.args, 221 | self.env, 222 | self.path, 223 | self.uid, 224 | self.gid, 225 | self.usePTY, 226 | self.packages, 227 | self.childReactor) 228 | 229 | def _checkRoundTrip(self, obj): 230 | """ 231 | Make sure that an object will properly round-trip through 'qual' and 232 | 'namedAny'. 233 | 234 | Raise a L{RuntimeError} if they aren't. 235 | """ 236 | tripped = reflect.namedAny(reflect.qual(obj)) 237 | if tripped is not obj: 238 | raise RuntimeError("importing %r is not the same as %r" % 239 | (reflect.qual(obj), obj)) 240 | 241 | def startAMPProcess(self, ampChild, ampParent=None, ampChildArgs=()): 242 | """ 243 | @param ampChild: a L{ampoule.child.AMPChild} subclass. 
244 | @type ampChild: L{ampoule.child.AMPChild} 245 | 246 | @param ampParent: an L{amp.AMP} subclass that implements the parent 247 | protocol for this process pool 248 | @type ampParent: L{amp.AMP} 249 | """ 250 | self._checkRoundTrip(ampChild) 251 | fullPath = reflect.qual(ampChild) 252 | if ampParent is None: 253 | ampParent = amp.AMP 254 | prot = self.connectorFactory(ampParent()) 255 | args = ampChildArgs + (self.childReactor, fullPath) 256 | return self.startPythonProcess(prot, *args) 257 | 258 | 259 | def startPythonProcess(self, prot, *args): 260 | """ 261 | @param prot: a L{protocol.ProcessProtocol} subclass 262 | @type prot: L{protocol.ProcessProtocol} 263 | 264 | @param args: a tuple of arguments that will be added after the 265 | ones in L{self.args} to start the child process. 266 | 267 | @return: a tuple of the child process and the deferred finished. 268 | finished triggers when the subprocess dies for any reason. 269 | """ 270 | spawnProcess(prot, self.bootstrap, self.args+args, env=self.env, 271 | path=self.path, uid=self.uid, gid=self.gid, 272 | usePTY=self.usePTY, packages=self.packages) 273 | 274 | # XXX: we could wait for startup here, but ... is there really any 275 | # reason to? the pipe should be ready for writing. The subprocess 276 | # might not start up properly, but then, a subprocess might shut down 277 | # at any point too. So we just return amp and have this piece to be 278 | # synchronous. 
279 | return prot.amp, prot.finished 280 | 281 | def spawnProcess(processProtocol, bootstrap, args=(), env={}, 282 | path=None, uid=None, gid=None, usePTY=0, 283 | packages=()): 284 | env = env.copy() 285 | 286 | pythonpath = [] 287 | for pkg in packages: 288 | p = os.path.split(imp.find_module(pkg)[1])[0] 289 | if p.startswith(os.path.join(sys.prefix, 'lib')): 290 | continue 291 | pythonpath.append(p) 292 | pythonpath = list(set(pythonpath)) 293 | pythonpath.extend(env.get('PYTHONPATH', '').split(os.pathsep)) 294 | env['PYTHONPATH'] = os.pathsep.join(pythonpath) 295 | args = (sys.executable, '-c', bootstrap) + args 296 | # childFDs variable is needed because sometimes child processes 297 | # misbehave and use stdout to output stuff that should really go 298 | # to stderr. Of course child process might even use the wrong FDs 299 | # that I'm using here, 3 and 4, so we are going to fix all these 300 | # issues when I add support for the configuration object that can 301 | # fix this stuff in a more configurable way. 
302 | if IS_WINDOWS: 303 | return reactor.spawnProcess(processProtocol, sys.executable, args, 304 | env, path, uid, gid, usePTY) 305 | else: 306 | return reactor.spawnProcess(processProtocol, sys.executable, args, 307 | env, path, uid, gid, usePTY, 308 | childFDs={0:"w", 1:"r", 2:"r", 3:"w", 4:"r"}) 309 | -------------------------------------------------------------------------------- /ampoule/pool.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import heapq 4 | import itertools 5 | import functools 6 | import signal 7 | choice = random.choice 8 | now = time.time 9 | count = functools.partial(next, itertools.count()) 10 | pop = heapq.heappop 11 | 12 | from twisted import logger 13 | from twisted.internet import defer, task, error 14 | 15 | from ampoule import commands, main 16 | 17 | 18 | 19 | log = logger.Logger() 20 | 21 | 22 | STATS_TEMPLATE = u"""ProcessPool stats: 23 | workers: {w} 24 | timeout: {t} 25 | parent: {p} 26 | child: {c} 27 | max idle: {i} 28 | recycle after: {r} 29 | ProcessStarter: 30 | {s}""" 31 | 32 | 33 | try: 34 | DIE = signal.SIGKILL 35 | except AttributeError: 36 | # Windows doesn't have SIGKILL, let's just use SIGTERM then 37 | DIE = signal.SIGTERM 38 | 39 | 40 | class ProcessPool(object): 41 | """ 42 | This class generalizes the functionality of a pool of 43 | processes to which work can be dispatched. 44 | 45 | @ivar finished: Boolean flag, L{True} when the pool is finished. 46 | 47 | @ivar started: Boolean flag, L{True} when the pool is started. 48 | 49 | @ivar name: Optional name for the process pool 50 | 51 | @ivar min: Minimum number of subprocesses to set up 52 | 53 | @ivar max: Maximum number of subprocesses to set up 54 | 55 | @ivar maxIdle: Maximum number of seconds of indleness in a child 56 | 57 | @ivar starter: A process starter instance that provides 58 | L{iampoule.IStarter}. 
59 | 60 | @ivar recycleAfter: Maximum number of calls before restarting a 61 | subprocess, 0 to not recycle. 62 | 63 | @ivar ampChild: The child AMP protocol subclass with the commands 64 | that the child should implement. 65 | 66 | @ivar ampParent: The parent AMP protocol subclass with the commands 67 | that the parent should implement. 68 | 69 | @ivar timeout: The general timeout (in seconds) for every child 70 | process call. 71 | """ 72 | 73 | finished = False 74 | started = False 75 | name = None 76 | 77 | def __init__(self, ampChild=None, ampParent=None, min=5, max=20, 78 | name=None, maxIdle=20, recycleAfter=500, starter=None, 79 | timeout=None, timeout_signal=DIE, ampChildArgs=()): 80 | self.starter = starter 81 | self.ampChildArgs = tuple(ampChildArgs) 82 | if starter is None: 83 | self.starter = main.ProcessStarter(packages=("twisted",)) 84 | self.ampParent = ampParent 85 | self.ampChild = ampChild 86 | if ampChild is None: 87 | from ampoule.child import AMPChild 88 | self.ampChild = AMPChild 89 | self.min = min 90 | self.max = max 91 | self.name = name 92 | self.maxIdle = maxIdle 93 | self.recycleAfter = recycleAfter 94 | self.timeout = timeout 95 | self.timeout_signal = timeout_signal 96 | self._queue = [] 97 | 98 | self.processes = set() 99 | self.ready = set() 100 | self.busy = set() 101 | self._finishCallbacks = {} 102 | self._lastUsage = {} 103 | self._calls = {} 104 | self.looping = task.LoopingCall(self._pruneProcesses) 105 | self.looping.start(maxIdle, now=False) 106 | 107 | def start(self, ampChild=None): 108 | """ 109 | Starts the ProcessPool with a given child protocol. 110 | 111 | @param ampChild: a L{ampoule.child.AMPChild} subclass. 
112 | @type ampChild: L{ampoule.child.AMPChild} subclass 113 | """ 114 | if ampChild is not None and not self.started: 115 | self.ampChild = ampChild 116 | self.finished = False 117 | self.started = True 118 | return self.adjustPoolSize() 119 | 120 | def _pruneProcesses(self): 121 | """ 122 | Remove idle processes from the pool. 123 | """ 124 | n = now() 125 | d = [] 126 | for child, lastUse in self._lastUsage.items(): 127 | if len(self.processes) > self.min and (n - lastUse) > self.maxIdle: 128 | # we are setting lastUse when processing finishes, it 129 | # might be processing right now 130 | if child not in self.busy: 131 | # we need to remove this child from the ready set 132 | # and the processes set because otherwise it might 133 | # get calls from doWork 134 | self.ready.discard(child) 135 | self.processes.discard(child) 136 | d.append(self.stopAWorker(child)) 137 | return defer.DeferredList(d) 138 | 139 | def _pruneProcess(self, child): 140 | """ 141 | Remove every trace of the process from this instance. 142 | """ 143 | self.processes.discard(child) 144 | self.ready.discard(child) 145 | self.busy.discard(child) 146 | self._lastUsage.pop(child, None) 147 | self._calls.pop(child, None) 148 | self._finishCallbacks.pop(child, None) 149 | 150 | def _addProcess(self, child, finished): 151 | """ 152 | Adds the newly created child process to the pool. 
153 | """ 154 | def fatal(reason, child): 155 | log.error( 156 | u'FATAL: Process exited.\n\t{r}', r=reason.getErrorMessage() 157 | ) 158 | self._pruneProcess(child) 159 | 160 | def dieGently(data, child): 161 | log.info(u'STOPPING: {s}', s=data) 162 | self._pruneProcess(child) 163 | 164 | self.processes.add(child) 165 | self.ready.add(child) 166 | finished.addCallback(dieGently, child).addErrback(fatal, child) 167 | self._finishCallbacks[child] = finished 168 | self._lastUsage[child] = now() 169 | self._calls[child] = 0 170 | self._catchUp() 171 | 172 | def _catchUp(self): 173 | """ 174 | If there are queued items in the list then run them. 175 | """ 176 | if self._queue: 177 | _, (d, command, kwargs) = pop(self._queue) 178 | self._cb_doWork(command, **kwargs).chainDeferred(d) 179 | 180 | def _handleTimeout(self, child): 181 | """ 182 | One of the children went timeout, we need to deal with it 183 | 184 | @param child: The child process 185 | @type child: L{child.AMPChild} 186 | """ 187 | try: 188 | child.transport.signalProcess(self.timeout_signal) 189 | except error.ProcessExitedAlready: 190 | # don't do anything then... we are too late 191 | # or we were too early to call 192 | pass 193 | 194 | def startAWorker(self): 195 | """ 196 | Start a worker and set it up in the system. 197 | """ 198 | if self.finished: 199 | # this is a race condition: basically if we call self.stop() 200 | # while a process is being recycled what happens is that the 201 | # process will be created anyway. By putting a check for 202 | # self.finished here we make sure that in no way we are creating 203 | # processes when the pool is stopped. 204 | # The race condition comes from the fact that: 205 | # stopAWorker() is asynchronous while stop() is synchronous. 206 | # so if you call: 207 | # pp.stopAWorker(child).addCallback(lambda _: pp.startAWorker()) 208 | # pp.stop() 209 | # You might end up with a dirty reactor due to the stop() 210 | # returning before the new process is created. 
211 | return 212 | startAMPProcess = self.starter.startAMPProcess 213 | child, finished = startAMPProcess(self.ampChild, 214 | ampParent=self.ampParent, 215 | ampChildArgs=self.ampChildArgs) 216 | return self._addProcess(child, finished) 217 | 218 | def _cb_doWork(self, command, _timeout=None, _deadline=None, 219 | **kwargs): 220 | """ 221 | Go and call the command. 222 | 223 | @param command: The L{amp.Command} to be executed in the child 224 | @type command: L{amp.Command} 225 | 226 | @param _d: The deferred for the calling code. 227 | @type _d: L{defer.Deferred} 228 | 229 | @param _timeout: The timeout for this call only 230 | @type _timeout: C{int} 231 | @param _deadline: The deadline for this call only 232 | @type _deadline: C{int} 233 | """ 234 | timeoutCall = None 235 | deadlineCall = None 236 | 237 | def _returned(result, child, is_error=False): 238 | def cancelCall(call): 239 | if call is not None and call.active(): 240 | call.cancel() 241 | cancelCall(timeoutCall) 242 | cancelCall(deadlineCall) 243 | self.busy.discard(child) 244 | if not die: 245 | # we are not marked to be removed, so add us back to 246 | # the ready set and let's see if there's some catching 247 | # up to do 248 | self.ready.add(child) 249 | self._catchUp() 250 | else: 251 | # We should die and we do, then we start a new worker 252 | # to pick up stuff from the queue otherwise we end up 253 | # without workers and the queue will remain there. 254 | self.stopAWorker(child).addCallback(lambda _: self.startAWorker()) 255 | self._lastUsage[child] = now() 256 | # we can't do recycling here because it's too late and 257 | # the process might have received tons of calls already 258 | # which would make it run more calls than what is 259 | # configured to do. 
260 | return result 261 | 262 | die = False 263 | child = self.ready.pop() 264 | self.busy.add(child) 265 | self._calls[child] += 1 266 | 267 | # Let's see if this call goes over the recycling barrier 268 | if self.recycleAfter and self._calls[child] >= self.recycleAfter: 269 | # it does so mark this child, using a closure, to be 270 | # removed at the end of the call. 271 | die = True 272 | 273 | # If the command doesn't require a response then callRemote 274 | # returns nothing, so we prepare for that too. 275 | # We also need to guard against timeout errors for child 276 | # and local timeout parameter overrides the global one 277 | if _timeout == 0: 278 | timeout = _timeout 279 | else: 280 | timeout = _timeout or self.timeout 281 | 282 | if timeout is not None: 283 | from twisted.internet import reactor 284 | timeoutCall = reactor.callLater(timeout, self._handleTimeout, child) 285 | 286 | if _deadline is not None: 287 | from twisted.internet import reactor 288 | delay = max(0, _deadline - reactor.seconds()) 289 | deadlineCall = reactor.callLater(delay, self._handleTimeout, 290 | child) 291 | 292 | return defer.maybeDeferred(child.callRemote, command, **kwargs 293 | ).addCallback(_returned, child 294 | ).addErrback(_returned, child, is_error=True) 295 | 296 | def callRemote(self, *args, **kwargs): 297 | """ 298 | Proxy call to keep the API homogeneous across twisted's RPCs 299 | """ 300 | return self.doWork(*args, **kwargs) 301 | 302 | def doWork(self, command, **kwargs): 303 | """ 304 | Sends the command to one child. 305 | 306 | @param command: an L{amp.Command} type object. 307 | @type command: L{amp.Command} 308 | 309 | @param kwargs: dictionary containing the arguments for the command. 
310 | """ 311 | if self.ready: # there are unused processes, let's use them 312 | return self._cb_doWork(command, **kwargs) 313 | else: 314 | if len(self.processes) < self.max: 315 | # no unused but we can start some new ones 316 | # since startAWorker is synchronous we won't have a 317 | # race condition here in case of multiple calls to 318 | # doWork, so we will end up in the else clause in case 319 | # of such calls: 320 | # Process pool with min=1, max=1, recycle_after=1 321 | # [call(Command) for x in xrange(BIG_NUMBER)] 322 | self.startAWorker() 323 | return self._cb_doWork(command, **kwargs) 324 | else: 325 | # No one is free... just queue up and wait for a process 326 | # to start and pick up the first item in the queue. 327 | d = defer.Deferred() 328 | self._queue.append((count(), (d, command, kwargs))) 329 | return d 330 | 331 | def stopAWorker(self, child=None): 332 | """ 333 | Gently stop a child so that it's not restarted anymore 334 | 335 | @param child: an L{ampoule.child.AmpChild} type object. 336 | @type child: L{ampoule.child.AmpChild} or None 337 | 338 | """ 339 | if child is None: 340 | if self.ready: 341 | child = self.ready.pop() 342 | else: 343 | child = choice(list(self.processes)) 344 | child.callRemote(commands.Shutdown 345 | # This is needed for timeout handling, the reason is pretty hard 346 | # to explain but I'll try to: 347 | # There's another small race condition in the system. If the 348 | # child process is shut down by a signal and you try to stop 349 | # the process pool immediately afterwards, like tests would do, 350 | # the child AMP object would still be in the system and trying 351 | # to call the command Shutdown on it would result in the same 352 | # errback that we got originally, for this reason we need to 353 | # trap it now so that it doesn't raise by not being handled. 354 | # Does this even make sense to you? 
355 | ).addErrback(lambda reason: reason.trap(error.ProcessTerminated)) 356 | return self._finishCallbacks[child] 357 | 358 | def adjustPoolSize(self, min=None, max=None): 359 | """ 360 | Change the pool size to be at least min and less than max, 361 | useful when you change the values of max and min in the instance 362 | and you want the pool to adapt to them. 363 | """ 364 | if min is None: 365 | min = self.min 366 | if max is None: 367 | max = self.max 368 | 369 | assert min >= 0, 'minimum is negative' 370 | assert min <= max, 'minimum is greater than maximum' 371 | 372 | self.min = min 373 | self.max = max 374 | 375 | l = [] 376 | if self.started: 377 | 378 | for i in range(len(self.processes)-self.max): 379 | l.append(self.stopAWorker()) 380 | while len(self.processes) < self.min: 381 | self.startAWorker() 382 | 383 | return defer.DeferredList(l).addCallback(lambda _: self.dumpStats()) 384 | 385 | def stop(self): 386 | """ 387 | Stops the process protocol. 388 | """ 389 | self.finished = True 390 | l = [self.stopAWorker(process) for process in self.processes] 391 | def _cb(_): 392 | if self.looping.running: 393 | self.looping.stop() 394 | 395 | return defer.DeferredList(l).addCallback(_cb) 396 | 397 | def dumpStats(self): 398 | log.info( 399 | STATS_TEMPLATE, 400 | w=len(self.processes), 401 | t=self.timeout, 402 | p=self.ampParent, 403 | c=self.ampChild, 404 | i=self.maxIdle, 405 | r=self.recycleAfter, 406 | s=self.starter 407 | ) 408 | 409 | pp = None 410 | 411 | def deferToAMPProcess(command, **kwargs): 412 | """ 413 | Helper function that sends a command to the default process pool 414 | and returns a deferred that fires when the result of the 415 | subprocess computation is ready. 416 | 417 | @param command: an L{amp.Command} subclass 418 | @param kwargs: dictionary containing the arguments for the command. 419 | 420 | @return: a L{defer.Deferred} with the data from the subprocess. 
421 | """ 422 | global pp 423 | if pp is None: 424 | pp = ProcessPool() 425 | return pp.start().addCallback(lambda _: pp.doWork(command, **kwargs)) 426 | return pp.doWork(command, **kwargs) 427 | -------------------------------------------------------------------------------- /ampoule/test/test_process.py: -------------------------------------------------------------------------------- 1 | 2 | from signal import SIGHUP 3 | import os 4 | import os.path 5 | from io import BytesIO as sio 6 | import tempfile 7 | 8 | from twisted.internet import error, defer, reactor 9 | from twisted.python import failure 10 | from twisted.trial import unittest 11 | from twisted.protocols import amp 12 | from ampoule import main, child, commands, pool 13 | 14 | class ShouldntHaveBeenCalled(Exception): 15 | pass 16 | 17 | def _raise(_): 18 | raise ShouldntHaveBeenCalled(_) 19 | 20 | class _FakeT(object): 21 | closeStdinCalled = False 22 | def __init__(self, s): 23 | self.s = s 24 | 25 | def closeStdin(self): 26 | self.closeStdinCalled = True 27 | 28 | def write(self, data): 29 | self.s.write(data) 30 | 31 | class FakeAMP(object): 32 | connector = None 33 | reason = None 34 | def __init__(self, s): 35 | self.s = s 36 | 37 | def makeConnection(self, connector): 38 | if self.connector is not None: 39 | raise Exception("makeConnection called twice") 40 | self.connector = connector 41 | 42 | def connectionLost(self, reason): 43 | if self.reason is not None: 44 | raise Exception("connectionLost called twice") 45 | self.reason = reason 46 | 47 | def dataReceived(self, data): 48 | self.s.write(data) 49 | 50 | class Exit(amp.Command): 51 | arguments = [] 52 | response = [] 53 | 54 | class Ping(amp.Command): 55 | arguments = [(b'data', amp.String())] 56 | response = [(b'response', amp.String())] 57 | 58 | class Pong(amp.Command): 59 | arguments = [(b'data', amp.String())] 60 | response = [(b'response', amp.String())] 61 | 62 | class Pid(amp.Command): 63 | response = [(b'pid', amp.Integer())] 64 | 
class Reactor(amp.Command):
    """
    Command whose response carries a single C{classname} string.
    """
    response = [(b'classname', amp.String())]


class NoResponse(amp.Command):
    """
    Command taking one string argument and not requiring an answer.
    """
    arguments = [(b'arg', amp.String())]
    requiresAnswer = False


class GetResponse(amp.Command):
    """
    Command whose response carries a single C{response} string.
    """
    response = [(b"response", amp.String())]


class Child(child.AMPChild):
    """
    Child answering L{Ping} by calling L{Pong} on the other side.
    """
    def ping(self, data):
        """
        Forward C{data} to the remote L{Pong} command and return its result.
        """
        return self.callRemote(Pong, data=data)
    Ping.responder(ping)


class PidChild(child.AMPChild):
    """
    Child answering L{Pid} with the pid of the process it runs in.
    """
    def pid(self):
        import os
        return {'pid': os.getpid()}
    Pid.responder(pid)


class NoResponseChild(child.AMPChild):
    """
    Child remembering the argument of the last L{NoResponse} call and
    returning it on L{GetResponse}.
    """
    # Value stored by noresponse(); stays False until the first call.
    _set = False

    def noresponse(self, arg):
        """
        Store C{arg} so that a later L{GetResponse} can return it.
        """
        self._set = arg
        return {}
    NoResponse.responder(noresponse)

    def getresponse(self):
        """
        Return whatever the last L{NoResponse} call stored.
        """
        return {"response": self._set}
    GetResponse.responder(getresponse)


class ReactorChild(child.AMPChild):
    """
    Child answering L{Reactor} with the class name of its reactor.
    """
    def reactor(self):
        from twisted.internet import reactor
        return {'classname': reactor.__class__.__name__.encode()}
    Reactor.responder(reactor)


class First(amp.Command):
    """
    Command taking a C{data} string and answering with a C{response} string.
    """
    arguments = [(b'data', amp.String())]
    response = [(b'response', amp.String())]


class Second(amp.Command):
    """
    Command without arguments or response fields.
    """
    pass


class WaitingChild(child.AMPChild):
    """
    Child whose answer to L{First} is held back until L{Second} arrives.
    """
    # Deferred created by first() and fired by second().
    deferred = None

    def first(self, data):
        """
        Return a deferred that yields C{data} as the response once
        L{second} has fired it.
        """
        self.deferred = defer.Deferred()
        return self.deferred.addCallback(lambda _: {'response': data})
    First.responder(first)

    def second(self):
        """
        Fire the deferred created by L{first}.
        """
        self.deferred.callback('')
        return {}
    Second.responder(second)


class Die(amp.Command):
    """
    Command without arguments or response fields.
    """
    pass


class BadChild(child.AMPChild):
    """
    Child answering L{Die} by dropping its connection.
    """
    def die(self):
        """
        Set C{shutdown} to False and close the transport.
        """
        self.shutdown = False
        self.transport.loseConnection()
        return {}
    Die.responder(die)


class ExitingChild(child.AMPChild):
    """
    Child answering L{Exit} by terminating its process with exit code 33.
    """
    def exit(self):
        import os
        os._exit(33)
    Exit.responder(exit)


class Write(amp.Command):
    """
    Command whose response carries a single C{response} string.
    """
    response = [(b"response", amp.String())]


class Writer(child.AMPChild):
    """
    Child answering L{Write} with the data it was constructed with.
    """

    def __init__(self, data=b'hello'):
        child.AMPChild.__init__(self)
        if isinstance(data, str):
            # this is passing through sys.argv, argv is unconditionally unicode
            # on py3; see https://bugs.python.org/issue8776
            data = data.encode()
        self.data = data

    def write(self):
        """
        Return the stored data as the response.
        """
        return {'response': self.data}
    Write.responder(write)


class GetCWD(amp.Command):
    """
    Command whose response carries a single C{cwd} unicode string.
    """

    response = [(b"cwd", amp.Unicode())]


class TempDirChild(child.AMPChild):
    """
    Child usable as a context manager: entering changes into a fresh
    temporary directory (and optionally a sub-directory of it), exiting
    removes the current working directory.
    """

    def __init__(self, directory=None):
        child.AMPChild.__init__(self)
        self.directory = directory

    def __enter__(self):
        """
        Create a temporary directory and change into it; when a
        C{directory} was given, create it inside and change into it too.
        """
        directory = tempfile.mkdtemp()
        os.chdir(directory)
        if self.directory is not None:
            os.mkdir(self.directory)
            os.chdir(self.directory)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Step out of the current working directory and remove it.
        """
        # NOTE(review): when self.directory is set only the inner directory
        # is removed here; the temporary root created in __enter__ appears
        # to be left behind -- confirm whether that is intended.
        cwd = os.getcwd()
        os.chdir('..')
        os.rmdir(cwd)

    def getcwd(self):
        """
        Return the current working directory of this process.
        """
        return {'cwd': os.getcwd()}
    GetCWD.responder(getcwd)


class TestAMPConnector(unittest.TestCase):
    """
    Tests for L{main.AMPConnector} and L{main.ProcessStarter}.
    """

    def setUp(self):
        """
        The only reason why this method exists is to let 'trial ampoule'
        install the signal handlers (#3178 for reference).
        """
        super(TestAMPConnector, self).setUp()
        d = defer.Deferred()
        reactor.callLater(0, d.callback, None)
        return d

    def _makeConnector(self, s, sa):
        """
        Build an L{main.AMPConnector} around a L{FakeAMP} writing to C{sa},
        with a fake transport built from C{s}.
        """
        a = FakeAMP(sa)
        ac = main.AMPConnector(a)
        # A real assertion method instead of a bare assert, which would be
        # stripped when running under -O.
        self.assertIsNotNone(ac.name)
        ac.transport = _FakeT(s)
        return ac

    def test_protocol(self):
        """
        Test that data received from the child on fd 4 is written to AMP
        and that the finished deferred fires once the process ended.
        """
        s = sio()
        sa = sio()
        ac = self._makeConnector(s, sa)

        for x in range(99):
            ac.childDataReceived(4, str(x).encode("ascii"))

        ac.processEnded(failure.Failure(error.ProcessDone(0)))
        expected = b''.join(str(x).encode("ascii") for x in range(99))
        return ac.finished.addCallback(
            lambda _: self.assertEqual(sa.getvalue(), expected)
        )

    def test_protocol_failing(self):
        """
        Test that a failure in the process termination is correctly
        propagated to the finished deferred.
        """
        s = sio()
        sa = sio()
        ac = self._makeConnector(s, sa)

        ac.finished.addCallback(_raise)
        fail = failure.Failure(error.ProcessTerminated())
        self.assertFailure(ac.finished, error.ProcessTerminated)
        ac.processEnded(fail)

    def test_startProcess(self):
        """
        Test that startProcess actually starts a subprocess and that
        it receives data back from the process through AMP.
        """
        s = sio()
        a = FakeAMP(s)
        STRING = b"ciao"
        BOOT = """\
import sys, os
def main(arg):
    os.write(4, arg.encode("utf-8"))
main(sys.argv[1])
"""
        starter = main.ProcessStarter(bootstrap=BOOT,
                                      args=(STRING,),
                                      packages=("twisted", "ampoule"))

        # The first element is unused; it is not called ``amp`` so that the
        # module-level ``amp`` import is not shadowed.
        _proto, finished = starter.startPythonProcess(main.AMPConnector(a))
        return finished.addCallback(lambda _: self.assertEqual(s.getvalue(), STRING))

    def test_failing_deferToProcess(self):
        """
        Test failing subprocesses and the way they terminate and preserve
        failing information.
        """
        s = sio()
        a = FakeAMP(s)
        STRING = b"ciao"
        BOOT = """\
import sys
def main(arg):
    raise Exception(arg)
main(sys.argv[1])
"""
        starter = main.ProcessStarter(bootstrap=BOOT, args=(STRING,), packages=("twisted", "ampoule"))
        ready, finished = starter.startPythonProcess(main.AMPConnector(a), "I'll be ignored")

        self.assertFailure(finished, error.ProcessTerminated)
        # NOTE(review): assertFailure consumes the expected ProcessTerminated
        # failure, so this errback presumably only runs if that check itself
        # fails -- confirm whether the message comparison is still wanted.
        finished.addErrback(lambda reason: self.assertEqual(reason.getMessage(), STRING))
        return finished

    def test_env_setting(self):
        """
        Test that an environment variable passed to the process starter
        is correctly passed to the child process.
        """
        s = sio()
        a = FakeAMP(s)
        STRING = b"ciao"
        BOOT = """\
import sys, io, os
def main():
    with io.open(4, 'w' + ('b' if bytes is str else '')) as f:
        f.write(os.environ['FOOBAR'])
main()
"""
        starter = main.ProcessStarter(bootstrap=BOOT,
                                      packages=("twisted", "ampoule"),
                                      env={"FOOBAR": STRING})
        _proto, finished = starter.startPythonProcess(main.AMPConnector(a), "I'll be ignored")
        return finished.addCallback(lambda _: self.assertEqual(s.getvalue(), STRING))

    def test_startAMPProcess(self):
        """
        Test that you can start an AMP subprocess and that it correctly
        accepts commands and correctly answers them.
        """
        STRING = b"ciao"

        starter = main.ProcessStarter(packages=("twisted", "ampoule"))
        c, finished = starter.startAMPProcess(child.AMPChild)
        d = c.callRemote(commands.Echo, data=STRING)
        d.addCallback(lambda response:
                      self.assertEqual(response['response'], STRING))
        d.addCallback(lambda _: c.callRemote(commands.Shutdown))
        return finished

    def test_BootstrapContext(self):
        """
        Test that a L{TempDirChild} runs in a working directory different
        from the parent's and that this directory no longer exists once
        the child has finished.
        """
        starter = main.ProcessStarter(packages=('twisted', 'ampoule'))
        c, finished = starter.startAMPProcess(TempDirChild)
        cwd = []

        def checkBootstrap(response):
            cwd.append(response['cwd'])
            # Compare the reported path itself; comparing the list against
            # a string could never fail.
            self.assertNotEqual(cwd[0], os.getcwd())

        d = c.callRemote(GetCWD)
        d.addCallback(checkBootstrap)
        d.addCallback(lambda _: c.callRemote(commands.Shutdown))
        finished.addCallback(lambda _: self.assertFalse(os.path.exists(cwd[0])))
        return finished

    def test_BootstrapContextInstance(self):
        """
        Test that arguments given through C{ampChildArgs} reach the child:
        the reported working directory ends with the requested directory
        name and no longer exists once the child has finished.
        """
        starter = main.ProcessStarter(packages=('twisted', 'ampoule'))
        c, finished = starter.startAMPProcess(TempDirChild,
                                              ampChildArgs=('foo',))
        cwd = []

        def checkBootstrap(response):
            cwd.append(response['cwd'])
            self.assertTrue(cwd[0].endswith('/foo'))

        d = c.callRemote(GetCWD)
        d.addCallback(checkBootstrap)
        d.addCallback(lambda _: c.callRemote(commands.Shutdown))
        finished.addCallback(lambda _: self.assertFalse(os.path.exists(cwd[0])))
        return finished

    def test_startAMPAndParentProtocol(self):
        """
        Test that you can start an AMP subprocess and the children can
        call methods on their parent.
        """
        DATA = b"CIAO"
        APPEND = b"123"

        class Parent(amp.AMP):
            def pong(self, data):
                return {'response': DATA+APPEND}
            Pong.responder(pong)

        starter = main.ProcessStarter(packages=("twisted", "ampoule"))

        subp, finished = starter.startAMPProcess(ampChild=Child, ampParent=Parent)
        d = subp.callRemote(Ping, data=DATA)
        d.addCallback(lambda response:
                      self.assertEqual(response['response'], DATA+APPEND))
        d.addCallback(lambda _: subp.callRemote(commands.Shutdown))
        return finished

    def test_roundtripError(self):
        """
        Test that invoking a child using an unreachable class raises
        a L{RuntimeError}.
        """
        class Child(child.AMPChild):
            pass

        starter = main.ProcessStarter(packages=("twisted", "ampoule"))

        self.assertRaises(RuntimeError, starter.startAMPProcess, ampChild=Child)


class TestProcessPool(unittest.TestCase):
    """
    Tests for L{pool.ProcessPool} and L{pool.deferToAMPProcess}.
    """

    def test_startStopWorker(self):
        """
        Test that starting and stopping a worker keeps the state of
        the process pool consistent.
        """
        pp = pool.ProcessPool()
        self.assertEqual(pp.started, False)
        self.assertEqual(pp.finished, False)
        self.assertEqual(pp.processes, set())
        self.assertEqual(pp._finishCallbacks, {})

        def _checks():
            self.assertEqual(pp.started, False)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), 1)
            self.assertEqual(len(pp._finishCallbacks), 1)
            return pp.stopAWorker()

        def _closingUp(_):
            self.assertEqual(pp.started, False)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), 0)
            self.assertEqual(pp._finishCallbacks, {})

        pp.startAWorker()
        return _checks().addCallback(_closingUp).addCallback(lambda _: pp.stop())

    def test_startAndStop(self):
        """
        Test that a process pool's start and stop method create the
        expected number of workers and keep state consistent in the
        process pool.
        """
        pp = pool.ProcessPool()
        self.assertEqual(pp.started, False)
        self.assertEqual(pp.finished, False)
        self.assertEqual(pp.processes, set())
        self.assertEqual(pp._finishCallbacks, {})

        def _checks(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)
            return pp.stop()

        def _closingUp(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, True)
            self.assertEqual(len(pp.processes), 0)
            self.assertEqual(pp._finishCallbacks, {})

        return pp.start().addCallback(_checks).addCallback(_closingUp)

    def test_adjustPoolSize(self):
        """
        Test that calls to pool.adjustPoolSize are correctly handled.
        """
        pp = pool.ProcessPool(min=10)
        self.assertEqual(pp.started, False)
        self.assertEqual(pp.finished, False)
        self.assertEqual(pp.processes, set())
        self.assertEqual(pp._finishCallbacks, {})

        def _resize1(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)
            return pp.adjustPoolSize(min=2, max=3)

        def _resize2(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(pp.max, 3)
            self.assertEqual(pp.min, 2)
            self.assertEqual(len(pp.processes), pp.max)
            self.assertEqual(len(pp._finishCallbacks), pp.max)

        def _resize3(_):
            self.assertRaises(AssertionError, pp.adjustPoolSize, min=-1, max=5)
            self.assertRaises(AssertionError, pp.adjustPoolSize, min=5, max=1)
            return pp.stop()

        d = pp.start()
        d.addCallback(_resize1)
        d.addCallback(_resize2)
        d.addCallback(_resize3)
        return d

    def test_childRestart(self):
        """
        Test that a failing child process is immediately restarted.
        """
        pp = pool.ProcessPool(ampChild=BadChild, min=1)
        STRING = b"DATA"

        def _checks(_):
            # Grab the finish deferred of the only worker before killing it.
            d = next(iter(pp._finishCallbacks.values()))
            pp.doWork(Die).addErrback(lambda _: None)
            return d.addBoth(_checksAgain)

        def _checksAgain(_):
            work = pp.doWork(commands.Echo, data=STRING)
            return work.addCallback(
                lambda result: self.assertEqual(result['response'], STRING))

        d = pp.start()
        d.addCallback(_checks)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_parentProtocolChange(self):
        """
        Test that the father can use an AMP protocol too.
        """
        DATA = b"CIAO"
        APPEND = b"123"

        class Parent(amp.AMP):
            def pong(self, data):
                return {'response': DATA+APPEND}
            Pong.responder(pong)

        pp = pool.ProcessPool(ampChild=Child, ampParent=Parent)

        def _checks(_):
            work = pp.doWork(Ping, data=DATA)
            return work.addCallback(
                lambda response:
                    self.assertEqual(response['response'], DATA+APPEND))

        return pp.start().addCallback(_checks).addCallback(lambda _: pp.stop())

    def test_deferToAMPProcess(self):
        """
        Test that deferToAMPProcess works as expected.
        """
        def cleanupGlobalPool():
            d = pool.pp.stop()
            pool.pp = None
            return d
        self.addCleanup(cleanupGlobalPool)

        STRING = b"CIAOOOO"
        d = pool.deferToAMPProcess(commands.Echo, data=STRING)
        d.addCallback(self.assertEqual, {"response": STRING})
        return d

    def test_checkStateInPool(self):
        """
        Test that busy and ready lists are correctly maintained.
        """
        pp = pool.ProcessPool(ampChild=WaitingChild)

        DATA = b"foobar"

        def _checks(_):
            d = pp.callRemote(First, data=DATA)
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)
            self.assertEqual(len(pp.ready), pp.min-1)
            self.assertEqual(len(pp.busy), 1)
            # Peek at the single busy worker without removing it from the
            # set, then let it answer the pending First call.
            worker = next(iter(pp.busy))
            worker.callRemote(Second)
            return d

        d = pp.start()
        d.addCallback(_checks)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_growingToMax(self):
        """
        Test that the pool grows over time until it reaches max processes.
        """
        MAX = 5
        pp = pool.ProcessPool(ampChild=WaitingChild, min=1, max=MAX)

        def _checks(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)

            D = b"DATA"
            pending = [pp.doWork(First, data=D) for x in range(MAX)]

            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.max)
            self.assertEqual(len(pp._finishCallbacks), pp.max)

            # Release every worker so the pending First calls can complete.
            for worker in pp.processes:
                worker.callRemote(Second)
            return defer.DeferredList(pending)

        d = pp.start()
        d.addCallback(_checks)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_growingToMaxAndShrinking(self):
        """
        Test that the pool grows but after 'idle' time the number of
        processes goes back to the minimum.
        """

        MAX = 5
        MIN = 1
        IDLE = 1
        pp = pool.ProcessPool(ampChild=WaitingChild, min=MIN, max=MAX, maxIdle=IDLE)

        def _checks(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)

            D = b"DATA"
            pending = [pp.doWork(First, data=D) for x in range(MAX)]

            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.max)
            self.assertEqual(len(pp._finishCallbacks), pp.max)

            # Release every worker so the pending First calls can complete.
            for worker in pp.processes:
                worker.callRemote(Second)
            return defer.DeferredList(pending).addCallback(_realChecks)

        def _realChecks(_):
            from twisted.internet import reactor
            d = defer.Deferred()

            def _cb():
                def _verify(_):
                    try:
                        self.assertEqual(pp.started, True)
                        self.assertEqual(pp.finished, False)
                        self.assertEqual(len(pp.processes), pp.min)
                        self.assertEqual(len(pp._finishCallbacks), pp.min)
                        d.callback(None)
                    except Exception as e:
                        d.errback(e)
                return pp._pruneProcesses().addCallback(_verify)

            # Stop the pool's own looping call so that the pruning is only
            # triggered explicitly from _cb, once IDLE seconds have passed.
            pp.looping.stop()
            reactor.callLater(IDLE, _cb)
            return d

        d = pp.start()
        d.addCallback(_checks)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_recycling(self):
        """
        Test that after a given number of calls subprocesses are
        recycled.
        """
        MAX = 1
        MIN = 1
        RECYCLE_AFTER = 1
        pp = pool.ProcessPool(ampChild=PidChild, min=MIN, max=MAX, recycleAfter=RECYCLE_AFTER)
        self.addCleanup(pp.stop)

        def _checks(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)
            work = pp.doWork(Pid)
            return work.addCallback(lambda response: response['pid'])

        def _checks2(pid):
            work = pp.doWork(Pid)
            work.addCallback(lambda response: response['pid'])
            work.addCallback(self.assertNotEqual, pid)
            return work

        d = pp.start()
        d.addCallback(_checks)
        d.addCallback(_checks2)
        return d

    def test_recyclingProcessFails(self):
        """
        A process exiting with a non-zero exit code when recycled does not get
        multiple processes started to replace it.
        """
        MAX = 1
        MIN = 1
        RECYCLE_AFTER = 1
        pp = pool.ProcessPool(ampChild=ExitingChild, min=MIN, max=MAX, recycleAfter=RECYCLE_AFTER)
        self.addCleanup(pp.stop)

        def _checks(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)
            worker = list(pp.ready)[0]
            finished = pp._finishCallbacks[worker]
            # Whatever the outcome of the call, wait for the worker to end.
            return pp.doWork(Exit).addBoth(lambda _: finished)

        def _checks2(_):
            self.assertEqual(len(pp.processes), pp.max)

        d = pp.start()
        d.addCallback(_checks)
        d.addCallback(_checks2)
        return d

    def test_recyclingWithQueueOverload(self):
        """
        Test that we get the correct number of different results when
        we overload the pool of calls.
        """
        MAX = 5
        MIN = 1
        RECYCLE_AFTER = 10
        CALLS = 60
        pp = pool.ProcessPool(ampChild=PidChild, min=MIN, max=MAX, recycleAfter=RECYCLE_AFTER)
        self.addCleanup(pp.stop)

        def _check(results):
            pids = {response['pid'] for _succeeded, response in results}

            # For the first C{MAX} calls, each is basically guaranteed to go to
            # a different child. After that, though, there are no guarantees.
            # All the rest might go to a single child, since the child to
            # perform a job is selected arbitrarily from the "ready" set. Fair
            # distribution of jobs needs to be implemented; right now it's "set
            # ordering" distribution of jobs.
            self.assertGreater(len(pids), MAX)

        def _work(_):
            calls = [pp.doWork(Pid) for x in range(CALLS)]
            return defer.DeferredList(calls).addCallback(_check)

        d = pp.start()
        d.addCallback(_work)
        return d

    def test_disableProcessRecycling(self):
        """
        Test that by setting 0 to recycleAfter we actually disable process recycling.
        """
        MAX = 1
        MIN = 1
        RECYCLE_AFTER = 0
        pp = pool.ProcessPool(ampChild=PidChild, min=MIN, max=MAX, recycleAfter=RECYCLE_AFTER)

        def _checks(_):
            self.assertEqual(pp.started, True)
            self.assertEqual(pp.finished, False)
            self.assertEqual(len(pp.processes), pp.min)
            self.assertEqual(len(pp._finishCallbacks), pp.min)
            work = pp.doWork(Pid)
            return work.addCallback(lambda response: response['pid'])

        def _checks2(pid):
            work = pp.doWork(Pid)
            work.addCallback(lambda response: response['pid'])
            work.addCallback(self.assertEqual, pid)
            # Hand the pid on so that the check can be chained again.
            work.addCallback(lambda _: pid)
            return work

        def finish(reason):
            return pp.stop().addCallback(lambda _: reason)

        d = pp.start()
        d.addCallback(_checks)
        d.addCallback(_checks2)
        d.addCallback(_checks2)
        d.addCallback(finish)
        return d

    def test_changeChildrenReactor(self):
        """
        Test that by passing the correct argument children change their
        reactor type.
        """
        MAX = 1
        MIN = 1
        FIRST = "select"
        SECOND = "poll"

        def checkDefault():
            pp = pool.ProcessPool(
                starter=main.ProcessStarter(
                    childReactor=FIRST,
                    packages=("twisted", "ampoule")),
                ampChild=ReactorChild, min=MIN, max=MAX)
            pp.start()
            return (pp.doWork(Reactor)
                    .addCallback(self.assertEqual,
                                 {'classname': b"SelectReactor"})
                    .addCallback(lambda _: pp.stop()))

        def checkPool(_):
            pp = pool.ProcessPool(
                starter=main.ProcessStarter(
                    childReactor=SECOND,
                    packages=("twisted", "ampoule")),
                ampChild=ReactorChild, min=MIN, max=MAX)
            pp.start()
            return (pp.doWork(Reactor)
                    .addCallback(self.assertEqual,
                                 {'classname': b"PollReactor"})
                    .addCallback(lambda _: pp.stop()))

        return checkDefault().addCallback(checkPool)
    # Skip the test above where select.poll cannot be imported.
    try:
        from select import poll
    except ImportError:
        test_changeChildrenReactor.skip = "This architecture doesn't support select.poll, I can't run this test"

    def test_commandsWithoutResponse(self):
        """
        Test that if we send a command without a required answer we
        actually don't have any problems.
        """
        DATA = b"hello"
        pp = pool.ProcessPool(ampChild=NoResponseChild, min=1, max=1)

        def _check(_):
            work = pp.doWork(GetResponse)
            return work.addCallback(self.assertEqual, {"response": DATA})

        def _work(_):
            return pp.doWork(NoResponse, arg=DATA)

        d = pp.start()
        d.addCallback(_work)
        d.addCallback(_check)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_supplyChildArgs(self):
        """Ensure that arguments for the child constructor are passed in."""
        pp = pool.ProcessPool(Writer, ampChildArgs=['body'], min=0)

        def _check(result):
            return pp.doWork(Write).addCallback(
                self.assertEqual, {'response': b'body'})

        d = pp.start()
        d.addCallback(_check)
        d.addCallback(lambda _: pp.stop())
        return d

    def processTimeoutTest(self, timeout):
        """
        Start a one-worker pool of L{WaitingChild}, issue a L{First} call
        with C{_timeout=timeout} and expect it to fail with
        L{error.ProcessTerminated}.
        """
        pp = pool.ProcessPool(WaitingChild, min=1, max=1)

        def _work(_):
            d = pp.callRemote(First, data=b"ciao", _timeout=timeout)
            self.assertFailure(d, error.ProcessTerminated)
            return d

        d = pp.start()
        d.addCallback(_work)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_processTimeout(self):
        """
        Test that a call that doesn't finish within the given timeout
        time is correctly handled.
        """
        return self.processTimeoutTest(1)

    def test_processTimeoutZero(self):
        """
        Test that the process is correctly handled when the timeout is zero.
        """
        return self.processTimeoutTest(0)

    def test_processDeadline(self):
        """
        Test that a call whose C{_deadline} is the current reactor time
        fails with L{error.ProcessTerminated}.
        """
        pp = pool.ProcessPool(WaitingChild, min=1, max=1)

        def _work(_):
            d = pp.callRemote(First, data=b"ciao", _deadline=reactor.seconds())
            self.assertFailure(d, error.ProcessTerminated)
            return d

        d = pp.start()
        d.addCallback(_work)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_processBeforeDeadline(self):
        """
        Test that a call given a C{_deadline} ten seconds ahead of the
        current reactor time completes and returns a non-zero pid.
        """
        pp = pool.ProcessPool(PidChild, min=1, max=1)

        def _work(_):
            d = pp.callRemote(Pid, _deadline=reactor.seconds() + 10)
            d.addCallback(lambda result: self.assertNotEqual(result['pid'], 0))
            return d

        d = pp.start()
        d.addCallback(_work)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_processTimeoutSignal(self):
        """
        Test that a call that doesn't finish within the given timeout
        fails with an error mentioning the configured C{timeout_signal}.
        """
        pp = pool.ProcessPool(WaitingChild, min=1, max=1,
                              timeout_signal=SIGHUP)

        def _work(_):
            d = pp.callRemote(First, data=b"ciao", _timeout=1)
            # A successful answer is a test failure; it is turned into one
            # here and then examined by the errback below.
            d.addCallback(lambda _result: self.fail())
            text = 'signal %d' % SIGHUP
            d.addErrback(lambda f: self.assertIn(text, str(f.value)))
            return d

        d = pp.start()
        d.addCallback(_work)
        d.addCallback(lambda _: pp.stop())
        return d

    def test_processGlobalTimeout(self):
        """
        Test that a call that doesn't finish within the given global
        timeout time is correctly handled.
        """
        pp = pool.ProcessPool(WaitingChild, min=1, max=1, timeout=1)

        def _work(_):
            d = pp.callRemote(First, data=b"ciao")
            self.assertFailure(d, error.ProcessTerminated)
            return d

        d = pp.start()
        d.addCallback(_work)
        d.addCallback(lambda _: pp.stop())
        return d