├── .gitignore
├── DESCRIPTION.rst
├── INSTALL
├── LICENSE
├── README.md
├── daemons
│   ├── broker
│   │   ├── bonjour_register.py
│   │   └── broker_daemon.py
│   ├── core
│   │   ├── base_module.py
│   │   ├── core_daemon.py
│   │   ├── module_mapper.py
│   │   ├── module_reducer.py
│   │   ├── module_vertex.py
│   │   └── remap.py
│   ├── initiator
│   │   ├── base_module.py
│   │   ├── http_interface.py
│   │   ├── initiator.py
│   │   ├── module_mapper.py
│   │   ├── module_reducer.py
│   │   ├── module_vertex.py
│   │   ├── monitor.py
│   │   └── planner.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── bonjour_detect.py
│   │   ├── remap_constants.py
│   │   └── remap_utils.py
│   ├── node
│   │   ├── node_daemon.py
│   │   └── node_hardware.py
│   └── vertexbroker
│       ├── .gitignore
│       ├── AUTHORS
│       ├── COPYING
│       ├── ChangeLog
│       ├── INSTALL
│       ├── Makefile.am
│       ├── NEWS
│       ├── README
│       ├── aclocal.m4
│       ├── autogen.sh
│       ├── bin
│       │   └── vertexbroker-0.1.1.tar.gz
│       ├── configure.ac
│       └── src
│           ├── .gitignore
│           ├── Makefile.am
│           ├── control.c
│           ├── control.h
│           ├── globals.h
│           └── main.c
├── dev
│   └── basic_elements.py
├── examples
│   ├── collation
│   │   ├── appconfig.json
│   │   └── collation.py
│   ├── highest
│   │   ├── appconfig.json
│   │   └── highest.py
│   ├── pagerank
│   │   ├── appconfig.json
│   │   └── pagerank.py
│   ├── secondarysort
│   │   ├── appconfig.json
│   │   └── secondarysort.py
│   └── wordcount
│       ├── appconfig.json
│       └── wordcount.py
├── images
│   ├── flow.png
│   └── flow.svg
├── prepare_env.py
├── setup.py
├── testdata
│   ├── csv
│   │   └── insurance_sample.csv
│   ├── graph
│   │   ├── graph1.txt
│   │   └── graph2.txt
│   ├── gutenberg
│   │   ├── alice-in-wonderland.txt
│   │   ├── beowulf.txt
│   │   ├── legends-tales-poems.txt
│   │   ├── picture-dorian-gray.txt
│   │   ├── pride-prejudice.txt
│   │   └── tomsawyer.txt
│   ├── highest
│   │   └── test.txt
│   └── html
│       ├── elemtree.html
│       └── remap-design.html
└── tests
    ├── adhoc
    │   ├── app-jobstart-reducer-3cores.sh
    │   ├── app-jobstart-reducer.sh
    │   ├── app-jobstart.sh
    │   ├── app-showhands.sh
    │   ├── nanocat-sub-vbroker.sh
    │   └── nanocat-sub.sh
    ├── examples
    │   ├── app-jobstart-3cores.sh
    │   └── run_wordcount.sh
    ├── scripts
    │   ├── run_publisher.py
    │   └── run_subscriber.py
    └── unit
        └── UNITTESTS_GO_HERE

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | 
24 | # PyInstaller
25 | # Usually these files are written by a python script from a template
26 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
27 | *.manifest
28 | *.spec
29 | 
30 | # Installer logs
31 | pip-log.txt
32 | pip-delete-this-directory.txt
33 | 
34 | # Unit test / coverage reports
35 | htmlcov/
36 | .tox/
37 | .coverage
38 | .coverage.*
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 | *.cover
43 | 
44 | # Translations
45 | *.mo
46 | *.pot
47 | 
48 | # Django stuff:
49 | *.log
50 | 
51 | # Sphinx documentation
52 | docs/_build/
53 | 
54 | # PyBuilder
55 | target/
56 | 
57 | 3rdparty/
58 | 
59 | 
--------------------------------------------------------------------------------
/DESCRIPTION.rst:
--------------------------------------------------------------------------------
1 | Remap
2 | =====
3 | 
4 | Remap is a distributed execution engine (for lack of a better description) written in 100% pure Python. You can kick off distributed processes such as map/reduce from a remap monitor. The platform figures out which nodes and cores are available to run the jobs, distributes the work across those nodes and tracks its progress.
5 | 
6 | At the moment map/reduce is implemented and working; that is, the monitoring, management and execution parts. You need to use a REST client to kick off a job from the monitor with a bit of simple JSON. Work is in progress to make that process easier to understand.
7 | 
8 | Remap is very new and targeted at small-scale installations. The whole idea of remap is for developers, researchers and tinkerers to be able to put an environment together very quickly, so you can get to your algorithm implementations as quickly as possible.
9 | 
10 | So the focus of this implementation is on:
11 | - Very low complexity and a small number of installation steps
12 | - Can run locally on a single machine or on a couple of machines without much effort
13 | - Sensible, minimalistic API that doesn't get in your way (you have full control over the data flow in code)
14 | - Relies on mature projects to solve 'the difficult stuff' (distributed filesystems, high-performance messaging, etc.)
15 | - Input and output files are always in an interpretable format, but you decide what that format is.
16 | 
17 | If this sounds interesting, please follow the steps on the wiki to take it for a quick test run:
18 | 
19 | https://github.com/gtoonstra/remap/wiki
20 | 
21 | 
--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 | This install process will improve in the future, but this helps to get started.
2 | 
3 | This is all pure Python code, so you should be able to run it on Windows, Mac OS X or Linux.
4 | On Linux and Windows you need to have Avahi or an equivalent Bonjour implementation installed;
5 | on OS X this just works.
6 | 
7 | The following works on Ubuntu 14.10:
8 | 
9 | > sudo apt-get install libnanomsg0 python3.4
10 | 
11 | If nanomsg is not available (14.04?, Windows?, OS X?), try building it from source:
12 | 
13 | > cd /tmp
14 | > git clone git@github.com:nanomsg/nanomsg.git
15 | > cd nanomsg
16 | > ./autogen.sh
17 | > ./configure
18 | > make
19 | > sudo make install
20 | 
21 | Then install the following Python packages:
22 | 
23 | > sudo apt-get install python3-pip
24 | > sudo python3.4 -m pip install nanomsg
25 | > cd /tmp
26 | > git clone git@github.com:depl0y/pybonjour-python3.git
27 | > cd pybonjour-python3
28 | > sudo python3.4 setup.py install
29 | 
30 | Now your machine is ready to run the code.
31 | 
32 | Find the directory in which you want to create your remap directory, then:
33 | 
34 | > git clone https://github.com/gtoonstra/remap.git
35 | > cd remap
36 | > ls
37 | 
38 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2015
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # remap
2 | 
3 | Remap is capable of running work on many cores in a distributed fashion over the network and orchestrating that work. It's impossible to make a 100% generic daemon that does this, so there are modules involved that direct the process flow and the status tracking of the workers on the network, and those modules ultimately determine how the algorithm behaves.
4 | 
5 | Remap is mostly written in pure Python, with the exception of a specialized broker for "vertex" tasks. The platform figures out if and how many nodes and cores are available to run your algorithm. The way this works is that you have a Python script file with your map/reduce or vertex functions, which must be available to all nodes (either copied locally or served from a network drive). Then you just start the job from the monitor and watch the fireworks.
6 | 
7 | Remap uses an HTTP REST interface. See the "http_interface" file (which should be really easy to read) for the endpoints; an example request is shown at the end of this README. Remap is also integrated with Airflow (see Airbnb), so you can build fairly advanced workflows that involve map/reduce or vertex jobs.
8 | 
9 | Remap is too new for serious production work, so it's targeted at small-scale installations. The whole idea of remap is for developers, researchers and tinkerers to be able to put an environment together very quickly, so you can work on your algorithm implementations as quickly as possible.
10 | 
11 | Other things I think are important:
12 | - Minimal code quantity and few installation steps
13 | - Runs locally on a single machine or on a couple of machines without much effort
14 | - Sensible, minimalistic API that doesn't get in your way (you have full control over the data flow in code, maybe even too much)
15 | - Relies on mature projects to solve 'difficult stuff' (distributed filesystems, high-performance messaging, etc.)
16 | - Input and output files can remain in an interpretable format, but you ultimately decide that.
17 | 
18 | If this sounds interesting, please follow the steps on the wiki to take it for a quick test run:
19 | 
20 | https://github.com/gtoonstra/remap/wiki
21 | 
22 | ## Help wanted
23 | 
24 | I'm looking for people who are interested in helping out. There's work to be done on design, plumbing, reliability testing, a UI for a dashboard and a small Node.js server to interact with the cluster. See the wiki for details.
25 | 
26 | ## Contact
27 | 
28 | There's a project mailing list for users and developers:
29 | 
30 | http://www.freelists.org/list/remap
31 | 
32 | You can also mail me at my gmail account. Use my github username.
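## Example: starting a job

As a rough illustration of the REST interface mentioned above, the request below starts a map job. The endpoint and the field names (`type`, `app`, `priority`, `parallellism`, `inputdir`) come from `http_interface.py` and the initiator modules; the host and port (Flask's defaults), the `wordcount` app name and the `gutenberg` input directory are placeholder assumptions for illustration only.

```sh
# Hypothetical request; adjust host, app name and input directory to your setup.
curl -X POST http://localhost:5000/api/v1.0/jobs \
     -H "Content-Type: application/json" \
     -d '{"type": "mapper", "app": "wordcount", "priority": 0, "parallellism": 4, "inputdir": "gutenberg"}'
```

On success the monitor answers with `201` and the generated `jobid`. The same resource also answers `GET` to list jobs, and `/api/v1.0/apps` and `/api/v1.0/nodes` list the installed apps and the nodes that have raised their hand.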
33 | -------------------------------------------------------------------------------- /daemons/broker/bonjour_register.py: -------------------------------------------------------------------------------- 1 | import select 2 | import sys 3 | import pybonjour 4 | import threading 5 | import logging 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | class BonjourRegistration(object): 10 | def __init__( self, name, regtype, port ): 11 | self.name = name 12 | self.regtype = regtype 13 | self.port = port 14 | self.t = threading.Thread(target=self.run, args=()) 15 | self.t.daemon = True 16 | 17 | def start( self ): 18 | self.t.start() 19 | 20 | def run( self ): 21 | logger.info( "Registering broker as %s under %s"%( self.name, self.regtype )) 22 | sdRef = pybonjour.DNSServiceRegister(name = self.name, 23 | regtype = self.regtype, 24 | port = self.port, 25 | callBack = self.register_callback) 26 | 27 | try: 28 | try: 29 | while True: 30 | ready = select.select([sdRef], [], []) 31 | if sdRef in ready[0]: 32 | pybonjour.DNSServiceProcessResult(sdRef) 33 | except KeyboardInterrupt: 34 | pass 35 | finally: 36 | sdRef.close() 37 | 38 | def register_callback(self, sdRef, flags, errorCode, name, regtype, domain): 39 | if errorCode == pybonjour.kDNSServiceErr_NoError: 40 | logger.info( "Registered broker as bonjour service" ) 41 | else: 42 | logger.info( "Registration of broker over bonjour failed." ) 43 | 44 | -------------------------------------------------------------------------------- /daemons/broker/broker_daemon.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import nanomsg as nn 4 | from nanomsg import wrapper as nn_wrapper 5 | import pybonjour 6 | import logging 7 | import time 8 | from bonjour_register import BonjourRegistration 9 | 10 | logging.basicConfig( level=logging.INFO ) 11 | 12 | # logger = logging.getLogger(__name__) 13 | logger = logging.getLogger("BrokerDaemon") 14 | 15 | if __name__ == "__main__": 16 | # Interactions between brokers not implemented yet, probably requires C 17 | # and 'poll' functionality to look for any messages. 18 | # 19 | # Which means that brokers do not need to know about connected clients yet... 
20 | # 21 | #bpub = nn.socket( nn.PUB ) 22 | #bpub.bind( "tcp://0.0.0.0:8787" ) 23 | 24 | logger.info( "Starting Broker" ) 25 | 26 | # Local pub and sub 27 | lpub = nn.Socket( nn.PUB, domain=nn.AF_SP_RAW ) 28 | lpub.bind( "tcp://0.0.0.0:8687" ) 29 | lsub = nn.Socket( nn.SUB, domain=nn.AF_SP_RAW ) 30 | lsub.bind( "tcp://0.0.0.0:8686" ) 31 | lsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "") 32 | 33 | # expose this service over bonjour 34 | bv = BonjourRegistration( "vertexbroker", "_vertexremap._tcp", 8689 ) 35 | bv.start() 36 | 37 | time.sleep( 1 ) 38 | 39 | br = BonjourRegistration( "broker", "_remap._tcp", 8687 ) 40 | br.start() 41 | 42 | # move messages between them 43 | dev = nn.Device( lsub, lpub ) 44 | logger.info( "Broker started" ) 45 | dev.start() 46 | 47 | -------------------------------------------------------------------------------- /daemons/core/base_module.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import time 5 | 6 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.insert(0, parent) 8 | 9 | import lib.remap_utils as remap_utils 10 | import lib.remap_constants as remap_constants 11 | from lib.remap_utils import RemapException 12 | 13 | class WorkerBase(object): 14 | def __init__( self, app, appconfig, workdata ): 15 | self.app = app 16 | self.appconfig = appconfig 17 | self.workdata = workdata 18 | self.jobid = workdata["jobid"] 19 | self.remaproot = appconfig["remaproot"] 20 | self.progress = 0 21 | 22 | def module_manages_progress( self ): 23 | return False 24 | 25 | 26 | -------------------------------------------------------------------------------- /daemons/core/core_daemon.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import nanomsg as nn 4 | from nanomsg import wrapper as nn_wrapper 5 | import logging 6 | import time 7 | import json 8 | 9 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | sys.path.insert(0, parent) 11 | 12 | import lib.remap_utils as remap_utils 13 | import lib.remap_constants as remap_constants 14 | from lib.remap_utils import RemapException 15 | 16 | # A core daemon connects to the node daemon. 17 | # Core daemons manage the map/reduce processes doing the actual work. 18 | # 19 | # The core alternates between work and reading messages from the node 20 | # The volume of messages isn't very high, it's mostly about planning and 21 | # orchestration of the services and relaying progress, status and health messages. 22 | # 23 | 24 | logging.basicConfig( level=logging.INFO ) 25 | 26 | # logger = logging.getLogger(__name__) 27 | logger = logging.getLogger("CoreDaemon") 28 | 29 | class CoreDaemon( object ): 30 | def __init__(self, remaproot): 31 | self.remaproot = remaproot 32 | self.pid = os.getpid() 33 | self.coreid = "unknown" 34 | self.sub = None 35 | self.pub = None 36 | self.jobid = None 37 | self.priority = 0 38 | self.ts_workRequested = 0 39 | self.keepWorking = True 40 | self.processed = 0 41 | self.total_size = 0 42 | self.input = None 43 | self.prevkey = None 44 | self.prevlist = None 45 | 46 | # The core daemon connects to the node first. 
47 | def setup_node( self ): 48 | self.sub = nn.Socket( nn.SUB ) 49 | self.sub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "" ) 50 | self.sub.connect("ipc:///tmp/node_sub.ipc") 51 | self.pub = nn.Socket( nn.PUB ) 52 | self.pub.connect("ipc:///tmp/node_pub.ipc") 53 | 54 | def set_node_timeout( self, rcv_timeout ): 55 | # Apply a timeout for receiving messages from node. 56 | self.sub.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout ) 57 | 58 | def process_node_messages( self ): 59 | try: 60 | msg = self.sub.recv() 61 | msgprefix, data = remap_utils.unpack_msg( msg ) 62 | 63 | recipientid,msgtype,senderid = remap_utils.split_prefix(msgprefix) 64 | 65 | if recipientid == self.coreid: 66 | # This is directed at this core specifically, so it's more of a req/rep type 67 | self.process_personal_message( msgtype, senderid, data ) 68 | elif recipientid == "global": 69 | self.process_global_message( msgtype, senderid, data ) 70 | elif recipientid == "local": 71 | self.process_local_message( msgtype, senderid, data ) 72 | elif recipientid == "notlocal": 73 | self.process_global_message( msgtype, senderid, data ) 74 | elif recipientid == "node": 75 | self.process_node_message( msgtype, senderid, data ) 76 | else: 77 | logger.info("Unrecognized message type %s, sent by %s"%( msgtype, senderid ) ) 78 | return True 79 | except nn.NanoMsgAPIError as e: 80 | return False 81 | 82 | def forward( self, prefix, data ): 83 | msg = remap_utils.pack_msg( prefix, data ) 84 | self.pub.send( msg ) 85 | 86 | def subscribe( self, prefix, doSub ): 87 | if doSub: 88 | self.sub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, prefix ) 89 | self.forward( "node._sub.%s"%(self.coreid), { "prefix":prefix } ) 90 | else: 91 | self.sub.set_string_option( nn.SUB, nn.SUB_UNSUBSCRIBE, prefix ) 92 | self.forward( "node._unsub.%s"%(self.coreid), { "prefix":prefix } ) 93 | 94 | # this function registers the new core process with node 95 | def register( self ): 96 | self.set_node_timeout( 500 ) 97 | msgid = remap_utils.unique_id() 98 | 99 | logger.info( "Registering with node" ) 100 | self.forward( "node._hello.%d"%(self.pid), {"msgid":msgid,"pid":self.pid,"priority":self.priority} ) 101 | 102 | # The while loop will terminate as soon as node stops sending messages, 103 | # so this should be safe to do. 104 | while True: 105 | try: 106 | msg = self.sub.recv() 107 | msgprefix, data = remap_utils.unpack_msg( msg ) 108 | recipientid,msgtype,senderid = remap_utils.split_prefix(msgprefix) 109 | if msgtype != "_hey": 110 | continue 111 | 112 | replymsgid = remap_utils.safe_get(data, "msgid") 113 | if replymsgid == msgid: 114 | # this is us 115 | self.coreid = remap_utils.safe_get(data, "coreid" ) 116 | self.sub.set_string_option( nn.SUB, nn.SUB_UNSUBSCRIBE, "" ) 117 | self.sub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "global" ) 118 | self.sub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "local" ) 119 | self.sub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "notlocal" ) 120 | self.sub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, self.coreid ) 121 | 122 | logger.info( "Received coreid %s."%( self.coreid )) 123 | return True 124 | except nn.NanoMsgAPIError as e: 125 | logger.error( "Node is currently not available." 
) 126 | break 127 | logger.error( "Registration failed" ) 128 | return False 129 | 130 | def load_plugin(self, name): 131 | try: 132 | mod = __import__("module_%s" % name) 133 | return mod 134 | except ImportError as ie: 135 | raise RemapException( "No such worker type: %s"%( name )) 136 | 137 | def process_personal_message( self, msgtype, sender, workdata ): 138 | if msgtype == "_work": 139 | # {"appmodule": "wordcount", "appconfig": "wordcount/appconfig.json", "priority": 0, "inputfile": "gutenberg/picture-dorian-gray.txt", "type": "mapper", "jobid": "74d74370-1cca-11e5-afab-90e6ba78077a"} 140 | # 141 | # Data is the work to be executed 142 | # Prepare to start that work 143 | # 144 | logger.info("Received work item from node") 145 | 146 | self.jobid = workdata["jobid"] 147 | self.workertype = workdata["type"] 148 | self.appdir = workdata["appdir"] 149 | appconfig = None 150 | 151 | try: 152 | configfile = os.path.join( self.remaproot, "job", self.jobid, "app", workdata["appconfig"] ) 153 | ap = open( configfile, 'r' ) 154 | contents = ap.read() 155 | appconfig = json.loads( contents ) 156 | except IOError as ioe: 157 | raise RemapException( "App config not found" ) from ioe 158 | 159 | if "module" not in appconfig: 160 | raise RemapException( "App config missing 'module' specification." ) 161 | 162 | modulename = appconfig["module"] 163 | app = __import__(modulename, fromlist = ["*"]) 164 | 165 | appconfig["remaproot"] = self.remaproot 166 | 167 | plugin = self.load_plugin( self.workertype ) 168 | self.worker = plugin.create_worker( app, appconfig, workdata ) 169 | else: 170 | logger.warn("Unknown personal message received from node: %s"%( msgtype )) 171 | 172 | def process_global_message( self, msgtype, sender, data ): 173 | pass 174 | 175 | def process_local_message( self, msgtype, sender, data ): 176 | pass 177 | 178 | def process_node_message( self, msgtype, sender, data ): 179 | if msgtype == "_plzreg": 180 | self.register() 181 | else: 182 | logger.warn("Unknown node message received from node: %s"%( msgtype )) 183 | 184 | def send_status( self ): 185 | if self.jobid != None: 186 | if not self.worker.module_manages_progress(): 187 | data = self.worker.status() 188 | data["type"] = self.workertype 189 | self.forward( "%s.corestatus.%s"%(self.jobid, self.coreid), data ) 190 | else: 191 | # Still need to send a message to node daemon, which manages processes at local level. 192 | self.forward( "node._status.%s"%(self.coreid), {} ) 193 | 194 | def do_more_work( self ): 195 | # Check if we have some work to do already 196 | if self.jobid != None: 197 | if not self.worker.work(): 198 | result, data = self.worker.result() 199 | data["type"] = self.workertype 200 | self.forward( "%s.%s.%s"%(self.jobid, result, self.coreid), data ) 201 | return False 202 | else: 203 | # No work yet, so let's request some and otherwise wait 5 seconds until 204 | # we go away. 205 | if self.ts_workRequested > 0: 206 | if (time.time() - self.ts_workRequested) < 5: 207 | # prevent loop with 100% cpu utilization 208 | # wait at most 5 seconds for work to drop in. 
209 | time.sleep(0.1) 210 | return True 211 | else: 212 | logger.error("The work to be processed never arrived.") 213 | return False 214 | 215 | logger.info( "Grabbing work item from node" ) 216 | self.ts_workRequested = time.time() 217 | self.forward( "node._todo.%s"%(self.coreid), {} ) 218 | return True 219 | 220 | def shutdown( self ): 221 | self.sub.close() 222 | self.pub.close() 223 | 224 | if __name__ == "__main__": 225 | 226 | # Initialization of the core. We need a core id to work with (from node). 227 | core = CoreDaemon( sys.argv[1] ) 228 | core.setup_node() 229 | 230 | # wait 50ms for node comms to be established 231 | time.sleep( 0.05 ) 232 | 233 | # 5 attempts to register 234 | attempts = 5 235 | registered = False 236 | while ( attempts > 0 ): 237 | try: 238 | if core.register(): 239 | registered = True 240 | break 241 | attempts = attempts - 1 242 | except RemapException as re: 243 | logger.exception( re ) 244 | attempts = attempts - 1 245 | 246 | if not registered: 247 | logger.error( "Could not register with node to get a core id. Exiting." ) 248 | sys.exit(-1) 249 | 250 | core.set_node_timeout( 0 ) 251 | 252 | while( core.keepWorking ): 253 | try: 254 | while (core.process_node_messages()): 255 | pass 256 | except RemapException as re: 257 | logger.exception( re ) 258 | 259 | try: 260 | if not core.do_more_work(): 261 | logger.info("The core finished processing") 262 | core.keepWorking = False 263 | # allow time for any messages to be sent 264 | time.sleep( 0.1 ) 265 | else: 266 | core.send_status() 267 | except RemapException as re: 268 | logger.exception( re ) 269 | # take other actions 270 | 271 | core.shutdown() 272 | 273 | -------------------------------------------------------------------------------- /daemons/core/module_mapper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import time 5 | 6 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.insert(0, parent) 8 | 9 | import lib.remap_utils as remap_utils 10 | import lib.remap_constants as remap_constants 11 | from lib.remap_utils import RemapException 12 | from base_module import WorkerBase 13 | 14 | logging.basicConfig( level=logging.INFO ) 15 | 16 | # logger = logging.getLogger(__name__) 17 | logger = logging.getLogger("Mapper") 18 | 19 | def create_worker( app, appconfig, workdata ): 20 | return Mapper( app, appconfig, workdata ) 21 | 22 | class Mapper(WorkerBase): 23 | def __init__( self, app, appconfig, workdata ): 24 | WorkerBase.__init__( self, app, appconfig, workdata ) 25 | self.workerid = workdata["workid"] 26 | 27 | inputfile = os.path.join( self.remaproot, "data", self.workdata["inputfile"] ) 28 | outputdir = os.path.join( self.remaproot, "job", self.jobid, "part" ) 29 | 30 | self.input = self.app.create_mapper_reader( inputfile ) 31 | self.outputdir = outputdir 32 | self.partitions = {} 33 | 34 | def status( self ): 35 | return {"inputfile":self.workdata["inputfile"],"progress":self.progress} 36 | 37 | def result( self ): 38 | if self.input.isComplete(): 39 | return "complete", {"inputfile":self.workdata["inputfile"]} 40 | 41 | return "fail", {"inputfile":self.workdata["inputfile"]} 42 | 43 | def work( self ): 44 | if self.input.isComplete(): 45 | return False 46 | 47 | # so, do some work 48 | for k1, v1 in self.input.read(): 49 | for part, k2, v2 in self.app.map( k1, v1 ): 50 | if part not in self.partitions: 51 | self.partitions[ part ] = self.app.create_mapper_partitioner( 
self.outputdir, part, self.workerid ) 52 | self.partitions[ part ].store( k2, v2 ) 53 | 54 | p = self.input.progress() 55 | if p > self.progress+5: 56 | self.progress = int(p) 57 | break 58 | 59 | if self.input.isComplete(): 60 | self.progress = 100 61 | self.input.close() 62 | for part in self.partitions: 63 | self.partitions[part].sort_flush_close() 64 | 65 | return True 66 | 67 | -------------------------------------------------------------------------------- /daemons/core/module_reducer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import time 5 | import heapq 6 | from operator import itemgetter 7 | 8 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 9 | sys.path.insert(0, parent) 10 | 11 | import lib.remap_utils as remap_utils 12 | import lib.remap_constants as remap_constants 13 | from lib.remap_utils import RemapException 14 | from base_module import WorkerBase 15 | 16 | def create_worker( app, appconfig, workdata ): 17 | return Reducer( app, appconfig, workdata ) 18 | 19 | class Reducer(WorkerBase): 20 | def __init__( self, app, appconfig, workdata ): 21 | WorkerBase.__init__( self, app, appconfig, workdata ) 22 | self.total_size = 0 23 | self.prevkey = None 24 | 25 | # This is a reducer operation 26 | inputdir = os.path.join( self.remaproot, "job", self.jobid, "part", self.workdata["partition"] ) 27 | outputdir = os.path.join( self.remaproot, "data", self.workdata["outputdir"] ) 28 | 29 | self.reducerfiles = sorted(os.listdir( inputdir )) 30 | self.inputdir = inputdir 31 | self.numparts = len(self.reducerfiles) 32 | self.fraction = 100.0 / self.numparts 33 | self.completedparts = 0 34 | self.outputdir = outputdir 35 | self.partition = self.workdata["partition"] 36 | self.reducerWriter = self.app.create_reducer_writer( self.outputdir, self.partition ) 37 | 38 | self.sources = [] 39 | for filename in self.reducerfiles: 40 | f = self.app.create_reducer_reader( os.path.join( self.inputdir, filename )) 41 | self.sources.append( f ) 42 | self.total_size = self.total_size + f.filesize 43 | 44 | decorated = [ 45 | ((key,list_of_values,recsize) for key,list_of_values,recsize in f.read()) 46 | for f in self.sources] 47 | self.merged = heapq.merge(*decorated) 48 | 49 | def status( self ): 50 | return {"partition":self.partition,"progress":self.progress} 51 | 52 | def result( self ): 53 | if len(self.sources) == 0: 54 | return "complete", {"partition":self.partition} 55 | 56 | return "fail", {"partition":self.partition} 57 | 58 | def work( self ): 59 | if len(self.sources) == 0: 60 | return False 61 | 62 | readrec = False 63 | for k2,v2,recsize in self.merged: 64 | readrec = True 65 | if self.prevkey == None: 66 | # Initialize the very first step 67 | self.prevkey = k2 68 | self.prevlist = v2 69 | self.processed = recsize 70 | elif self.prevkey != k2: 71 | # The key changed. 
Dump all values of previous step 72 | for k3,v3 in self.app.reduce( self.prevkey, self.prevlist ): 73 | self.reducerWriter.store( k3, v3 ) 74 | self.prevkey = k2 75 | self.prevlist = v2 76 | self.processed = self.processed + recsize 77 | else: 78 | # Add another record to the list 79 | self.prevlist = self.prevlist + v2 80 | self.processed = self.processed + recsize 81 | 82 | p = (self.processed / self.total_size) * 100 83 | if p > self.progress+5: 84 | self.progress = int(p) 85 | # breaking out of the loop to check up on messages 86 | break 87 | 88 | if not readrec: 89 | # done 90 | self.progress = 100 91 | for f in self.sources: 92 | f.close() 93 | self.sources = [] 94 | self.reducerWriter.close() 95 | 96 | return True 97 | 98 | -------------------------------------------------------------------------------- /daemons/core/module_vertex.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import time 5 | import nanomsg as nn 6 | 7 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | sys.path.insert(0, parent) 9 | 10 | import lib.remap_utils as remap_utils 11 | import lib.remap_constants as remap_constants 12 | from lib.remap_utils import RemapException 13 | from lib.bonjour_detect import BonjourResolver 14 | from base_module import WorkerBase 15 | 16 | logging.basicConfig( level=logging.INFO ) 17 | 18 | # logger = logging.getLogger(__name__) 19 | logger = logging.getLogger("Vertex") 20 | 21 | def create_worker( app, appconfig, workdata ): 22 | return Vertex( app, appconfig, workdata ) 23 | 24 | MODE_IDLE = 0 25 | MODE_MSGS = 1 26 | MODE_HALT = 2 27 | MODE_RUN = 3 28 | MODE_PROCESS = 4 29 | 30 | class Vertex(WorkerBase): 31 | def __init__( self, app, appconfig, workdata ): 32 | WorkerBase.__init__( self, app, appconfig, workdata ) 33 | self.surveyorname = workdata["hostname"] 34 | self.vsub = nn.Socket( nn.SUB, domain=nn.AF_SP ) 35 | self.vpub = nn.Socket( nn.PUB, domain=nn.AF_SP ) 36 | self.broker_address = None 37 | 38 | self.bonjour = BonjourResolver( "_vertexremap._tcp", self.cb_broker_changed ) 39 | self.bonjour.start() 40 | 41 | inputfile = os.path.join( self.remaproot, "data", self.workdata["inputfile"] ) 42 | outputdir = os.path.join( self.remaproot, "job", self.jobid, "part" ) 43 | 44 | self.input = self.app.create_vertex_reader( inputfile ) 45 | self.outputdir = outputdir 46 | self.partitions = {} 47 | 48 | self.mode = MODE_IDLE 49 | 50 | self.surveyor = nn.Socket( nn.RESPONDENT ) 51 | self.surveyor.connect( "tcp://%s:8688"%(self.surveyorname) ) 52 | # 6 seconds 53 | self.surveyor.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, 50 ) 54 | self.vertices = {} 55 | 56 | logger.info("Waiting to get vertex broker host from bonjour") 57 | 58 | self.ready = False 59 | 60 | def cb_broker_changed( self, broker_address ): 61 | logger.info("Received vertex broker address: %s"%(broker_address) ) 62 | if self.broker_address != None: 63 | return 64 | 65 | self.broker_address = broker_address 66 | 67 | # vertex broker pub and sub 68 | self.vpubc = self.vpub.connect( "tcp://%s:8689"%(self.broker_address) ) 69 | self.vsubc = self.vsub.connect( "tcp://%s:8690"%(self.broker_address) ) 70 | 71 | # 2 seconds max 72 | self.vsub.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, 2000 ) 73 | 74 | logger.info("Vertex broker setup complete") 75 | 76 | for value in self.input.read(): 77 | key, vertex = self.app.prepare( value ) 78 | if key == None or vertex == None: 79 | continue 80 | # Store vertex by id in dict 
with 2 lists for messages 81 | self.vertices[ key ] = (vertex, [], []) 82 | self.vsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, key ) 83 | 84 | logger.info("Ready for processing") 85 | 86 | self.ready = True 87 | 88 | def module_manages_progress( self ): 89 | return True 90 | 91 | def result( self ): 92 | return "complete", {"inputfile":self.workdata["inputfile"]} 93 | 94 | def forward( self, id, msg ): 95 | # Forward to vertex broker 96 | self.vpub.send( id + " " + msg ) 97 | 98 | def subscribe( self, topic ): 99 | self.vsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, topic) 100 | 101 | def unsubscribe( self, topic ): 102 | self.vsub.set_string_option( nn.SUB, nn.SUB_UNSUBSCRIBE, topic) 103 | 104 | # This function performs actual work. The *state* is in the initiator daemon only, 105 | # so a worker is directly responsive to whatever the surveyor tells the worker to do. 106 | def work( self ): 107 | if not self.ready: 108 | return True 109 | 110 | surveyormsg = None 111 | try: 112 | surveyormsg = remap_utils.decode( self.surveyor.recv() ) 113 | except nn.NanoMsgAPIError as e: 114 | return True 115 | 116 | if surveyormsg[0] == 'S': 117 | # Shift messages 118 | if self.mode != MODE_MSGS: 119 | self.mode = MODE_MSGS 120 | # We haven't done this in a previous step. Due to recovery, it might be 121 | # used by the initiator to get others up to speed. 122 | for key, (vertex,messages,messagesNext) in self.vertices.items(): 123 | self.vertices[ key ] = ( vertex, messagesNext, [] ) 124 | self.surveyor.send( "D" ) 125 | return True 126 | 127 | if surveyormsg[0] == 'H': 128 | self.mode = MODE_HALT 129 | logger.info("Halting core.") 130 | self.surveyor.close() 131 | return False 132 | 133 | if surveyormsg[0] == 'P': 134 | if self.mode != MODE_PROCESS: 135 | # First time in this state, we need to grab all messages and 136 | # allocate them to vertex queue 137 | self.mode = MODE_PROCESS 138 | logger.info("Processing messages 1") 139 | while True: 140 | try: 141 | msg = self.vsub.recv() 142 | prefix, data = remap_utils.unpack_msg( msg ) 143 | if prefix in self.vertices: 144 | # This vertex is indeed on this host. Add the message to its new msg list for next iteration 145 | vertex, messages, messagesNext = self.vertices[ prefix ] 146 | messagesNext.append( data ) 147 | except nn.NanoMsgAPIError as e: 148 | logger.error( "No more messages available." ) 149 | break 150 | else: 151 | logger.info("Processing messages 2") 152 | # doing things twice does not make a difference. Second time around, just throw away all messages 153 | while True: 154 | try: 155 | msg = self.vsub.recv() 156 | print("Received and thrown away: ", msg) 157 | except nn.NanoMsgAPIError as e: 158 | logger.error( "No more messages available." 
) 159 | break 160 | self.surveyor.send( "D" ) 161 | return True 162 | 163 | self.mode = MODE_RUN 164 | self.superstep = int(surveyormsg) 165 | mainHalt = True 166 | 167 | for key, (vertex,messages,messagesNext) in self.vertices.items(): 168 | vertex, halt = self.app.compute( self.forward, self.subscribe, self.unsubscribe, self.superstep, vertex, messages ) 169 | 170 | if vertex != None: 171 | # Store the new vertex object in its place, maintaining the messagesNext list as we know it 172 | self.vertices[ key ] = (vertex,[],messagesNext) 173 | if not halt: 174 | mainHalt = False 175 | 176 | if mainHalt: 177 | self.surveyor.send( "H" ) 178 | else: 179 | self.surveyor.send( "D" ) 180 | return True 181 | 182 | -------------------------------------------------------------------------------- /daemons/core/remap.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import json 4 | 5 | from xml.etree.ElementTree import ElementTree 6 | from html.parser import HTMLParser 7 | 8 | class BaseReader(object): 9 | def __init__(self,filename,yieldkv): 10 | self.filename = filename 11 | self.filesize = os.stat(filename).st_size 12 | self.complete = False 13 | self.yieldkv = yieldkv 14 | 15 | def isComplete( self ): 16 | return self.complete 17 | 18 | # A class for reading in raw data to be processed. 19 | # Used as input to the mapper 20 | class TextFileReader(BaseReader): 21 | def __init__( self, filename, yieldkv=True ): 22 | BaseReader.__init__(self,filename,yieldkv) 23 | self.f = open(self.filename, 'r') 24 | self.pos = 0 25 | 26 | def read( self ): 27 | for line in self.f: 28 | self.pos = self.pos + len(line) 29 | if self.yieldkv: 30 | yield self.filename, line 31 | else: 32 | yield line 33 | self.complete = True 34 | 35 | def progress( self ): 36 | return float( float(self.pos) / self.filesize ) * 100 37 | 38 | def close( self ): 39 | self.f.close() 40 | 41 | # A class for reading in raw data to be processed. 42 | # Used as input to the mapper 43 | class XMLFileReader(BaseReader): 44 | def __init__( self, filename, yieldkv=True ): 45 | BaseReader.__init__(self,filename, yieldkv) 46 | self.curelem = 0 47 | self.tree = ElementTree() 48 | self.tree.parse( filename ) 49 | # ugh! 50 | self.numelems = len(list(self.tree.iter())) 51 | 52 | def read( self ): 53 | for elem in self.tree.iter(): 54 | self.curelem = self.curelem + 1 55 | if self.yieldkv: 56 | yield self.filename, elem.text 57 | else: 58 | yield elem.text 59 | 60 | self.complete = True 61 | 62 | def progress( self ): 63 | return float( float(self.curelem) / self.numelems ) * 100 64 | 65 | def close( self ): 66 | pass 67 | 68 | class HTMLFileReader(TextFileReader, HTMLParser): 69 | def __init__( self, filename, yieldkv=True ): 70 | TextFileReader.__init__(self,filename,yieldkv) 71 | HTMLParser.__init__(self) 72 | self.data = None 73 | 74 | def handle_starttag(self, tag, attrs): 75 | pass 76 | def handle_endtag(self, tag): 77 | pass 78 | def handle_data(self, data): 79 | self.data = data 80 | 81 | def read( self ): 82 | for line in self.f: 83 | self.pos = self.pos + len(line) 84 | self.feed(line) 85 | if self.yieldkv: 86 | yield self.filename, self.data 87 | else: 88 | yield self.data 89 | 90 | self.complete = True 91 | 92 | # A partitioner creates intermediate data. It is responsible for accepting large volumes of 93 | # key,value data. If the output file need not be sorted, it can write this to file directly. 
94 | # If sorting is necessary, it should keep things in memory, write it to disk when memory is full 95 | # and create a new partition file for the same mapper 96 | class BasePartitioner( object ): 97 | def __init__( self, outputdir, partition, mapperid, combiner, customkey ): 98 | self.outputdir = os.path.join( outputdir, partition ) 99 | self.partition = partition 100 | self.mapperid = mapperid 101 | self.mem = {} 102 | self.total_keys = 0 103 | self.total_values = 0 104 | self.sequence = 0 105 | self.customkey = customkey 106 | self.combiner = combiner 107 | self.filename = os.path.join( self.outputdir, "part-%s-%05d"%( self.mapperid, self.sequence ) ) 108 | 109 | try: 110 | os.makedirs( os.path.dirname( self.filename ) ) 111 | except OSError as exc: # Python >2.5 112 | if exc.errno == errno.EEXIST: 113 | pass 114 | else: 115 | raise 116 | 117 | # Statistics handling here allow future splitting up of further data 118 | # if this partition overfloweth. 119 | def store( self, k2, v2 ): 120 | if k2 not in self.mem: 121 | self.mem[ k2 ] = [] 122 | self.total_keys = self.total_keys + 1 123 | 124 | self.mem[ k2 ].append( v2 ) 125 | self.total_values = self.total_values + 1 126 | 127 | class TextPartitioner( BasePartitioner ): 128 | def __init__( self, outputdir, partition, mapperid, combiner=None, customkey=None ): 129 | BasePartitioner.__init__(self, outputdir, partition, mapperid, combiner, customkey ) 130 | self.f = open(self.filename, 'w') 131 | 132 | def sort_flush_close( self ): 133 | if self.customkey != None: 134 | for k in sorted(self.mem,key=self.customkey): 135 | l = self.mem[k] 136 | if self.combiner != None: 137 | l = self.combiner(l) 138 | out = json.dumps( l ) 139 | self.f.write( "%s,%s\n"%( k,out ) ) 140 | else: 141 | for k in sorted(self.mem): 142 | l = self.mem[k] 143 | if self.combiner != None: 144 | l = self.combiner(l) 145 | out = json.dumps( l ) 146 | self.f.write( "%s,%s\n"%( k,out ) ) 147 | self.f.close() 148 | 149 | # The part file reader reads back in one single partition file. 
150 | class TextPartFileReader(BaseReader): 151 | def __init__( self, filename, yieldkv=True ): 152 | BaseReader.__init__(self,filename,yieldkv) 153 | self.f = open(filename, 'r') 154 | self.pos = 0 155 | 156 | def read( self ): 157 | for line in self.f: 158 | key, data = line.split(',', 1) 159 | l = json.loads( data ) 160 | yield (key, l, len(line)) 161 | self.complete = True 162 | 163 | def isComplete( self ): 164 | return self.complete 165 | 166 | def progress( self ): 167 | return float( float(self.pos) / self.filesize ) 168 | 169 | def close( self ): 170 | self.f.close() 171 | 172 | # The reduce writer writes out the final result 173 | class BaseReduceWriter( object ): 174 | def __init__(self, partdir, partition): 175 | self.partdir = partdir 176 | self.partition = partition 177 | 178 | class TextReduceWriter( BaseReduceWriter ): 179 | def __init__( self, partdir, partition ): 180 | BaseReduceWriter.__init__( self, partdir, partition ) 181 | self.filename = os.path.join( self.partdir, "reduce_%s"%( partition )) 182 | self.f = open(self.filename, 'w') 183 | 184 | def store( self, k3, v3 ): 185 | self.f.write( "%s,%s\n"%( k3, v3 ) ) 186 | 187 | def close( self ): 188 | self.f.close() 189 | 190 | -------------------------------------------------------------------------------- /daemons/initiator/base_module.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import logging 4 | import time 5 | import shutil 6 | 7 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | sys.path.insert(0, parent) 9 | 10 | import lib.remap_utils as remap_utils 11 | import lib.remap_constants as remap_constants 12 | from lib.remap_utils import RemapException 13 | 14 | class BaseModule(object): 15 | def __init__( self, workdata, config ): 16 | self.workdata = workdata 17 | self.jobid = config["jobid"] 18 | self.remaproot = config["remaproot"] 19 | 20 | self.rootdir = os.path.abspath( self.remaproot ) 21 | self.appsdir = os.path.join( self.rootdir, "app" ) 22 | self.jobsdir = os.path.join( self.rootdir, "job" ) 23 | self.datadir = os.path.join( self.rootdir, "data" ) 24 | 25 | self.check_param( "app" ) 26 | self.appname = workdata["app"] 27 | 28 | self.app_dir = os.path.join( self.appsdir, self.appname ) 29 | self.job_dir = os.path.join( self.jobsdir, self.jobid ) 30 | self.app_job_dir = os.path.join( self.job_dir, "app", self.appname ) 31 | self.config_file = os.path.join( self.app_job_dir, "appconfig.json" ) 32 | self.relconfig_file = os.path.join( self.appname, "appconfig.json" ) 33 | 34 | def check_param( self, paramname ): 35 | if paramname not in self.workdata: 36 | raise RemapException("Job requires %s parameter"%( paramname )) 37 | 38 | def base_prepare( self, failIfJobsDirExists ): 39 | if os.path.isdir( self.job_dir ): 40 | if failIfJobsDirExists: 41 | raise RemapException("Jobs directory already exists" ) 42 | else: 43 | os.makedirs( self.job_dir ) 44 | 45 | # Copying the app files 46 | try: 47 | shutil.copytree(self.app_dir, self.app_job_dir) 48 | # Directories are the same 49 | except shutil.Error as e: 50 | raise RemapException("Directory not copied: %s"%(e)) 51 | except OSError as e: 52 | raise RemapException("Directory not copied: %s"%(e)) 53 | 54 | def all_hands_on_deck( self ): 55 | return False 56 | 57 | def finish( self ): 58 | pass 59 | 60 | class FileModule(BaseModule): 61 | def __init__( self, workdata, config ): 62 | BaseModule.__init__(self, workdata, config ) 63 | 64 | self.type = workdata["type"] 65 | 
self.check_param( "inputdir" ) 66 | inputdir = workdata["inputdir"] 67 | self.inputdir = os.path.join( self.datadir, inputdir.strip("/") ) 68 | 69 | if not os.path.isdir( self.inputdir ): 70 | raise RemapException("Input dir does not exist: %s"%(self.inputdir)) 71 | 72 | self.relinputdir = inputdir 73 | 74 | def prepare( self ): 75 | self.base_prepare( True ) 76 | 77 | self.partitions_dir = os.path.join( self.job_dir, "part" ) 78 | # In the normal mapper process, the output should not yet exist. 79 | 80 | os.makedirs( self.partitions_dir ) 81 | 82 | def plan_jobs( self, planner ): 83 | return planner.task_per_file_in_dir( self.create_job_data, self.inputdir ) 84 | 85 | class DirModule(BaseModule): 86 | def __init__( self, workdata, config ): 87 | BaseModule.__init__(self, workdata, config ) 88 | 89 | self.check_param( "outputdir" ) 90 | outputdir = workdata["outputdir"] 91 | 92 | verifyDir = os.path.join( self.remaproot, "job", self.jobid ) 93 | self.outputdir = os.path.join( self.datadir, outputdir.strip("/") ) 94 | 95 | if not os.path.isdir( verifyDir ): 96 | raise RemapException("Input dir does not exist: %s"%(verifyDir)) 97 | if os.path.isdir( self.outputdir ): 98 | raise RemapException("Output dir already exists: %s"%(self.outputdir)) 99 | 100 | self.reloutputdir = outputdir 101 | 102 | def prepare( self ): 103 | self.base_prepare( False ) 104 | 105 | os.makedirs( self.outputdir ) 106 | 107 | def plan_jobs( self, planner ): 108 | return planner.task_per_dir( self.create_job_data, os.path.join( self.remaproot, "job", self.jobid, "part" ) ) 109 | 110 | def module_tracks_progress( self ): 111 | return False 112 | 113 | -------------------------------------------------------------------------------- /daemons/initiator/http_interface.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import json 3 | import os 4 | import sys 5 | 6 | from flask import Flask 7 | from flask.ext.restful import Api, Resource, reqparse, fields, marshal 8 | from flask import request 9 | from flask import make_response 10 | from flask import Response 11 | 12 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 13 | sys.path.insert(0, parent) 14 | from lib.remap_utils import RemapException 15 | 16 | app = Flask(__name__) 17 | api = Api(app) 18 | 19 | monitor = None 20 | 21 | @app.errorhandler(400) 22 | def not_found(error): 23 | app.logger.exception(error) 24 | resp = make_response(str(error), 400) 25 | return resp 26 | 27 | @app.errorhandler(KeyError) 28 | def key_error(error): 29 | app.logger.exception(error) 30 | return make_response(str(error), 400) 31 | 32 | @app.errorhandler(RemapException) 33 | def key_error(error): 34 | app.logger.exception(error) 35 | return make_response(str(error), 400) 36 | 37 | 38 | class AppsListApi(Resource): 39 | def __init__(self): 40 | super(AppsListApi, self).__init__() 41 | 42 | def get(self): 43 | return monitor.list_apps() 44 | 45 | class JobsListApi(Resource): 46 | def __init__(self): 47 | self.reqparse = reqparse.RequestParser() 48 | self.reqparse.add_argument('type', type = str, required = True, help = 'No worker type specified', location = 'json') 49 | self.reqparse.add_argument('app', type = str, required = True, help = 'No app name specified', location = 'json') 50 | self.reqparse.add_argument('priority', type = int, required = True, help = 'No priority specified', location = 'json') 51 | self.reqparse.add_argument('parallellism', type = int, required = True, help = 'No parallellism specified', 
location = 'json') 52 | super(JobsListApi, self).__init__() 53 | 54 | def get(self): 55 | return monitor.list_jobs() 56 | 57 | def post(self): 58 | args = self.reqparse.parse_args() 59 | try: 60 | results = monitor.start_job( request.json ) 61 | # Created 62 | return results, 201 63 | except RemapException as re: 64 | return str(re), 400 65 | 66 | def delete(self): 67 | monitor.cancel_job() 68 | 69 | class NodesListApi(Resource): 70 | def __init__(self): 71 | self.reqparse = reqparse.RequestParser() 72 | self.reqparse.add_argument('priority', type = int, required = True, help = 'No priority specified', location = 'json') 73 | super(NodesListApi, self).__init__() 74 | 75 | def get(self): 76 | return monitor.list_nodes() 77 | 78 | def post(self): 79 | args = self.reqparse.parse_args() 80 | monitor.refresh_nodes( args["priority"] ) 81 | return "", 202 82 | 83 | def put(self): 84 | args = self.reqparse.parse_args() 85 | monitor.refresh_nodes( args["priority"] ) 86 | return "", 200 87 | 88 | def delete(self): 89 | args = self.reqparse.parse_args() 90 | monitor.refresh_nodes( args["priority"] ) 91 | return "", 200 92 | 93 | class JobApi(Resource): 94 | def __init__(self): 95 | #self.reqparse = reqparse.RequestParser() 96 | #self.reqparse.add_argument('jobid', type=str, location='json') 97 | super(JobApi, self).__init__() 98 | 99 | def delete(self, id): 100 | if not monitor.has_job( id ): 101 | abort(404) 102 | 103 | 104 | api.add_resource(AppsListApi, '/api/v1.0/apps', endpoint = 'apps') 105 | api.add_resource(JobsListApi, '/api/v1.0/jobs', endpoint = 'jobs') 106 | api.add_resource(NodesListApi, '/api/v1.0/nodes', endpoint = 'nodes') 107 | api.add_resource(JobApi, '/api/v1.0/jobs/', endpoint='job') 108 | 109 | def run(): 110 | global app 111 | app.run() 112 | 113 | def start( initiator ): 114 | global monitor 115 | monitor = initiator 116 | t = threading.Thread(target=run, args =()) 117 | t.daemon = True 118 | t.start() 119 | 120 | -------------------------------------------------------------------------------- /daemons/initiator/initiator.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import nanomsg as nn 4 | import logging 5 | import time 6 | import shutil 7 | from monitor import Monitor 8 | from threading import Timer 9 | from planner import JobPlanner 10 | 11 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 12 | sys.path.insert(0, parent) 13 | 14 | from lib.bonjour_detect import BonjourResolver 15 | import lib.remap_utils as remap_utils 16 | import lib.remap_constants as remap_constants 17 | from lib.remap_utils import RemapException 18 | import http_interface 19 | 20 | logging.basicConfig( level=logging.INFO ) 21 | 22 | # logger = logging.getLogger(__name__) 23 | logger = logging.getLogger("Initiator") 24 | 25 | class Initiator( Monitor ): 26 | def __init__(self, rootdir): 27 | Monitor.__init__(self, rootdir) 28 | self.remaproot = rootdir 29 | self.broker_address = "unknown" 30 | self.brokerChanged = False 31 | self.bsub = None 32 | self.bpub = None 33 | self.bonjour = BonjourResolver( "_remap._tcp", self.cb_broker_changed ) 34 | self.bonjour.start() 35 | self.jobid = None 36 | self.refreshed = 0 37 | self.job_status = "waiting" 38 | self.rejectedtasks = {} 39 | self.completedtasks = {} 40 | self.tasks = {} 41 | self.allocatedtasks = {} 42 | self.jobtype = "not_started" 43 | self.priority = 0 44 | self.parallellism = 1 45 | self.manager = None 46 | self.last_check = time.time() 47 | 48 | def 
load_plugin(self, name): 49 | try: 50 | mod = __import__("module_%s" % name) 51 | return mod 52 | except ImportError as ie: 53 | raise RemapException( "No such worker type: %s"%( name )) 54 | 55 | # ------- 56 | # Broker handling 57 | # ------- 58 | 59 | def setup_broker( self ): 60 | self.brokerChanged = False 61 | if self.bsub != None: 62 | self.bsub.close() 63 | self.bsub = None 64 | 65 | self.apply_timeouts() 66 | 67 | if self.broker_address == "unknown": 68 | logger.error("Deferring broker setup as address is still unknown.") 69 | return 70 | 71 | self.bsub = nn.Socket( nn.SUB ) 72 | self.bsub.connect( "tcp://%s:8687"%( self.broker_address )) 73 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "global") 74 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "local") 75 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "notlocal") 76 | if self.jobid != None: 77 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, self.jobid) 78 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "tracker") 79 | self.apply_timeouts() 80 | 81 | self.bpub = nn.Socket( nn.PUB ) 82 | self.bpub.connect( "tcp://%s:8686"%( self.broker_address )) 83 | 84 | logger.info("Broker setup complete") 85 | 86 | def apply_timeouts( self ): 87 | if self.bsub != None: 88 | rcv_timeout = 100 89 | self.bsub.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout ) 90 | 91 | def cb_broker_changed( self, broker_address ): 92 | logger.info("Received new broker address: %s"%(broker_address) ) 93 | self.broker_address = broker_address 94 | self.brokerChanged = True 95 | 96 | def forward_to_broker( self, msg ): 97 | if self.bpub != None: 98 | try: 99 | self.bpub.send( msg ) 100 | except nn.NanoMsgAPIError as e: 101 | pass 102 | 103 | def process_broker_messages( self ): 104 | if self.bsub == None: 105 | # No broker is known yet. 
106 | if self.brokerChanged: 107 | logger.info("The broker configuration changed.") 108 | self.setup_broker() 109 | if self.bsub == None: 110 | logger.info("Failed broker setup.") 111 | return False 112 | else: 113 | return False 114 | 115 | try: 116 | # Grab next msg from broker if any 117 | msg = self.bsub.recv() 118 | if msg != None and len(msg)>0: 119 | msgprefix, data = remap_utils.unpack_msg( msg ) 120 | recipientid,msgtype,senderid = remap_utils.split_prefix(msgprefix) 121 | if msgtype == "complete": 122 | self.update_corecomplete( recipientid, senderid, data ) 123 | if msgtype == "corestatus": 124 | self.update_corestatus( recipientid, senderid, data ) 125 | if msgtype == "raisehand": 126 | self.update_hands( recipientid, senderid, data ) 127 | return True 128 | else: 129 | return False 130 | except nn.NanoMsgAPIError as e: 131 | return False 132 | 133 | # ------- 134 | # Messaging handling 135 | # ------- 136 | 137 | def update_corestatus( self, recipientid, senderid, data ): 138 | if self.manager != None: 139 | key = self.manager.get_work_key( data ) 140 | if key in self.allocatedtasks: 141 | job = self.allocatedtasks[ key ] 142 | job["ts_finish"] = time.time() + 7 143 | 144 | def update_corecomplete( self, recipientid, senderid, data ): 145 | if self.manager != None: 146 | key = self.manager.get_work_key( data ) 147 | logger.info( "Job %s completed."%( key ) ) 148 | if key in self.allocatedtasks: 149 | job = self.allocatedtasks[ key ] 150 | task = self.tasks[ key ] 151 | self.completedtasks[ key ] = task 152 | del self.tasks[ key ] 153 | del self.allocatedtasks[ key ] 154 | logger.info( "%d tasks left, %d tasks committed, %d tasks complete, %d tasks failed."%( len(self.tasks), len(self.allocatedtasks), len(self.completedtasks), len(self.rejectedtasks) )) 155 | 156 | def update_hands( self, recipientid, senderid, data ): 157 | # "%s.raisehand.%s"%( senderid, self.nodeid ), {"cores":3,"interruptable":0} 158 | if senderid in self.nodes: 159 | self.nodes[ senderid ]["avail"] = data 160 | else: 161 | self.nodes[ senderid ] = {} 162 | self.nodes[ senderid ]["avail"] = data 163 | 164 | # ------- 165 | # Job management 166 | # ------- 167 | 168 | def start_job( self, jobdata ): 169 | if self.job_status != "waiting": 170 | raise RemapException("A job is currently in progress on this monitor") 171 | 172 | if "type" not in jobdata: 173 | raise RemapException("Must have job type specified" ) 174 | if "priority" not in jobdata: 175 | raise RemapException("Must have priority specified" ) 176 | if "parallellism" not in jobdata: 177 | raise RemapException("Must have parallellism specified" ) 178 | 179 | self.job_status = "preparing" 180 | self.prepare_start = time.time() 181 | 182 | self.jobtype = jobdata[ "type" ] 183 | self.priority = jobdata[ "priority" ] 184 | self.parallellism = jobdata[ "parallellism" ] 185 | plugin = self.load_plugin( self.jobtype ) 186 | self.rejectedtasks = {} 187 | self.completedtasks = {} 188 | 189 | if self.jobid != None: 190 | self.bsub.set_string_option( nn.SUB, nn.SUB_UNSUBSCRIBE, self.jobid) 191 | 192 | if "jobid" in jobdata: 193 | self.jobid = jobdata["jobid"] 194 | del jobdata[ "jobid" ] 195 | else: 196 | self.jobid = remap_utils.unique_id() 197 | 198 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, self.jobid) 199 | 200 | if "app" not in jobdata: 201 | raise RemapException( "The name of the app must be provided" ) 202 | 203 | if jobdata["app"] not in self.list_apps(): 204 | raise RemapException("No such application: %s"%(jobdata["app"])) 205 | 206 | 
config = {"jobid":self.jobid,"remaproot":self.remaproot} 207 | 208 | logger.info( "Started a new job: %s"%( self.jobid )) 209 | self.manager = plugin.create_manager( jobdata, config ) 210 | 211 | if ((time.time() - self.refreshed) > 60): 212 | # Not refreshed > 60s 213 | self.refresh_nodes( self.priority ) 214 | # Wait for a bunch of nodes to advertise themselves 215 | r = Timer(1.0, self.resume, ()) 216 | r.start() 217 | else: 218 | self.resume() 219 | 220 | return {"jobid":self.jobid} 221 | 222 | def resume( self ): 223 | self.manager.prepare() 224 | 225 | logger.info("Starting a %s job"%( self.jobtype )) 226 | 227 | self.planner = JobPlanner( self.manager.config_file ) 228 | self.tasks = self.manager.plan_jobs( self.planner ) 229 | 230 | logger.info( "Found %d tasks to execute"%( len(self.tasks) )) 231 | 232 | numnodes, self.allocatedtasks = self.planner.distribute_jobs_over_nodes( self.tasks, {}, self.nodes, self.parallellism ) 233 | if len(self.allocatedtasks) == 0: 234 | logger.error("No nodes found to distribute the tasks.") 235 | self.job_status = "waiting" 236 | return 237 | 238 | if self.manager.all_hands_on_deck(): 239 | if len(self.allocatedtasks) != len(self.tasks): 240 | raise RemapException("Not enough cores available. Have %d, need %d."%( len(self.allocatedtasks), len(self.tasks) )) 241 | 242 | logger.info( "%d new tasks distributed over %d nodes."%( len(self.allocatedtasks), numnodes )) 243 | self.job_status = "executing" 244 | self.outbound_work( self.allocatedtasks ) 245 | 246 | # In outbound work we update our local "jobs" data with timestamps 247 | # when they were sent out and send the task data to nodes. 248 | def outbound_work( self, jobs ): 249 | nodes = {} 250 | for key, job in jobs.items(): 251 | nodeid = job["nodeid"] 252 | if nodeid in nodes: 253 | nodes[ nodeid ]["cores"].append( job["jobdata"] ) 254 | else: 255 | tasks = {} 256 | tasklist = [] 257 | job["ts_start"] = time.time() 258 | job["ts_finish"] = time.time() + 7 259 | tasklist.append( job["jobdata"] ) 260 | tasks["cores"] = tasklist 261 | tasks["priority"] = self.priority 262 | nodes[ nodeid ] = tasks 263 | 264 | for nodeid, tasks in nodes.items(): 265 | msg = remap_utils.pack_msg( "%s.jobstart.%s"%(nodeid,self.jobid), tasks ) 266 | self.forward_to_broker( msg ) 267 | 268 | def check_progress( self ): 269 | if self.manager != None: 270 | if self.manager.module_tracks_progress(): 271 | if not self.manager.check_progress( len(self.tasks) ): 272 | self.manager.finish() 273 | self.manager = None 274 | self.job_status = "waiting" 275 | logger.info("Vertex job complete") 276 | else: 277 | if time.time() - self.last_check <= 4: 278 | return 279 | newtime = time.time() 280 | kill_list = [] 281 | for key, job in self.allocatedtasks.items(): 282 | if newtime > job["ts_finish"]: 283 | # This job hasn't been updated, probably dead. 284 | jobdata = job["jobdata"] 285 | # Update tasks with an attempt + 1 286 | task = self.tasks[ key ] 287 | task["attempts" ] = task["attempts" ] + 1 288 | nodeid = job["nodeid"] 289 | logger.info( "Task %s failed on node %s. Reattempting elsewhere"%( key, nodeid )) 290 | if task["attempts" ] > 4: 291 | # 5 attempts so far. let's cancel it. 292 | logger.warn("Task %s failed 5 attempts. 
Cancelling file to reject."%( key )) 293 | del self.tasks[ key ] 294 | kill_list.append( key ) 295 | self.rejectedtasks[ key ] = task 296 | 297 | for key in kill_list: 298 | del self.allocatedtasks[key] 299 | 300 | # Now also check if there are jobs that can be started 301 | if len(self.tasks) > 0: 302 | numnodes, new_allocations = self.planner.distribute_jobs_over_nodes( self.tasks, self.allocatedtasks, self.nodes, self.parallellism ) 303 | if numnodes > 0: 304 | logger.info( "%d new tasks distributed over %d nodes"%( len(new_allocations), numnodes )) 305 | self.outbound_work( new_allocations ) 306 | self.allocatedtasks.update( new_allocations ) 307 | 308 | if self.job_status == "executing" and len(self.tasks) == 0 and len(self.allocatedtasks) == 0: 309 | # finished all work 310 | self.job_status = "waiting" 311 | self.manager.finish() 312 | self.manager = None 313 | logger.info( "%d jobs left, %d jobs committed, %d jobs complete, %d jobs failed."%( len(self.tasks), len(self.allocatedtasks), len(self.completedtasks), len(self.rejectedtasks) )) 314 | 315 | if self.job_status == "preparing" and time.time() - self.prepare_start > 5: 316 | # over 5 seconds? quit it 317 | self.job_status = "waiting" 318 | if self.manager != None: 319 | self.manager.finish() 320 | self.manager = None 321 | logger.info( "Cancelled job in progress." ) 322 | else: 323 | # no manager. 324 | if self.job_status != "waiting": 325 | self.job_status = "waiting" 326 | logger.info( "Resolving inconsistent state." ) 327 | 328 | self.last_check = time.time() 329 | 330 | # ------- 331 | # Node management 332 | # ------- 333 | def refresh_nodes( self, priority ): 334 | self.nodes = {} 335 | self.priority = priority 336 | self.refreshed = time.time() 337 | msg = remap_utils.pack_msg( "local.showhands.%s"%(self.jobid), {"priority":self.priority} ) 338 | self.forward_to_broker( msg ) 339 | 340 | if __name__ == "__main__": 341 | logger.info("Starting initiator daemon") 342 | 343 | if ( len(sys.argv) < 2 ): 344 | print("Must supply one argument, the 'rootdir'") 345 | sys.exit(-1) 346 | 347 | initiator = Initiator( sys.argv[1]) 348 | initiator.apply_timeouts() 349 | 350 | http_interface.start( initiator ) 351 | 352 | # wait 200ms to find broker, establish local connection 353 | time.sleep( 0.2 ) 354 | 355 | logger.info("Initiator started") 356 | 357 | while( True ): 358 | try: 359 | while (initiator.process_broker_messages()): 360 | pass 361 | if initiator.brokerChanged: 362 | initiator.setup_broker() 363 | except RemapException as re: 364 | logger.exception( re ) 365 | 366 | initiator.check_progress() 367 | 368 | -------------------------------------------------------------------------------- /daemons/initiator/module_mapper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import logging 4 | import time 5 | from threading import Timer 6 | 7 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | sys.path.insert(0, parent) 9 | 10 | import lib.remap_utils as remap_utils 11 | import lib.remap_constants as remap_constants 12 | from lib.remap_utils import RemapException 13 | from base_module import FileModule 14 | 15 | logging.basicConfig( level=logging.INFO ) 16 | 17 | # logger = logging.getLogger(__name__) 18 | logger = logging.getLogger("Mapper") 19 | 20 | def create_manager( workdata, config ): 21 | return Mapper( workdata, config ) 22 | 23 | class Mapper(FileModule): 24 | def __init__(self, workdata, config): 25 | 
FileModule.__init__(self,workdata,config) 26 | logger.info("Created a mapper job for %s"%( self.appname )) 27 | 28 | def create_job_data( self, filename, idx ): 29 | inputfile = os.path.join( self.relinputdir, filename ) 30 | jobdata = { "inputfile": inputfile } 31 | jobdata["jobid"] = self.jobid 32 | jobdata["appdir"] = self.appname 33 | jobdata["appconfig"] = self.relconfig_file 34 | jobdata["type"] = "mapper" 35 | jobdata["workid"] = "%05d"%( idx ) 36 | return inputfile, jobdata 37 | 38 | def get_work_key( self, data ): 39 | return data["inputfile"] 40 | 41 | def module_tracks_progress( self ): 42 | return False 43 | 44 | -------------------------------------------------------------------------------- /daemons/initiator/module_reducer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import logging 4 | import time 5 | from threading import Timer 6 | 7 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | sys.path.insert(0, parent) 9 | 10 | import lib.remap_utils as remap_utils 11 | import lib.remap_constants as remap_constants 12 | from lib.remap_utils import RemapException 13 | from base_module import DirModule 14 | 15 | logging.basicConfig( level=logging.INFO ) 16 | 17 | logger = logging.getLogger("Reducer") 18 | 19 | def create_manager( workdata, config ): 20 | return Reducer( workdata, config ) 21 | 22 | class Reducer(DirModule): 23 | def __init__( self, workdata, config ): 24 | DirModule.__init__(self, workdata, config ) 25 | 26 | def create_job_data( self, dirname ): 27 | job = { "partition": dirname } 28 | job["jobid"] = self.jobid 29 | job["partition"] = dirname 30 | job["appdir"] = self.appname 31 | job["appconfig"] = self.relconfig_file 32 | job["type"] = "reducer" 33 | job["outputdir"] = self.reloutputdir 34 | return dirname, job 35 | 36 | def get_work_key( self, data ): 37 | return data["partition"] 38 | 39 | -------------------------------------------------------------------------------- /daemons/initiator/module_vertex.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import logging 4 | import time 5 | from threading import Timer 6 | import nanomsg as nn 7 | import socket 8 | 9 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | sys.path.insert(0, parent) 11 | 12 | import lib.remap_utils as remap_utils 13 | import lib.remap_constants as remap_constants 14 | from lib.remap_utils import RemapException 15 | from base_module import FileModule 16 | 17 | logging.basicConfig( level=logging.INFO ) 18 | 19 | # logger = logging.getLogger(__name__) 20 | logger = logging.getLogger("Vertex") 21 | 22 | def create_manager( workdata, config ): 23 | return Vertex( workdata, config ) 24 | 25 | MODE_SHIFT = 1 26 | MODE_COMPUTE = 2 27 | MODE_PROCESS = 3 28 | MODE_RECOVERY = 4 29 | MODE_HALT = 5 30 | 31 | class Vertex(FileModule): 32 | def __init__(self, workdata, config): 33 | FileModule.__init__(self,workdata,config) 34 | self.surveyor = nn.Socket( nn.SURVEYOR ) 35 | self.surveyor.bind( "tcp://0.0.0.0:8688" ) 36 | 37 | # 10 seconds max 38 | self.surveyor.set_int_option( nn.SURVEYOR, nn.SURVEYOR_DEADLINE, 10000 ) 39 | self.superstep = 0 40 | self.mode = MODE_SHIFT 41 | self.first = True 42 | 43 | def create_job_data( self, filename, idx ): 44 | inputfile = os.path.join( self.relinputdir, filename ) 45 | jobdata = { "inputfile": inputfile } 46 | jobdata["hostname"] = socket.gethostname() 47 | jobdata["jobid"] = 
self.jobid 48 | jobdata["appdir"] = self.appname 49 | jobdata["appconfig"] = self.relconfig_file 50 | jobdata["type"] = "vertex" 51 | jobdata["workid"] = "%05d"%( idx ) 52 | return inputfile, jobdata 53 | 54 | def get_work_key( self, data ): 55 | return data["inputfile"] 56 | 57 | def module_tracks_progress( self ): 58 | return True 59 | 60 | def all_hands_on_deck( self ): 61 | return True 62 | 63 | def finish( self ): 64 | logger.info("Finished vertex job") 65 | self.surveyor.close() 66 | self.surveyor = None 67 | 68 | def check_progress( self, numtasks ): 69 | if self.first: 70 | self.first = False 71 | # First time, wait 5 seconds for at least one worker to connect 72 | # BUG in surveyor protocol nanomsg 73 | time.sleep(5.0) 74 | # first time through, return, because num tasks would have been updated. 75 | # if not, it goes through to the other methods, zero tasks == no users, no response and exit. 76 | return True 77 | 78 | # wait 0.2 second for messages to finish propagating 79 | time.sleep(0.2) 80 | 81 | if self.mode == MODE_SHIFT: 82 | logger.info("Shifting messages") 83 | return self.process_shift_mode( numtasks ) 84 | elif self.mode == MODE_COMPUTE: 85 | logger.info("Computing mode") 86 | return self.process_compute_mode( numtasks ) 87 | elif self.mode == MODE_PROCESS: 88 | logger.info("Processing messages at each node") 89 | return self.process_process_mode( numtasks ) 90 | elif self.mode == MODE_RECOVERY: 91 | logger.info("Recovery mode") 92 | # TODO: implementation. 93 | return False 94 | elif self.mode == MODE_HALT: 95 | logger.info("Halt mode") 96 | return self.process_halt_mode( numtasks ) 97 | 98 | def process_shift_mode( self, numtasks ): 99 | self.surveyor.send( "S" ) 100 | respondents = 0 101 | try: 102 | while( True ): 103 | msg = remap_utils.decode( self.surveyor.recv() ) 104 | respondents = respondents + 1 105 | if respondents == numtasks: 106 | logger.info("All respondents replied") 107 | self.mode = MODE_COMPUTE 108 | break 109 | except nn.NanoMsgAPIError as nme: 110 | logger.error("No vertex nodes replied") 111 | print(nme) 112 | self.mode = MODE_RECOVERY 113 | return True 114 | 115 | def process_compute_mode( self, numtasks ): 116 | # Send superstep 117 | self.surveyor.send( "%d"%(self.superstep) ) 118 | halt = True 119 | respondents = 0 120 | try: 121 | while( True ): 122 | msg = remap_utils.decode( self.surveyor.recv() ) 123 | if msg != "H": 124 | halt = False 125 | respondents = respondents + 1 126 | if respondents == numtasks: 127 | # all replied 128 | logger.info("All respondents replied") 129 | self.mode = MODE_PROCESS 130 | break 131 | except nn.NanoMsgAPIError as nme: 132 | logger.error("No vertex nodes connected") 133 | print(nme) 134 | self.mode = MODE_RECOVERY 135 | return True 136 | 137 | if halt: 138 | self.mode = MODE_HALT 139 | else: 140 | self.superstep = self.superstep + 1 141 | return True 142 | 143 | def process_process_mode( self, numtasks ): 144 | self.surveyor.send( "P" ) 145 | respondents = 0 146 | try: 147 | while( True ): 148 | msg = remap_utils.decode( self.surveyor.recv() ) 149 | respondents = respondents + 1 150 | if respondents == numtasks: 151 | logger.info("All respondents replied") 152 | self.mode = MODE_SHIFT 153 | break 154 | except nn.NanoMsgAPIError as nme: 155 | logger.error("No vertex nodes replied") 156 | print(nme) 157 | self.mode = MODE_RECOVERY 158 | return True 159 | 160 | def process_halt_mode( self, numtasks ): 161 | self.surveyor.send( "H" ) 162 | respondents = 0 163 | try: 164 | while( True ): 165 | msg = 
remap_utils.decode( self.surveyor.recv() ) 166 | respondents = respondents + 1 167 | if respondents == numtasks: 168 | logger.info("All respondents replied") 169 | break 170 | except nn.NanoMsgAPIError as nme: 171 | print(nme) 172 | return False 173 | 174 | -------------------------------------------------------------------------------- /daemons/initiator/monitor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | import time 5 | 6 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.insert(0, parent) 8 | 9 | import lib.remap_utils as remap_utils 10 | import lib.remap_constants as remap_constants 11 | from lib.remap_utils import RemapException 12 | 13 | logging.basicConfig( level=logging.INFO ) 14 | logger = logging.getLogger("Monitor") 15 | 16 | class Monitor(object): 17 | def __init__(self, rootdir): 18 | self.rootdir = os.path.abspath( rootdir ) 19 | self.appsdir = os.path.join( self.rootdir, "app" ) 20 | self.jobsdir = os.path.join( self.rootdir, "job" ) 21 | self.datadir = os.path.join( self.rootdir, "data" ) 22 | self.nodes = {} 23 | 24 | def list_apps( self ): 25 | apps = [] 26 | for root, dirs, files in os.walk( self.appsdir ): 27 | for f in files: 28 | if f == "appconfig.json": 29 | apps.append( os.path.relpath(root, self.appsdir) ) 30 | return apps 31 | 32 | def list_all_jobs( self ): 33 | jobs = [] 34 | for root in os.listdir( self.jobsdir ): 35 | jobs.append( root ) 36 | return jobs 37 | 38 | def list_jobs( self ): 39 | return {"status":self.job_status,"jobid":self.jobid,"type":self.jobtype,"priority":self.priority,"parallellism":self.parallellism, 40 | "tasks":len(self.tasks), "allocated":len(self.allocatedtasks), "completed":len(self.completedtasks), "rejected":len(self.rejectedtasks)} 41 | 42 | def list_nodes( self ): 43 | return self.nodes 44 | 45 | def cancel_job( self ): 46 | pass 47 | 48 | -------------------------------------------------------------------------------- /daemons/initiator/planner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import logging 4 | 5 | import json 6 | import math 7 | import time 8 | 9 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | sys.path.insert(0, parent) 11 | 12 | from lib.bonjour_detect import BonjourResolver 13 | import lib.remap_utils as remap_utils 14 | import lib.remap_constants as remap_constants 15 | from lib.remap_utils import RemapException 16 | 17 | logging.basicConfig( level=logging.INFO ) 18 | 19 | # logger = logging.getLogger(__name__) 20 | logger = logging.getLogger("Initiator") 21 | 22 | class JobPlanner(object): 23 | def __init__(self, config_file): 24 | f = open(config_file) 25 | data = f.read() 26 | self.config = json.loads(data) 27 | 28 | def task_per_file_in_dir( self, job_def_cb, input_dir ): 29 | tasks = {} 30 | files = [f for f in os.listdir( input_dir ) if os.path.isfile(os.path.join(input_dir,f))] 31 | logger.info("Found %d files to process in %s"%( len(files), input_dir )) 32 | 33 | ctr = 0 34 | for f in files: 35 | key, jobdata = job_def_cb( f, ctr ) 36 | tasks[ key ] = { "attempts": 0, "jobdata": jobdata } 37 | ctr = ctr + 1 38 | 39 | return tasks 40 | 41 | def task_per_dir( self, job_def_cb, input_dir ): 42 | # First, let's just generate a list of jobs 43 | tasks = {} 44 | 45 | # Grab all input files 46 | dirs = [d for d in os.listdir( input_dir ) if os.path.isdir(os.path.join(input_dir,d))] 47 | 
logger.info("Found %d partitions to process in %s"%( len(dirs), input_dir )) 48 | 49 | # ( Here app config probably tells us how to split files. Not doing that for now. Just process whole thing ) 50 | for d in dirs: 51 | key, jobdata = job_def_cb( d ) 52 | tasks[ key ] = { "attempts": 0, "jobdata": jobdata } 53 | 54 | return tasks 55 | 56 | def distribute_jobs_over_nodes( self, availjobs, allocatedjobs, nodes, parallellism ): 57 | # Making a copy first, it gets modified 58 | availjobs = dict(availjobs) 59 | corejobs = {} 60 | committed = {} 61 | 62 | for inputfile, job in allocatedjobs.items(): 63 | nodeid = job["nodeid"] 64 | if nodeid in committed: 65 | committed[ nodeid ] = committed[ nodeid ]+1 66 | else: 67 | committed[ nodeid ] = 1 68 | 69 | # Figure out how to distribute mappers. 70 | numcores = 0 71 | numint = 0 72 | for key in nodes: 73 | numcores = numcores + nodes[key]["avail"]["free"] 74 | numint = numint + nodes[key]["avail"]["interruptable"] 75 | 76 | parallels = min( numcores, parallellism ) 77 | 78 | added = True 79 | while len(availjobs) > 0 and added: 80 | i = 0 81 | added = False 82 | for key in nodes: 83 | if key not in committed: 84 | committed[ key ] = 0 85 | 86 | avail = nodes[key]["avail"]["free"] - committed[key] 87 | if avail <= 0: 88 | break 89 | if i == parallels: 90 | break 91 | if len(availjobs)==0: 92 | break 93 | 94 | for j in range( 0, avail ): 95 | if i == parallels: 96 | break 97 | i = i + 1 98 | 99 | if len(availjobs)>0: 100 | workfile, data = availjobs.popitem() 101 | if workfile in allocatedjobs: 102 | continue 103 | corejobs[ workfile ] = {} 104 | corejobs[ workfile ]["jobdata"] = data["jobdata"] 105 | corejobs[ workfile ]["nodeid"] = key 106 | corejobs[ workfile ]["ts_start"] = time.time() 107 | corejobs[ workfile ]["ts_finish"] = time.time() + 7 108 | committed[ key ] = committed[ key ] + 1 109 | added = True 110 | else: 111 | break 112 | 113 | return len(committed), corejobs 114 | 115 | -------------------------------------------------------------------------------- /daemons/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gtoonstra/remap/cfd9a413858c54e7f1049c546d39bd0edccded0c/daemons/lib/__init__.py -------------------------------------------------------------------------------- /daemons/lib/bonjour_detect.py: -------------------------------------------------------------------------------- 1 | import select 2 | import sys 3 | import pybonjour 4 | import logging 5 | import pybonjour 6 | import threading 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | class BonjourResolver( object ): 11 | def __init__( self, regtype, callback ): 12 | self.t = threading.Thread(target=self.run, args=()) 13 | self.t.daemon = True 14 | self.regtype = regtype 15 | self.timeout = 5 16 | self.callback = callback 17 | 18 | def start( self ): 19 | self.resolved = [] 20 | self.t.start() 21 | 22 | def resolve_callback(self, sdRef, flags, interfaceIndex, errorCode, fullname, 23 | hosttarget, port, txtRecord): 24 | if errorCode == pybonjour.kDNSServiceErr_NoError: 25 | logger.info( 'Resolved service %s at %s'%( fullname, hosttarget )) 26 | self.resolved.append(True) 27 | self.hosttarget = hosttarget 28 | self.port = port 29 | hosttarget = hosttarget.rstrip('.') 30 | self.callback( hosttarget ) 31 | else: 32 | logger.info( errorCode ) 33 | return 34 | 35 | def browse_callback(self,sdRef, flags, interfaceIndex, errorCode, serviceName, 36 | regtype, replyDomain): 37 | if errorCode != 
pybonjour.kDNSServiceErr_NoError: 38 | return 39 | 40 | if not (flags & pybonjour.kDNSServiceFlagsAdd): 41 | logger.info( 'The service entry was removed' ) 42 | self.callback( "unknown" ) 43 | return 44 | 45 | logger.info( 'Another service identified, resolving' ) 46 | 47 | resolve_sdRef = pybonjour.DNSServiceResolve(0, 48 | interfaceIndex, 49 | serviceName, 50 | regtype, 51 | replyDomain, 52 | self.resolve_callback) 53 | 54 | try: 55 | while not self.resolved: 56 | ready = select.select([resolve_sdRef], [], [], self.timeout) 57 | if resolve_sdRef not in ready[0]: 58 | logger.info( 'Resolution timed out' ) 59 | break 60 | pybonjour.DNSServiceProcessResult(resolve_sdRef) 61 | else: 62 | self.resolved.pop() 63 | finally: 64 | resolve_sdRef.close() 65 | 66 | def run( self ): 67 | browse_sdRef = pybonjour.DNSServiceBrowse(regtype = self.regtype, 68 | callBack = self.browse_callback) 69 | 70 | try: 71 | try: 72 | while True: 73 | ready = select.select([browse_sdRef], [], []) 74 | if browse_sdRef in ready[0]: 75 | pybonjour.DNSServiceProcessResult(browse_sdRef) 76 | except KeyboardInterrupt: 77 | pass 78 | finally: 79 | browse_sdRef.close() 80 | 81 | -------------------------------------------------------------------------------- /daemons/lib/remap_constants.py: -------------------------------------------------------------------------------- 1 | HEALTH_CHECK_DELAY = 5 2 | THR_STATUS_DELAY = 5 3 | MAX_STATUS_DELAY = 12 4 | 5 | -------------------------------------------------------------------------------- /daemons/lib/remap_utils.py: -------------------------------------------------------------------------------- 1 | # library utility functions 2 | import json 3 | import uuid 4 | import logging 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | class RemapException(Exception): 9 | pass 10 | 11 | def unpack_msg( msg ): 12 | try: 13 | msgtype, data = msg.decode("utf-8").split(" ", 1 ) 14 | try: 15 | data = json.loads( data ) 16 | return msgtype, data 17 | except ValueError as ve: 18 | raise RemapException( "Invalid json data payload" ) from ve 19 | except ValueError as ve: 20 | raise RemapException( "Invalid message" ) from ve 21 | 22 | def decode( msg ): 23 | return msg.decode("utf-8") 24 | 25 | def pack_msg( prefix, data ): 26 | msg = "%s %s"%( prefix, json.dumps( data )) 27 | return msg 28 | 29 | def unique_id(): 30 | return str(uuid.uuid1()) 31 | 32 | def node_id(): 33 | return str(uuid.getnode()) 34 | 35 | def core_id( nodeid, pid ): 36 | return "%s_%d"%( nodeid, pid ) 37 | 38 | def extract_node_id( coreid ): 39 | return coreid.split("_")[0] 40 | 41 | def safe_get( data, key ): 42 | if key not in data: 43 | raise RemapException("Required key %s not found in data."%( key )) 44 | return data[ key ] 45 | 46 | def split_prefix( prefix ): 47 | try: 48 | r,t,s = prefix.split(".") 49 | return r,t,s 50 | except ValueError as ve: 51 | raise RemapException( "Invalid prefix %s"%( prefix ) ) from ve 52 | 53 | 54 | -------------------------------------------------------------------------------- /daemons/node/node_daemon.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import nanomsg as nn 4 | from nanomsg import wrapper as nn_wrapper 5 | import logging 6 | import time 7 | 8 | import sys 9 | from node_hardware import NodeHardware 10 | 11 | parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 12 | sys.path.insert(0, parent) 13 | 14 | from lib.bonjour_detect import BonjourResolver 15 | import lib.remap_utils as remap_utils 
16 | import lib.remap_constants as remap_constants 17 | from lib.remap_utils import RemapException 18 | 19 | # A node daemon connects to the broker and exists to allow 20 | # cores to work independently. The idea is that the node daemon is a central 21 | # contact point for concerns like machine health, hardware, proximity, 22 | # planned up/down time and that node routes messages from the bus to each core, 23 | # which should reduce some potential waste if each core process has its own 24 | # code to discard messages, etc. 25 | # 26 | # The node daemon, together with other processes, is expected to be allocated one core 27 | # of the machine, leaving (num_cores-1) free for core processes. 28 | # 29 | 30 | logging.basicConfig( level=logging.INFO ) 31 | 32 | # logger = logging.getLogger(__name__) 33 | logger = logging.getLogger("NodeDaemon") 34 | 35 | class NodeDaemon( object ): 36 | def __init__(self, remaproot): 37 | self.remaproot = remaproot 38 | self.cores = {} 39 | self.broker_address = "unknown" 40 | self.brokerChanged = False 41 | self.bsub = None 42 | self.bpub = None 43 | self.tot_m_rcv = 0 44 | self.hw = NodeHardware() 45 | self.nodeid = remap_utils.node_id() 46 | self.bonjour = BonjourResolver( "_remap._tcp", self.cb_broker_changed ) 47 | self.bonjour.start() 48 | self.coresChanged = False 49 | 50 | # Create a bi-directional communication channel, where the node daemon 51 | # 'shouts' in the room even to contact a single core, but the core only 52 | # sends written messages back to the shouter with the megaphone. 53 | # (embarassing protocol). 54 | def setup_bus( self ): 55 | self.lsub = nn.Socket( nn.SUB ) 56 | self.lsub.bind("ipc:///tmp/node_pub.ipc") 57 | self.lsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "" ) 58 | self.lpub = nn.Socket( nn.PUB ) 59 | self.lpub.bind("ipc:///tmp/node_sub.ipc") 60 | 61 | def apply_timeouts( self ): 62 | if self.bsub == None: 63 | rcv_timeout = 100 64 | self.lsub.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout ) 65 | else: 66 | rcv_timeout = 100 67 | self.bsub.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout ) 68 | rcv_timeout = 0 69 | self.lsub.set_int_option( nn.SOL_SOCKET, nn.RCVTIMEO, rcv_timeout ) 70 | 71 | def cb_broker_changed( self, broker_address ): 72 | logger.info("Received new broker address: %s"%(broker_address) ) 73 | self.broker_address = broker_address 74 | self.brokerChanged = True 75 | 76 | def setup_broker( self ): 77 | self.brokerChanged = False 78 | if self.bsub != None: 79 | self.bsub.close() 80 | self.bsub = None 81 | 82 | self.apply_timeouts() 83 | 84 | if self.broker_address == "unknown": 85 | logger.error("Deferring broker setup as address is still unknown.") 86 | return 87 | 88 | self.bsub = nn.Socket( nn.SUB ) 89 | self.bsub.connect( "tcp://%s:8687"%( self.broker_address )) 90 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "global") 91 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "local") 92 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, "notlocal") 93 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, self.nodeid) 94 | self.apply_timeouts() 95 | 96 | self.bpub = nn.Socket( nn.PUB ) 97 | self.bpub.connect( "tcp://%s:8686"%( self.broker_address )) 98 | 99 | logger.info("Broker setup complete") 100 | 101 | def process_bus_messages( self ): 102 | try: 103 | msg = self.lsub.recv() 104 | msgprefix, data = remap_utils.unpack_msg( msg ) 105 | 106 | if len(msgprefix) == 0: 107 | return True 108 | 109 | recipientid,msgtype,senderid = remap_utils.split_prefix(msgprefix) 
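# --- Illustrative sketch (annotation; not part of the original node_daemon.py) ---
# Every bus/broker message in remap is a plain string of the form
#     "<recipientid>.<msgtype>.<senderid> <json payload>"
# built with pack_msg and taken apart again with unpack_msg and split_prefix
# (see daemons/lib/remap_utils.py earlier in this listing).
# As a hedged example, a core process announcing itself to its node daemon could
# publish something like the following; the prefix parts and the pid/priority
# values are hypothetical, only the field names match what process_hello() expects:
#
#     hello = remap_utils.pack_msg("node._hello.unknown",
#                                  {"msgid": remap_utils.unique_id(),
#                                   "pid": os.getpid(),
#                                   "priority": 0})
#     # -> 'node._hello.unknown {"msgid": "...", "pid": 12345, "priority": 0}'
#
# split_prefix("node._hello.unknown") returns ("node", "_hello", "unknown"), which
# is why the dispatch just below routes any msgtype starting with '_' to
# process_core_message(), while other types (corestatus, complete, anything else)
# are forwarded on to the broker after any local bookkeeping.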
110 | 111 | if msgtype[0] == '_': 112 | # node message 113 | self.process_core_message( msgtype, senderid, data ) 114 | elif msgtype == "corestatus": 115 | if senderid in self.cores: 116 | coredata = self.cores[ senderid ] 117 | coredata["ts_last_seen"] = time.time() 118 | coredata["progress"] = data["progress"] 119 | logger.info("Core %s progressed %d"%( senderid, coredata["progress"] )) 120 | self.forward_to_broker( msg ) 121 | elif msgtype == "complete": 122 | if senderid in self.cores: 123 | coredata = self.cores[ senderid ] 124 | logger.info("Core %s completed the job"%( senderid )) 125 | self.forward_to_broker( msg ) 126 | del self.cores[ senderid ] 127 | self.coresChanged = True 128 | else: 129 | # forward to broker instead 130 | self.forward_to_broker( msg ) 131 | return True 132 | except nn.NanoMsgAPIError as e: 133 | return False 134 | 135 | def process_core_message( self, msgtype, senderid, data ): 136 | if msgtype == "_hello": 137 | self.process_hello( data ) 138 | if msgtype == "_todo": 139 | self.process_todo( senderid, data ) 140 | if msgtype == "_status": 141 | self.process_status( senderid, data ) 142 | if msgtype == "_sub": 143 | self.bsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, data["prefix"]) 144 | if msgtype == "_unsub": 145 | self.bsub.set_string_option( nn.SUB, nn.SUB_UNSUBSCRIBE, data["prefix"]) 146 | 147 | def forward_to_broker( self, msg ): 148 | if self.bpub != None: 149 | try: 150 | self.bpub.send( msg ) 151 | except nn.NanoMsgAPIError as e: 152 | pass 153 | 154 | # This processes a message where a core is announcing itself and wants to 155 | # get a core id to start existing on the network 156 | def process_hello( self, data ): 157 | msgid = remap_utils.safe_get(data, "msgid") 158 | pid = remap_utils.safe_get(data, "pid") 159 | priority = remap_utils.safe_get( data, "priority" ) 160 | coreid = remap_utils.core_id( self.nodeid, pid ) 161 | self.cores[ coreid ] = {"coreid":coreid,"ts_last_seen":time.time(),"progress":-1,"pid":pid,"priority":priority} 162 | msg = remap_utils.pack_msg( "%s._hey.%s"%(coreid, self.nodeid), {"msgid":msgid,"coreid":coreid} ) 163 | logger.info( "A core registered %s"%( coreid )) 164 | self.lpub.send( msg ) 165 | 166 | def process_todo( self, senderid, data ): 167 | coredata = self.cores[ senderid ] 168 | work = self.hw.grab_work_item() 169 | if work != None: 170 | msg = remap_utils.pack_msg( "%s._work.%s"%(senderid, self.nodeid), work ) 171 | logger.info( "A core was given some work to do: %s"%( senderid )) 172 | self.lpub.send( msg ) 173 | 174 | def process_status( self, senderid, data ): 175 | coredata = self.cores[ senderid ] 176 | coredata["ts_last_seen"] = time.time() 177 | 178 | def process_broker_messages( self ): 179 | if self.bsub == None: 180 | # No broker is known yet. 
181 | if self.brokerChanged: 182 | logger.info("The broker configuration changed.") 183 | self.setup_broker() 184 | if self.bsub == None: 185 | logger.info("Failed broker setup.") 186 | return False 187 | else: 188 | return False 189 | 190 | try: 191 | # Grab next msg from broker if any 192 | msg = self.bsub.recv() 193 | self.tot_m_rcv = self.tot_m_rcv + 1 194 | if msg == None or len(msg)==0: 195 | return False 196 | 197 | msgprefix, data = remap_utils.unpack_msg( msg ) 198 | recipientid,msgtype,senderid = remap_utils.split_prefix(msgprefix) 199 | if msgtype == "showhands": 200 | self.handle_showhands( recipientid, senderid, data ) 201 | elif msgtype == "jobstart": 202 | #if recipientid == self.nodeid: 203 | self.handle_jobstart( recipientid, senderid, data ) 204 | else: 205 | # Forward to all cores for their processing. 206 | self.lpub.send(msg) 207 | return True 208 | except nn.NanoMsgAPIError as e: 209 | return False 210 | 211 | def purge_inactive_cores( self, new_ts ): 212 | kill_list = [] 213 | for key, coredata in self.cores.items(): 214 | last_ts = coredata["ts_last_seen"] 215 | if (new_ts - last_ts) > remap_constants.THR_STATUS_DELAY: 216 | logger.info("Core %s missed a status report."%( key )) 217 | if (new_ts - last_ts) > remap_constants.MAX_STATUS_DELAY: 218 | logger.info("Core %s is considered dead."%( key )) 219 | kill_list.append( key ) 220 | # Add code here to kill core just in case. 221 | 222 | for key in kill_list: 223 | del self.cores[ key ] 224 | 225 | def maybe_send_status( self ): 226 | if self.coresChanged: 227 | self.handle_showhands( "tracker", "unknown", { "priority":0 } ) 228 | self.coresChanged = False 229 | 230 | # Request re-registration of existing core processes currently on the bus 231 | # allows failover restart of this node daemon. 232 | def req_registration( self ): 233 | msg = remap_utils.pack_msg( "node._plzreg.%s"%(self.nodeid), {} ) 234 | self.lpub.send( msg ) 235 | 236 | # Some app initiator requests processing capacity 237 | def handle_showhands( self, recipientid, senderid, data ): 238 | avail, interruptable = self.hw.available_cpus( remap_utils.safe_get( data, "priority" ), self.cores ) 239 | if avail > 0 or interruptable > 0: 240 | logger.info( "Volunteering with %d cores, %d interruptable"%( avail, interruptable )) 241 | msg = remap_utils.pack_msg( "tracker.raisehand.%s"%( self.nodeid ), {"free":avail,"interruptable":interruptable} ) 242 | self.forward_to_broker( msg ) 243 | 244 | # Some app initiator wants this node to start work 245 | def handle_jobstart( self, recipientid, senderid, data ): 246 | avail, interruptable = self.hw.available_cpus( remap_utils.safe_get( data, "priority" ), self.cores ) 247 | numcores = len(remap_utils.safe_get( data, "cores" )) 248 | if (avail + interruptable) >= numcores: 249 | logger.info("Starting job with %d cores"%( numcores )) 250 | if not self.hw.start_job( self.remaproot, senderid, numcores, data ): 251 | logger.error("Error starting job") 252 | else: 253 | # Something changed in the meantime. Reject 254 | logger.info( "Initiator requested %d cores, %d can be committed. 
Rejecting"%( numcores, avail + interruptable )) 255 | msg = remap_utils.pack_msg( "%s.rejectjob.%s"%( senderid, self.nodeid ), {} ) 256 | self.forward_to_broker( msg ) 257 | 258 | self.coresChanged = True 259 | 260 | if __name__ == "__main__": 261 | logger.info("Starting node daemon") 262 | health_check = time.time() 263 | 264 | if ( len(sys.argv) < 2 ): 265 | print("Must supply one argument, the 'rootdir'") 266 | sys.exit(-1) 267 | 268 | node = NodeDaemon( sys.argv[1] ) 269 | node.setup_bus() 270 | node.apply_timeouts() 271 | 272 | # wait 200ms to find broker, establish local connection 273 | time.sleep( 0.2 ) 274 | 275 | # nanomsg doesn't event when a connection is lost 276 | # so we explicitly request reregistration of cores. 277 | node.req_registration() 278 | 279 | logger.info("Node daemon started") 280 | 281 | while( True ): 282 | try: 283 | while (node.process_bus_messages()): 284 | pass 285 | while (node.process_broker_messages()): 286 | pass 287 | if node.brokerChanged: 288 | node.setup_broker() 289 | except RemapException as re: 290 | logger.exception( re ) 291 | 292 | # Every now and then check core heartbeats and remove cores no longer active. 293 | new_ts = time.time() 294 | if (new_ts - health_check) > remap_constants.HEALTH_CHECK_DELAY: 295 | health_check = new_ts 296 | node.purge_inactive_cores( new_ts ) 297 | node.maybe_send_status() 298 | 299 | -------------------------------------------------------------------------------- /daemons/node/node_hardware.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import subprocess 3 | import os 4 | 5 | class NodeHardware(object): 6 | def __init__(self): 7 | self.waiting = [] 8 | 9 | def available_cpus( self, priority, active_cores ): 10 | cpus = multiprocessing.cpu_count() 11 | interruptable = 0 12 | for key, coredata in active_cores.items(): 13 | if coredata["priority"] < priority: 14 | interruptable = interruptable + 1 15 | available = cpus - len(active_cores) 16 | return available, interruptable 17 | 18 | def start_job( self, remaproot, jobid, numcores, data ): 19 | self.waiting = data["cores"] 20 | 21 | for i in range( 0, numcores ): 22 | coredata = data["cores"][i] 23 | jobid = None 24 | appdir = None 25 | if "jobid" in coredata: 26 | jobid = coredata["jobid"] 27 | if "appdir" in coredata: 28 | appdir = coredata["appdir"] 29 | 30 | if jobid == None or appdir == None: 31 | return False 32 | 33 | appdir = os.path.join( remaproot, "job", jobid, "app", appdir ) 34 | 35 | env = os.environ 36 | pythonpath = "" 37 | if "PYTHONPATH" in env: 38 | pythonpath = env["PYTHONPATH"] 39 | pythonpath = pythonpath + ":" + appdir 40 | else: 41 | pythonpath = appdir 42 | env["PYTHONPATH"] = pythonpath 43 | 44 | path = "" 45 | thisdir = os.path.dirname( os.path.realpath(__file__) ) 46 | coredir = os.path.abspath( os.path.join( thisdir, "..", "core" ) ) 47 | path = path + coredir 48 | daemonfile = os.path.join( path, "core_daemon.py" ) 49 | subprocess.Popen(["python3.4", daemonfile, remaproot], env=env) 50 | 51 | return True 52 | 53 | def grab_work_item( self ): 54 | if len(self.waiting) > 0: 55 | # Just grab any item 56 | return self.waiting.pop() 57 | return None 58 | 59 | 60 | -------------------------------------------------------------------------------- /daemons/vertexbroker/.gitignore: -------------------------------------------------------------------------------- 1 | autom4te.cache/ 2 | compile 3 | ckefile 4 | Makefile.in 5 | config.status 6 | depcomp 7 | configure 8 | 
install-sh 9 | missing 10 | Makefile 11 | m4 12 | 13 | -------------------------------------------------------------------------------- /daemons/vertexbroker/AUTHORS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gtoonstra/remap/cfd9a413858c54e7f1049c546d39bd0edccded0c/daemons/vertexbroker/AUTHORS -------------------------------------------------------------------------------- /daemons/vertexbroker/COPYING: -------------------------------------------------------------------------------- 1 | /usr/share/automake-1.14/COPYING -------------------------------------------------------------------------------- /daemons/vertexbroker/ChangeLog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gtoonstra/remap/cfd9a413858c54e7f1049c546d39bd0edccded0c/daemons/vertexbroker/ChangeLog -------------------------------------------------------------------------------- /daemons/vertexbroker/INSTALL: -------------------------------------------------------------------------------- 1 | /usr/share/automake-1.14/INSTALL -------------------------------------------------------------------------------- /daemons/vertexbroker/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = src 2 | 3 | -------------------------------------------------------------------------------- /daemons/vertexbroker/NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gtoonstra/remap/cfd9a413858c54e7f1049c546d39bd0edccded0c/daemons/vertexbroker/NEWS -------------------------------------------------------------------------------- /daemons/vertexbroker/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gtoonstra/remap/cfd9a413858c54e7f1049c546d39bd0edccded0c/daemons/vertexbroker/README -------------------------------------------------------------------------------- /daemons/vertexbroker/aclocal.m4: -------------------------------------------------------------------------------- 1 | # generated automatically by aclocal 1.14.1 -*- Autoconf -*- 2 | 3 | # Copyright (C) 1996-2013 Free Software Foundation, Inc. 4 | 5 | # This file is free software; the Free Software Foundation 6 | # gives unlimited permission to copy and/or distribute it, 7 | # with or without modifications, as long as this notice is preserved. 8 | 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY, to the extent permitted by law; without 11 | # even the implied warranty of MERCHANTABILITY or FITNESS FOR A 12 | # PARTICULAR PURPOSE. 13 | 14 | m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) 15 | m4_ifndef([AC_AUTOCONF_VERSION], 16 | [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl 17 | m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, 18 | [m4_warning([this file was generated for autoconf 2.69. 19 | You have another version of autoconf. It may work, but is not guaranteed to. 20 | If you have problems, you may need to regenerate the build system entirely. 21 | To do so, use the procedure documented by the package, typically 'autoreconf'.])]) 22 | 23 | # Copyright (C) 2002-2013 Free Software Foundation, Inc. 
24 | # 25 | # This file is free software; the Free Software Foundation 26 | # gives unlimited permission to copy and/or distribute it, 27 | # with or without modifications, as long as this notice is preserved. 28 | 29 | # AM_AUTOMAKE_VERSION(VERSION) 30 | # ---------------------------- 31 | # Automake X.Y traces this macro to ensure aclocal.m4 has been 32 | # generated from the m4 files accompanying Automake X.Y. 33 | # (This private macro should not be called outside this file.) 34 | AC_DEFUN([AM_AUTOMAKE_VERSION], 35 | [am__api_version='1.14' 36 | dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to 37 | dnl require some minimum version. Point them to the right macro. 38 | m4_if([$1], [1.14.1], [], 39 | [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl 40 | ]) 41 | 42 | # _AM_AUTOCONF_VERSION(VERSION) 43 | # ----------------------------- 44 | # aclocal traces this macro to find the Autoconf version. 45 | # This is a private macro too. Using m4_define simplifies 46 | # the logic in aclocal, which can simply ignore this definition. 47 | m4_define([_AM_AUTOCONF_VERSION], []) 48 | 49 | # AM_SET_CURRENT_AUTOMAKE_VERSION 50 | # ------------------------------- 51 | # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. 52 | # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 53 | AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], 54 | [AM_AUTOMAKE_VERSION([1.14.1])dnl 55 | m4_ifndef([AC_AUTOCONF_VERSION], 56 | [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl 57 | _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) 58 | 59 | # AM_AUX_DIR_EXPAND -*- Autoconf -*- 60 | 61 | # Copyright (C) 2001-2013 Free Software Foundation, Inc. 62 | # 63 | # This file is free software; the Free Software Foundation 64 | # gives unlimited permission to copy and/or distribute it, 65 | # with or without modifications, as long as this notice is preserved. 66 | 67 | # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets 68 | # $ac_aux_dir to '$srcdir/foo'. In other projects, it is set to 69 | # '$srcdir', '$srcdir/..', or '$srcdir/../..'. 70 | # 71 | # Of course, Automake must honor this variable whenever it calls a 72 | # tool from the auxiliary directory. The problem is that $srcdir (and 73 | # therefore $ac_aux_dir as well) can be either absolute or relative, 74 | # depending on how configure is run. This is pretty annoying, since 75 | # it makes $ac_aux_dir quite unusable in subdirectories: in the top 76 | # source directory, any form will work fine, but in subdirectories a 77 | # relative path needs to be adjusted first. 78 | # 79 | # $ac_aux_dir/missing 80 | # fails when called from a subdirectory if $ac_aux_dir is relative 81 | # $top_srcdir/$ac_aux_dir/missing 82 | # fails if $ac_aux_dir is absolute, 83 | # fails when called from a subdirectory in a VPATH build with 84 | # a relative $ac_aux_dir 85 | # 86 | # The reason of the latter failure is that $top_srcdir and $ac_aux_dir 87 | # are both prefixed by $srcdir. In an in-source build this is usually 88 | # harmless because $srcdir is '.', but things will broke when you 89 | # start a VPATH build or use an absolute $srcdir. 90 | # 91 | # So we could use something similar to $top_srcdir/$ac_aux_dir/missing, 92 | # iff we strip the leading $srcdir from $ac_aux_dir. 
That would be: 93 | # am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` 94 | # and then we would define $MISSING as 95 | # MISSING="\${SHELL} $am_aux_dir/missing" 96 | # This will work as long as MISSING is not called from configure, because 97 | # unfortunately $(top_srcdir) has no meaning in configure. 98 | # However there are other variables, like CC, which are often used in 99 | # configure, and could therefore not use this "fixed" $ac_aux_dir. 100 | # 101 | # Another solution, used here, is to always expand $ac_aux_dir to an 102 | # absolute PATH. The drawback is that using absolute paths prevent a 103 | # configured tree to be moved without reconfiguration. 104 | 105 | AC_DEFUN([AM_AUX_DIR_EXPAND], 106 | [dnl Rely on autoconf to set up CDPATH properly. 107 | AC_PREREQ([2.50])dnl 108 | # expand $ac_aux_dir to an absolute path 109 | am_aux_dir=`cd $ac_aux_dir && pwd` 110 | ]) 111 | 112 | # AM_CONDITIONAL -*- Autoconf -*- 113 | 114 | # Copyright (C) 1997-2013 Free Software Foundation, Inc. 115 | # 116 | # This file is free software; the Free Software Foundation 117 | # gives unlimited permission to copy and/or distribute it, 118 | # with or without modifications, as long as this notice is preserved. 119 | 120 | # AM_CONDITIONAL(NAME, SHELL-CONDITION) 121 | # ------------------------------------- 122 | # Define a conditional. 123 | AC_DEFUN([AM_CONDITIONAL], 124 | [AC_PREREQ([2.52])dnl 125 | m4_if([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], 126 | [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl 127 | AC_SUBST([$1_TRUE])dnl 128 | AC_SUBST([$1_FALSE])dnl 129 | _AM_SUBST_NOTMAKE([$1_TRUE])dnl 130 | _AM_SUBST_NOTMAKE([$1_FALSE])dnl 131 | m4_define([_AM_COND_VALUE_$1], [$2])dnl 132 | if $2; then 133 | $1_TRUE= 134 | $1_FALSE='#' 135 | else 136 | $1_TRUE='#' 137 | $1_FALSE= 138 | fi 139 | AC_CONFIG_COMMANDS_PRE( 140 | [if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then 141 | AC_MSG_ERROR([[conditional "$1" was never defined. 142 | Usually this means the macro was only invoked conditionally.]]) 143 | fi])]) 144 | 145 | # Copyright (C) 1999-2013 Free Software Foundation, Inc. 146 | # 147 | # This file is free software; the Free Software Foundation 148 | # gives unlimited permission to copy and/or distribute it, 149 | # with or without modifications, as long as this notice is preserved. 150 | 151 | 152 | # There are a few dirty hacks below to avoid letting 'AC_PROG_CC' be 153 | # written in clear, in which case automake, when reading aclocal.m4, 154 | # will think it sees a *use*, and therefore will trigger all it's 155 | # C support machinery. Also note that it means that autoscan, seeing 156 | # CC etc. in the Makefile, will ask for an AC_PROG_CC use... 157 | 158 | 159 | # _AM_DEPENDENCIES(NAME) 160 | # ---------------------- 161 | # See how the compiler implements dependency checking. 162 | # NAME is "CC", "CXX", "OBJC", "OBJCXX", "UPC", or "GJC". 163 | # We try a few techniques and use that to set a single cache variable. 164 | # 165 | # We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was 166 | # modified to invoke _AM_DEPENDENCIES(CC); we would have a circular 167 | # dependency, and given that the user is not expected to run this macro, 168 | # just rely on AC_PROG_CC. 
169 | AC_DEFUN([_AM_DEPENDENCIES], 170 | [AC_REQUIRE([AM_SET_DEPDIR])dnl 171 | AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl 172 | AC_REQUIRE([AM_MAKE_INCLUDE])dnl 173 | AC_REQUIRE([AM_DEP_TRACK])dnl 174 | 175 | m4_if([$1], [CC], [depcc="$CC" am_compiler_list=], 176 | [$1], [CXX], [depcc="$CXX" am_compiler_list=], 177 | [$1], [OBJC], [depcc="$OBJC" am_compiler_list='gcc3 gcc'], 178 | [$1], [OBJCXX], [depcc="$OBJCXX" am_compiler_list='gcc3 gcc'], 179 | [$1], [UPC], [depcc="$UPC" am_compiler_list=], 180 | [$1], [GCJ], [depcc="$GCJ" am_compiler_list='gcc3 gcc'], 181 | [depcc="$$1" am_compiler_list=]) 182 | 183 | AC_CACHE_CHECK([dependency style of $depcc], 184 | [am_cv_$1_dependencies_compiler_type], 185 | [if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then 186 | # We make a subdir and do the tests there. Otherwise we can end up 187 | # making bogus files that we don't know about and never remove. For 188 | # instance it was reported that on HP-UX the gcc test will end up 189 | # making a dummy file named 'D' -- because '-MD' means "put the output 190 | # in D". 191 | rm -rf conftest.dir 192 | mkdir conftest.dir 193 | # Copy depcomp to subdir because otherwise we won't find it if we're 194 | # using a relative directory. 195 | cp "$am_depcomp" conftest.dir 196 | cd conftest.dir 197 | # We will build objects and dependencies in a subdirectory because 198 | # it helps to detect inapplicable dependency modes. For instance 199 | # both Tru64's cc and ICC support -MD to output dependencies as a 200 | # side effect of compilation, but ICC will put the dependencies in 201 | # the current directory while Tru64 will put them in the object 202 | # directory. 203 | mkdir sub 204 | 205 | am_cv_$1_dependencies_compiler_type=none 206 | if test "$am_compiler_list" = ""; then 207 | am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` 208 | fi 209 | am__universal=false 210 | m4_case([$1], [CC], 211 | [case " $depcc " in #( 212 | *\ -arch\ *\ -arch\ *) am__universal=true ;; 213 | esac], 214 | [CXX], 215 | [case " $depcc " in #( 216 | *\ -arch\ *\ -arch\ *) am__universal=true ;; 217 | esac]) 218 | 219 | for depmode in $am_compiler_list; do 220 | # Setup a source with many dependencies, because some compilers 221 | # like to wrap large dependency lists on column 80 (with \), and 222 | # we should not choose a depcomp mode which is confused by this. 223 | # 224 | # We need to recreate these files for each test, as the compiler may 225 | # overwrite some of them when testing with obscure command lines. 226 | # This happens at least with the AIX C compiler. 227 | : > sub/conftest.c 228 | for i in 1 2 3 4 5 6; do 229 | echo '#include "conftst'$i'.h"' >> sub/conftest.c 230 | # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with 231 | # Solaris 10 /bin/sh. 232 | echo '/* dummy */' > sub/conftst$i.h 233 | done 234 | echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf 235 | 236 | # We check with '-c' and '-o' for the sake of the "dashmstdout" 237 | # mode. It turns out that the SunPro C++ compiler does not properly 238 | # handle '-M -o', and we need to detect this. Also, some Intel 239 | # versions had trouble with output in subdirs. 240 | am__obj=sub/conftest.${OBJEXT-o} 241 | am__minus_obj="-o $am__obj" 242 | case $depmode in 243 | gcc) 244 | # This depmode causes a compiler race in universal mode. 
245 | test "$am__universal" = false || continue 246 | ;; 247 | nosideeffect) 248 | # After this tag, mechanisms are not by side-effect, so they'll 249 | # only be used when explicitly requested. 250 | if test "x$enable_dependency_tracking" = xyes; then 251 | continue 252 | else 253 | break 254 | fi 255 | ;; 256 | msvc7 | msvc7msys | msvisualcpp | msvcmsys) 257 | # This compiler won't grok '-c -o', but also, the minuso test has 258 | # not run yet. These depmodes are late enough in the game, and 259 | # so weak that their functioning should not be impacted. 260 | am__obj=conftest.${OBJEXT-o} 261 | am__minus_obj= 262 | ;; 263 | none) break ;; 264 | esac 265 | if depmode=$depmode \ 266 | source=sub/conftest.c object=$am__obj \ 267 | depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ 268 | $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ 269 | >/dev/null 2>conftest.err && 270 | grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && 271 | grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && 272 | grep $am__obj sub/conftest.Po > /dev/null 2>&1 && 273 | ${MAKE-make} -s -f confmf > /dev/null 2>&1; then 274 | # icc doesn't choke on unknown options, it will just issue warnings 275 | # or remarks (even with -Werror). So we grep stderr for any message 276 | # that says an option was ignored or not supported. 277 | # When given -MP, icc 7.0 and 7.1 complain thusly: 278 | # icc: Command line warning: ignoring option '-M'; no argument required 279 | # The diagnosis changed in icc 8.0: 280 | # icc: Command line remark: option '-MP' not supported 281 | if (grep 'ignoring option' conftest.err || 282 | grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else 283 | am_cv_$1_dependencies_compiler_type=$depmode 284 | break 285 | fi 286 | fi 287 | done 288 | 289 | cd .. 290 | rm -rf conftest.dir 291 | else 292 | am_cv_$1_dependencies_compiler_type=none 293 | fi 294 | ]) 295 | AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) 296 | AM_CONDITIONAL([am__fastdep$1], [ 297 | test "x$enable_dependency_tracking" != xno \ 298 | && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) 299 | ]) 300 | 301 | 302 | # AM_SET_DEPDIR 303 | # ------------- 304 | # Choose a directory name for dependency files. 305 | # This macro is AC_REQUIREd in _AM_DEPENDENCIES. 306 | AC_DEFUN([AM_SET_DEPDIR], 307 | [AC_REQUIRE([AM_SET_LEADING_DOT])dnl 308 | AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl 309 | ]) 310 | 311 | 312 | # AM_DEP_TRACK 313 | # ------------ 314 | AC_DEFUN([AM_DEP_TRACK], 315 | [AC_ARG_ENABLE([dependency-tracking], [dnl 316 | AS_HELP_STRING( 317 | [--enable-dependency-tracking], 318 | [do not reject slow dependency extractors]) 319 | AS_HELP_STRING( 320 | [--disable-dependency-tracking], 321 | [speeds up one-time build])]) 322 | if test "x$enable_dependency_tracking" != xno; then 323 | am_depcomp="$ac_aux_dir/depcomp" 324 | AMDEPBACKSLASH='\' 325 | am__nodep='_no' 326 | fi 327 | AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) 328 | AC_SUBST([AMDEPBACKSLASH])dnl 329 | _AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl 330 | AC_SUBST([am__nodep])dnl 331 | _AM_SUBST_NOTMAKE([am__nodep])dnl 332 | ]) 333 | 334 | # Generate code to set up dependency tracking. -*- Autoconf -*- 335 | 336 | # Copyright (C) 1999-2013 Free Software Foundation, Inc. 337 | # 338 | # This file is free software; the Free Software Foundation 339 | # gives unlimited permission to copy and/or distribute it, 340 | # with or without modifications, as long as this notice is preserved. 
341 | 342 | 343 | # _AM_OUTPUT_DEPENDENCY_COMMANDS 344 | # ------------------------------ 345 | AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], 346 | [{ 347 | # Older Autoconf quotes --file arguments for eval, but not when files 348 | # are listed without --file. Let's play safe and only enable the eval 349 | # if we detect the quoting. 350 | case $CONFIG_FILES in 351 | *\'*) eval set x "$CONFIG_FILES" ;; 352 | *) set x $CONFIG_FILES ;; 353 | esac 354 | shift 355 | for mf 356 | do 357 | # Strip MF so we end up with the name of the file. 358 | mf=`echo "$mf" | sed -e 's/:.*$//'` 359 | # Check whether this is an Automake generated Makefile or not. 360 | # We used to match only the files named 'Makefile.in', but 361 | # some people rename them; so instead we look at the file content. 362 | # Grep'ing the first line is not enough: some people post-process 363 | # each Makefile.in and add a new line on top of each file to say so. 364 | # Grep'ing the whole file is not good either: AIX grep has a line 365 | # limit of 2048, but all sed's we know have understand at least 4000. 366 | if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then 367 | dirpart=`AS_DIRNAME("$mf")` 368 | else 369 | continue 370 | fi 371 | # Extract the definition of DEPDIR, am__include, and am__quote 372 | # from the Makefile without running 'make'. 373 | DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` 374 | test -z "$DEPDIR" && continue 375 | am__include=`sed -n 's/^am__include = //p' < "$mf"` 376 | test -z "$am__include" && continue 377 | am__quote=`sed -n 's/^am__quote = //p' < "$mf"` 378 | # Find all dependency output files, they are included files with 379 | # $(DEPDIR) in their names. We invoke sed twice because it is the 380 | # simplest approach to changing $(DEPDIR) to its actual value in the 381 | # expansion. 382 | for file in `sed -n " 383 | s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ 384 | sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g'`; do 385 | # Make sure the directory exists. 386 | test -f "$dirpart/$file" && continue 387 | fdir=`AS_DIRNAME(["$file"])` 388 | AS_MKDIR_P([$dirpart/$fdir]) 389 | # echo "creating $dirpart/$file" 390 | echo '# dummy' > "$dirpart/$file" 391 | done 392 | done 393 | } 394 | ])# _AM_OUTPUT_DEPENDENCY_COMMANDS 395 | 396 | 397 | # AM_OUTPUT_DEPENDENCY_COMMANDS 398 | # ----------------------------- 399 | # This macro should only be invoked once -- use via AC_REQUIRE. 400 | # 401 | # This code is only required when automatic dependency tracking 402 | # is enabled. FIXME. This creates each '.P' file that we will 403 | # need in order to bootstrap the dependency handling code. 404 | AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], 405 | [AC_CONFIG_COMMANDS([depfiles], 406 | [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], 407 | [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) 408 | ]) 409 | 410 | # Do all the work for Automake. -*- Autoconf -*- 411 | 412 | # Copyright (C) 1996-2013 Free Software Foundation, Inc. 413 | # 414 | # This file is free software; the Free Software Foundation 415 | # gives unlimited permission to copy and/or distribute it, 416 | # with or without modifications, as long as this notice is preserved. 417 | 418 | # This macro actually does too much. Some checks are only needed if 419 | # your package does certain things. But this isn't really a big deal. 420 | 421 | dnl Redefine AC_PROG_CC to automatically invoke _AM_PROG_CC_C_O. 
422 | m4_define([AC_PROG_CC], 423 | m4_defn([AC_PROG_CC]) 424 | [_AM_PROG_CC_C_O 425 | ]) 426 | 427 | # AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) 428 | # AM_INIT_AUTOMAKE([OPTIONS]) 429 | # ----------------------------------------------- 430 | # The call with PACKAGE and VERSION arguments is the old style 431 | # call (pre autoconf-2.50), which is being phased out. PACKAGE 432 | # and VERSION should now be passed to AC_INIT and removed from 433 | # the call to AM_INIT_AUTOMAKE. 434 | # We support both call styles for the transition. After 435 | # the next Automake release, Autoconf can make the AC_INIT 436 | # arguments mandatory, and then we can depend on a new Autoconf 437 | # release and drop the old call support. 438 | AC_DEFUN([AM_INIT_AUTOMAKE], 439 | [AC_PREREQ([2.65])dnl 440 | dnl Autoconf wants to disallow AM_ names. We explicitly allow 441 | dnl the ones we care about. 442 | m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl 443 | AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl 444 | AC_REQUIRE([AC_PROG_INSTALL])dnl 445 | if test "`cd $srcdir && pwd`" != "`pwd`"; then 446 | # Use -I$(srcdir) only when $(srcdir) != ., so that make's output 447 | # is not polluted with repeated "-I." 448 | AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl 449 | # test to see if srcdir already configured 450 | if test -f $srcdir/config.status; then 451 | AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) 452 | fi 453 | fi 454 | 455 | # test whether we have cygpath 456 | if test -z "$CYGPATH_W"; then 457 | if (cygpath --version) >/dev/null 2>/dev/null; then 458 | CYGPATH_W='cygpath -w' 459 | else 460 | CYGPATH_W=echo 461 | fi 462 | fi 463 | AC_SUBST([CYGPATH_W]) 464 | 465 | # Define the identity of the package. 466 | dnl Distinguish between old-style and new-style calls. 467 | m4_ifval([$2], 468 | [AC_DIAGNOSE([obsolete], 469 | [$0: two- and three-arguments forms are deprecated.]) 470 | m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl 471 | AC_SUBST([PACKAGE], [$1])dnl 472 | AC_SUBST([VERSION], [$2])], 473 | [_AM_SET_OPTIONS([$1])dnl 474 | dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. 475 | m4_if( 476 | m4_ifdef([AC_PACKAGE_NAME], [ok]):m4_ifdef([AC_PACKAGE_VERSION], [ok]), 477 | [ok:ok],, 478 | [m4_fatal([AC_INIT should be called with package and version arguments])])dnl 479 | AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl 480 | AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl 481 | 482 | _AM_IF_OPTION([no-define],, 483 | [AC_DEFINE_UNQUOTED([PACKAGE], ["$PACKAGE"], [Name of package]) 484 | AC_DEFINE_UNQUOTED([VERSION], ["$VERSION"], [Version number of package])])dnl 485 | 486 | # Some tools Automake needs. 487 | AC_REQUIRE([AM_SANITY_CHECK])dnl 488 | AC_REQUIRE([AC_ARG_PROGRAM])dnl 489 | AM_MISSING_PROG([ACLOCAL], [aclocal-${am__api_version}]) 490 | AM_MISSING_PROG([AUTOCONF], [autoconf]) 491 | AM_MISSING_PROG([AUTOMAKE], [automake-${am__api_version}]) 492 | AM_MISSING_PROG([AUTOHEADER], [autoheader]) 493 | AM_MISSING_PROG([MAKEINFO], [makeinfo]) 494 | AC_REQUIRE([AM_PROG_INSTALL_SH])dnl 495 | AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl 496 | AC_REQUIRE([AC_PROG_MKDIR_P])dnl 497 | # For better backward compatibility. To be removed once Automake 1.9.x 498 | # dies out for good. For more background, see: 499 | # 500 | # 501 | AC_SUBST([mkdir_p], ['$(MKDIR_P)']) 502 | # We need awk for the "check" target. The system "awk" is bad on 503 | # some platforms. 
504 | AC_REQUIRE([AC_PROG_AWK])dnl 505 | AC_REQUIRE([AC_PROG_MAKE_SET])dnl 506 | AC_REQUIRE([AM_SET_LEADING_DOT])dnl 507 | _AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], 508 | [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], 509 | [_AM_PROG_TAR([v7])])]) 510 | _AM_IF_OPTION([no-dependencies],, 511 | [AC_PROVIDE_IFELSE([AC_PROG_CC], 512 | [_AM_DEPENDENCIES([CC])], 513 | [m4_define([AC_PROG_CC], 514 | m4_defn([AC_PROG_CC])[_AM_DEPENDENCIES([CC])])])dnl 515 | AC_PROVIDE_IFELSE([AC_PROG_CXX], 516 | [_AM_DEPENDENCIES([CXX])], 517 | [m4_define([AC_PROG_CXX], 518 | m4_defn([AC_PROG_CXX])[_AM_DEPENDENCIES([CXX])])])dnl 519 | AC_PROVIDE_IFELSE([AC_PROG_OBJC], 520 | [_AM_DEPENDENCIES([OBJC])], 521 | [m4_define([AC_PROG_OBJC], 522 | m4_defn([AC_PROG_OBJC])[_AM_DEPENDENCIES([OBJC])])])dnl 523 | AC_PROVIDE_IFELSE([AC_PROG_OBJCXX], 524 | [_AM_DEPENDENCIES([OBJCXX])], 525 | [m4_define([AC_PROG_OBJCXX], 526 | m4_defn([AC_PROG_OBJCXX])[_AM_DEPENDENCIES([OBJCXX])])])dnl 527 | ]) 528 | AC_REQUIRE([AM_SILENT_RULES])dnl 529 | dnl The testsuite driver may need to know about EXEEXT, so add the 530 | dnl 'am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This 531 | dnl macro is hooked onto _AC_COMPILER_EXEEXT early, see below. 532 | AC_CONFIG_COMMANDS_PRE(dnl 533 | [m4_provide_if([_AM_COMPILER_EXEEXT], 534 | [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl 535 | 536 | # POSIX will say in a future version that running "rm -f" with no argument 537 | # is OK; and we want to be able to make that assumption in our Makefile 538 | # recipes. So use an aggressive probe to check that the usage we want is 539 | # actually supported "in the wild" to an acceptable degree. 540 | # See automake bug#10828. 541 | # To make any issue more visible, cause the running configure to be aborted 542 | # by default if the 'rm' program in use doesn't match our expectations; the 543 | # user can still override this though. 544 | if rm -f && rm -fr && rm -rf; then : OK; else 545 | cat >&2 <<'END' 546 | Oops! 547 | 548 | Your 'rm' program seems unable to run without file operands specified 549 | on the command line, even when the '-f' option is present. This is contrary 550 | to the behaviour of most rm programs out there, and not conforming with 551 | the upcoming POSIX standard: 552 | 553 | Please tell bug-automake@gnu.org about your system, including the value 554 | of your $PATH and any error possibly output before this message. This 555 | can help us improve future automake versions. 556 | 557 | END 558 | if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then 559 | echo 'Configuration will proceed anyway, since you have set the' >&2 560 | echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2 561 | echo >&2 562 | else 563 | cat >&2 <<'END' 564 | Aborting the configuration process, to ensure you take notice of the issue. 565 | 566 | You can download and install GNU coreutils to get an 'rm' implementation 567 | that behaves properly: . 568 | 569 | If you want to complete the configuration process using your problematic 570 | 'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM 571 | to "yes", and re-run configure. 572 | 573 | END 574 | AC_MSG_ERROR([Your 'rm' program is bad, sorry.]) 575 | fi 576 | fi 577 | ]) 578 | 579 | dnl Hook into '_AC_COMPILER_EXEEXT' early to learn its expansion. Do not 580 | dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further 581 | dnl mangled by Autoconf and run in a shell conditional statement. 
582 | m4_define([_AC_COMPILER_EXEEXT], 583 | m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) 584 | 585 | # When config.status generates a header, we must update the stamp-h file. 586 | # This file resides in the same directory as the config header 587 | # that is generated. The stamp files are numbered to have different names. 588 | 589 | # Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the 590 | # loop where config.status creates the headers, so we can generate 591 | # our stamp files there. 592 | AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], 593 | [# Compute $1's index in $config_headers. 594 | _am_arg=$1 595 | _am_stamp_count=1 596 | for _am_header in $config_headers :; do 597 | case $_am_header in 598 | $_am_arg | $_am_arg:* ) 599 | break ;; 600 | * ) 601 | _am_stamp_count=`expr $_am_stamp_count + 1` ;; 602 | esac 603 | done 604 | echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) 605 | 606 | # Copyright (C) 2001-2013 Free Software Foundation, Inc. 607 | # 608 | # This file is free software; the Free Software Foundation 609 | # gives unlimited permission to copy and/or distribute it, 610 | # with or without modifications, as long as this notice is preserved. 611 | 612 | # AM_PROG_INSTALL_SH 613 | # ------------------ 614 | # Define $install_sh. 615 | AC_DEFUN([AM_PROG_INSTALL_SH], 616 | [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl 617 | if test x"${install_sh}" != xset; then 618 | case $am_aux_dir in 619 | *\ * | *\ *) 620 | install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; 621 | *) 622 | install_sh="\${SHELL} $am_aux_dir/install-sh" 623 | esac 624 | fi 625 | AC_SUBST([install_sh])]) 626 | 627 | # Copyright (C) 2003-2013 Free Software Foundation, Inc. 628 | # 629 | # This file is free software; the Free Software Foundation 630 | # gives unlimited permission to copy and/or distribute it, 631 | # with or without modifications, as long as this notice is preserved. 632 | 633 | # Check whether the underlying file-system supports filenames 634 | # with a leading dot. For instance MS-DOS doesn't. 635 | AC_DEFUN([AM_SET_LEADING_DOT], 636 | [rm -rf .tst 2>/dev/null 637 | mkdir .tst 2>/dev/null 638 | if test -d .tst; then 639 | am__leading_dot=. 640 | else 641 | am__leading_dot=_ 642 | fi 643 | rmdir .tst 2>/dev/null 644 | AC_SUBST([am__leading_dot])]) 645 | 646 | # Check to see how 'make' treats includes. -*- Autoconf -*- 647 | 648 | # Copyright (C) 2001-2013 Free Software Foundation, Inc. 649 | # 650 | # This file is free software; the Free Software Foundation 651 | # gives unlimited permission to copy and/or distribute it, 652 | # with or without modifications, as long as this notice is preserved. 653 | 654 | # AM_MAKE_INCLUDE() 655 | # ----------------- 656 | # Check to see how make treats includes. 657 | AC_DEFUN([AM_MAKE_INCLUDE], 658 | [am_make=${MAKE-make} 659 | cat > confinc << 'END' 660 | am__doit: 661 | @echo this is the am__doit target 662 | .PHONY: am__doit 663 | END 664 | # If we don't find an include directive, just comment out the code. 665 | AC_MSG_CHECKING([for style of include used by $am_make]) 666 | am__include="#" 667 | am__quote= 668 | _am_result=none 669 | # First try GNU make style include. 670 | echo "include confinc" > confmf 671 | # Ignore all kinds of additional output from 'make'. 672 | case `$am_make -s -f confmf 2> /dev/null` in #( 673 | *the\ am__doit\ target*) 674 | am__include=include 675 | am__quote= 676 | _am_result=GNU 677 | ;; 678 | esac 679 | # Now try BSD make style include. 
680 | if test "$am__include" = "#"; then 681 | echo '.include "confinc"' > confmf 682 | case `$am_make -s -f confmf 2> /dev/null` in #( 683 | *the\ am__doit\ target*) 684 | am__include=.include 685 | am__quote="\"" 686 | _am_result=BSD 687 | ;; 688 | esac 689 | fi 690 | AC_SUBST([am__include]) 691 | AC_SUBST([am__quote]) 692 | AC_MSG_RESULT([$_am_result]) 693 | rm -f confinc confmf 694 | ]) 695 | 696 | # Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- 697 | 698 | # Copyright (C) 1997-2013 Free Software Foundation, Inc. 699 | # 700 | # This file is free software; the Free Software Foundation 701 | # gives unlimited permission to copy and/or distribute it, 702 | # with or without modifications, as long as this notice is preserved. 703 | 704 | # AM_MISSING_PROG(NAME, PROGRAM) 705 | # ------------------------------ 706 | AC_DEFUN([AM_MISSING_PROG], 707 | [AC_REQUIRE([AM_MISSING_HAS_RUN]) 708 | $1=${$1-"${am_missing_run}$2"} 709 | AC_SUBST($1)]) 710 | 711 | # AM_MISSING_HAS_RUN 712 | # ------------------ 713 | # Define MISSING if not defined so far and test if it is modern enough. 714 | # If it is, set am_missing_run to use it, otherwise, to nothing. 715 | AC_DEFUN([AM_MISSING_HAS_RUN], 716 | [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl 717 | AC_REQUIRE_AUX_FILE([missing])dnl 718 | if test x"${MISSING+set}" != xset; then 719 | case $am_aux_dir in 720 | *\ * | *\ *) 721 | MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; 722 | *) 723 | MISSING="\${SHELL} $am_aux_dir/missing" ;; 724 | esac 725 | fi 726 | # Use eval to expand $SHELL 727 | if eval "$MISSING --is-lightweight"; then 728 | am_missing_run="$MISSING " 729 | else 730 | am_missing_run= 731 | AC_MSG_WARN(['missing' script is too old or missing]) 732 | fi 733 | ]) 734 | 735 | # Helper functions for option handling. -*- Autoconf -*- 736 | 737 | # Copyright (C) 2001-2013 Free Software Foundation, Inc. 738 | # 739 | # This file is free software; the Free Software Foundation 740 | # gives unlimited permission to copy and/or distribute it, 741 | # with or without modifications, as long as this notice is preserved. 742 | 743 | # _AM_MANGLE_OPTION(NAME) 744 | # ----------------------- 745 | AC_DEFUN([_AM_MANGLE_OPTION], 746 | [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) 747 | 748 | # _AM_SET_OPTION(NAME) 749 | # -------------------- 750 | # Set option NAME. Presently that only means defining a flag for this option. 751 | AC_DEFUN([_AM_SET_OPTION], 752 | [m4_define(_AM_MANGLE_OPTION([$1]), [1])]) 753 | 754 | # _AM_SET_OPTIONS(OPTIONS) 755 | # ------------------------ 756 | # OPTIONS is a space-separated list of Automake options. 757 | AC_DEFUN([_AM_SET_OPTIONS], 758 | [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) 759 | 760 | # _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) 761 | # ------------------------------------------- 762 | # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 763 | AC_DEFUN([_AM_IF_OPTION], 764 | [m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) 765 | 766 | # Copyright (C) 1999-2013 Free Software Foundation, Inc. 767 | # 768 | # This file is free software; the Free Software Foundation 769 | # gives unlimited permission to copy and/or distribute it, 770 | # with or without modifications, as long as this notice is preserved. 771 | 772 | # _AM_PROG_CC_C_O 773 | # --------------- 774 | # Like AC_PROG_CC_C_O, but changed for automake. We rewrite AC_PROG_CC 775 | # to automatically call this. 
776 | AC_DEFUN([_AM_PROG_CC_C_O], 777 | [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl 778 | AC_REQUIRE_AUX_FILE([compile])dnl 779 | AC_LANG_PUSH([C])dnl 780 | AC_CACHE_CHECK( 781 | [whether $CC understands -c and -o together], 782 | [am_cv_prog_cc_c_o], 783 | [AC_LANG_CONFTEST([AC_LANG_PROGRAM([])]) 784 | # Make sure it works both with $CC and with simple cc. 785 | # Following AC_PROG_CC_C_O, we do the test twice because some 786 | # compilers refuse to overwrite an existing .o file with -o, 787 | # though they will create one. 788 | am_cv_prog_cc_c_o=yes 789 | for am_i in 1 2; do 790 | if AM_RUN_LOG([$CC -c conftest.$ac_ext -o conftest2.$ac_objext]) \ 791 | && test -f conftest2.$ac_objext; then 792 | : OK 793 | else 794 | am_cv_prog_cc_c_o=no 795 | break 796 | fi 797 | done 798 | rm -f core conftest* 799 | unset am_i]) 800 | if test "$am_cv_prog_cc_c_o" != yes; then 801 | # Losing compiler, so override with the script. 802 | # FIXME: It is wrong to rewrite CC. 803 | # But if we don't then we get into trouble of one sort or another. 804 | # A longer-term fix would be to have automake use am__CC in this case, 805 | # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" 806 | CC="$am_aux_dir/compile $CC" 807 | fi 808 | AC_LANG_POP([C])]) 809 | 810 | # For backward compatibility. 811 | AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])]) 812 | 813 | # Copyright (C) 2001-2013 Free Software Foundation, Inc. 814 | # 815 | # This file is free software; the Free Software Foundation 816 | # gives unlimited permission to copy and/or distribute it, 817 | # with or without modifications, as long as this notice is preserved. 818 | 819 | # AM_RUN_LOG(COMMAND) 820 | # ------------------- 821 | # Run COMMAND, save the exit status in ac_status, and log it. 822 | # (This has been adapted from Autoconf's _AC_RUN_LOG macro.) 823 | AC_DEFUN([AM_RUN_LOG], 824 | [{ echo "$as_me:$LINENO: $1" >&AS_MESSAGE_LOG_FD 825 | ($1) >&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD 826 | ac_status=$? 827 | echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD 828 | (exit $ac_status); }]) 829 | 830 | # Check to make sure that the build environment is sane. -*- Autoconf -*- 831 | 832 | # Copyright (C) 1996-2013 Free Software Foundation, Inc. 833 | # 834 | # This file is free software; the Free Software Foundation 835 | # gives unlimited permission to copy and/or distribute it, 836 | # with or without modifications, as long as this notice is preserved. 837 | 838 | # AM_SANITY_CHECK 839 | # --------------- 840 | AC_DEFUN([AM_SANITY_CHECK], 841 | [AC_MSG_CHECKING([whether build environment is sane]) 842 | # Reject unsafe characters in $srcdir or the absolute working directory 843 | # name. Accept space and tab only in the latter. 844 | am_lf=' 845 | ' 846 | case `pwd` in 847 | *[[\\\"\#\$\&\'\`$am_lf]]*) 848 | AC_MSG_ERROR([unsafe absolute working directory name]);; 849 | esac 850 | case $srcdir in 851 | *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) 852 | AC_MSG_ERROR([unsafe srcdir value: '$srcdir']);; 853 | esac 854 | 855 | # Do 'set' in a subshell so we don't clobber the current shell's 856 | # arguments. Must try -L first in case configure is actually a 857 | # symlink; some systems play weird games with the mod time of symlinks 858 | # (eg FreeBSD returns the mod time of the symlink's containing 859 | # directory). 
860 | if ( 861 | am_has_slept=no 862 | for am_try in 1 2; do 863 | echo "timestamp, slept: $am_has_slept" > conftest.file 864 | set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` 865 | if test "$[*]" = "X"; then 866 | # -L didn't work. 867 | set X `ls -t "$srcdir/configure" conftest.file` 868 | fi 869 | if test "$[*]" != "X $srcdir/configure conftest.file" \ 870 | && test "$[*]" != "X conftest.file $srcdir/configure"; then 871 | 872 | # If neither matched, then we have a broken ls. This can happen 873 | # if, for instance, CONFIG_SHELL is bash and it inherits a 874 | # broken ls alias from the environment. This has actually 875 | # happened. Such a system could not be considered "sane". 876 | AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken 877 | alias in your environment]) 878 | fi 879 | if test "$[2]" = conftest.file || test $am_try -eq 2; then 880 | break 881 | fi 882 | # Just in case. 883 | sleep 1 884 | am_has_slept=yes 885 | done 886 | test "$[2]" = conftest.file 887 | ) 888 | then 889 | # Ok. 890 | : 891 | else 892 | AC_MSG_ERROR([newly created file is older than distributed files! 893 | Check your system clock]) 894 | fi 895 | AC_MSG_RESULT([yes]) 896 | # If we didn't sleep, we still need to ensure time stamps of config.status and 897 | # generated files are strictly newer. 898 | am_sleep_pid= 899 | if grep 'slept: no' conftest.file >/dev/null 2>&1; then 900 | ( sleep 1 ) & 901 | am_sleep_pid=$! 902 | fi 903 | AC_CONFIG_COMMANDS_PRE( 904 | [AC_MSG_CHECKING([that generated files are newer than configure]) 905 | if test -n "$am_sleep_pid"; then 906 | # Hide warnings about reused PIDs. 907 | wait $am_sleep_pid 2>/dev/null 908 | fi 909 | AC_MSG_RESULT([done])]) 910 | rm -f conftest.file 911 | ]) 912 | 913 | # Copyright (C) 2009-2013 Free Software Foundation, Inc. 914 | # 915 | # This file is free software; the Free Software Foundation 916 | # gives unlimited permission to copy and/or distribute it, 917 | # with or without modifications, as long as this notice is preserved. 918 | 919 | # AM_SILENT_RULES([DEFAULT]) 920 | # -------------------------- 921 | # Enable less verbose build rules; with the default set to DEFAULT 922 | # ("yes" being less verbose, "no" or empty being verbose). 923 | AC_DEFUN([AM_SILENT_RULES], 924 | [AC_ARG_ENABLE([silent-rules], [dnl 925 | AS_HELP_STRING( 926 | [--enable-silent-rules], 927 | [less verbose build output (undo: "make V=1")]) 928 | AS_HELP_STRING( 929 | [--disable-silent-rules], 930 | [verbose build output (undo: "make V=0")])dnl 931 | ]) 932 | case $enable_silent_rules in @%:@ ((( 933 | yes) AM_DEFAULT_VERBOSITY=0;; 934 | no) AM_DEFAULT_VERBOSITY=1;; 935 | *) AM_DEFAULT_VERBOSITY=m4_if([$1], [yes], [0], [1]);; 936 | esac 937 | dnl 938 | dnl A few 'make' implementations (e.g., NonStop OS and NextStep) 939 | dnl do not support nested variable expansions. 940 | dnl See automake bug#9928 and bug#10237. 941 | am_make=${MAKE-make} 942 | AC_CACHE_CHECK([whether $am_make supports nested variables], 943 | [am_cv_make_support_nested_variables], 944 | [if AS_ECHO([['TRUE=$(BAR$(V)) 945 | BAR0=false 946 | BAR1=true 947 | V=1 948 | am__doit: 949 | @$(TRUE) 950 | .PHONY: am__doit']]) | $am_make -f - >/dev/null 2>&1; then 951 | am_cv_make_support_nested_variables=yes 952 | else 953 | am_cv_make_support_nested_variables=no 954 | fi]) 955 | if test $am_cv_make_support_nested_variables = yes; then 956 | dnl Using '$V' instead of '$(V)' breaks IRIX make. 
957 | AM_V='$(V)' 958 | AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)' 959 | else 960 | AM_V=$AM_DEFAULT_VERBOSITY 961 | AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY 962 | fi 963 | AC_SUBST([AM_V])dnl 964 | AM_SUBST_NOTMAKE([AM_V])dnl 965 | AC_SUBST([AM_DEFAULT_V])dnl 966 | AM_SUBST_NOTMAKE([AM_DEFAULT_V])dnl 967 | AC_SUBST([AM_DEFAULT_VERBOSITY])dnl 968 | AM_BACKSLASH='\' 969 | AC_SUBST([AM_BACKSLASH])dnl 970 | _AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl 971 | ]) 972 | 973 | # Copyright (C) 2001-2013 Free Software Foundation, Inc. 974 | # 975 | # This file is free software; the Free Software Foundation 976 | # gives unlimited permission to copy and/or distribute it, 977 | # with or without modifications, as long as this notice is preserved. 978 | 979 | # AM_PROG_INSTALL_STRIP 980 | # --------------------- 981 | # One issue with vendor 'install' (even GNU) is that you can't 982 | # specify the program used to strip binaries. This is especially 983 | # annoying in cross-compiling environments, where the build's strip 984 | # is unlikely to handle the host's binaries. 985 | # Fortunately install-sh will honor a STRIPPROG variable, so we 986 | # always use install-sh in "make install-strip", and initialize 987 | # STRIPPROG with the value of the STRIP variable (set by the user). 988 | AC_DEFUN([AM_PROG_INSTALL_STRIP], 989 | [AC_REQUIRE([AM_PROG_INSTALL_SH])dnl 990 | # Installed binaries are usually stripped using 'strip' when the user 991 | # run "make install-strip". However 'strip' might not be the right 992 | # tool to use in cross-compilation environments, therefore Automake 993 | # will honor the 'STRIP' environment variable to overrule this program. 994 | dnl Don't test for $cross_compiling = yes, because it might be 'maybe'. 995 | if test "$cross_compiling" != no; then 996 | AC_CHECK_TOOL([STRIP], [strip], :) 997 | fi 998 | INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" 999 | AC_SUBST([INSTALL_STRIP_PROGRAM])]) 1000 | 1001 | # Copyright (C) 2006-2013 Free Software Foundation, Inc. 1002 | # 1003 | # This file is free software; the Free Software Foundation 1004 | # gives unlimited permission to copy and/or distribute it, 1005 | # with or without modifications, as long as this notice is preserved. 1006 | 1007 | # _AM_SUBST_NOTMAKE(VARIABLE) 1008 | # --------------------------- 1009 | # Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. 1010 | # This macro is traced by Automake. 1011 | AC_DEFUN([_AM_SUBST_NOTMAKE]) 1012 | 1013 | # AM_SUBST_NOTMAKE(VARIABLE) 1014 | # -------------------------- 1015 | # Public sister of _AM_SUBST_NOTMAKE. 1016 | AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) 1017 | 1018 | # Check how to create a tarball. -*- Autoconf -*- 1019 | 1020 | # Copyright (C) 2004-2013 Free Software Foundation, Inc. 1021 | # 1022 | # This file is free software; the Free Software Foundation 1023 | # gives unlimited permission to copy and/or distribute it, 1024 | # with or without modifications, as long as this notice is preserved. 1025 | 1026 | # _AM_PROG_TAR(FORMAT) 1027 | # -------------------- 1028 | # Check how to create a tarball in format FORMAT. 1029 | # FORMAT should be one of 'v7', 'ustar', or 'pax'. 1030 | # 1031 | # Substitute a variable $(am__tar) that is a command 1032 | # writing to stdout a FORMAT-tarball containing the directory 1033 | # $tardir. 1034 | # tardir=directory && $(am__tar) > result.tar 1035 | # 1036 | # Substitute a variable $(am__untar) that extract such 1037 | # a tarball read from stdin. 
1038 | # $(am__untar) < result.tar 1039 | # 1040 | AC_DEFUN([_AM_PROG_TAR], 1041 | [# Always define AMTAR for backward compatibility. Yes, it's still used 1042 | # in the wild :-( We should find a proper way to deprecate it ... 1043 | AC_SUBST([AMTAR], ['$${TAR-tar}']) 1044 | 1045 | # We'll loop over all known methods to create a tar archive until one works. 1046 | _am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' 1047 | 1048 | m4_if([$1], [v7], 1049 | [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], 1050 | 1051 | [m4_case([$1], 1052 | [ustar], 1053 | [# The POSIX 1988 'ustar' format is defined with fixed-size fields. 1054 | # There is notably a 21 bits limit for the UID and the GID. In fact, 1055 | # the 'pax' utility can hang on bigger UID/GID (see automake bug#8343 1056 | # and bug#13588). 1057 | am_max_uid=2097151 # 2^21 - 1 1058 | am_max_gid=$am_max_uid 1059 | # The $UID and $GID variables are not portable, so we need to resort 1060 | # to the POSIX-mandated id(1) utility. Errors in the 'id' calls 1061 | # below are definitely unexpected, so allow the users to see them 1062 | # (that is, avoid stderr redirection). 1063 | am_uid=`id -u || echo unknown` 1064 | am_gid=`id -g || echo unknown` 1065 | AC_MSG_CHECKING([whether UID '$am_uid' is supported by ustar format]) 1066 | if test $am_uid -le $am_max_uid; then 1067 | AC_MSG_RESULT([yes]) 1068 | else 1069 | AC_MSG_RESULT([no]) 1070 | _am_tools=none 1071 | fi 1072 | AC_MSG_CHECKING([whether GID '$am_gid' is supported by ustar format]) 1073 | if test $am_gid -le $am_max_gid; then 1074 | AC_MSG_RESULT([yes]) 1075 | else 1076 | AC_MSG_RESULT([no]) 1077 | _am_tools=none 1078 | fi], 1079 | 1080 | [pax], 1081 | [], 1082 | 1083 | [m4_fatal([Unknown tar format])]) 1084 | 1085 | AC_MSG_CHECKING([how to create a $1 tar archive]) 1086 | 1087 | # Go ahead even if we have the value already cached. We do so because we 1088 | # need to set the values for the 'am__tar' and 'am__untar' variables. 1089 | _am_tools=${am_cv_prog_tar_$1-$_am_tools} 1090 | 1091 | for _am_tool in $_am_tools; do 1092 | case $_am_tool in 1093 | gnutar) 1094 | for _am_tar in tar gnutar gtar; do 1095 | AM_RUN_LOG([$_am_tar --version]) && break 1096 | done 1097 | am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' 1098 | am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' 1099 | am__untar="$_am_tar -xf -" 1100 | ;; 1101 | plaintar) 1102 | # Must skip GNU tar: if it does not support --format= it doesn't create 1103 | # ustar tarball either. 1104 | (tar --version) >/dev/null 2>&1 && continue 1105 | am__tar='tar chf - "$$tardir"' 1106 | am__tar_='tar chf - "$tardir"' 1107 | am__untar='tar xf -' 1108 | ;; 1109 | pax) 1110 | am__tar='pax -L -x $1 -w "$$tardir"' 1111 | am__tar_='pax -L -x $1 -w "$tardir"' 1112 | am__untar='pax -r' 1113 | ;; 1114 | cpio) 1115 | am__tar='find "$$tardir" -print | cpio -o -H $1 -L' 1116 | am__tar_='find "$tardir" -print | cpio -o -H $1 -L' 1117 | am__untar='cpio -i -H $1 -d' 1118 | ;; 1119 | none) 1120 | am__tar=false 1121 | am__tar_=false 1122 | am__untar=false 1123 | ;; 1124 | esac 1125 | 1126 | # If the value was cached, stop now. We just wanted to have am__tar 1127 | # and am__untar set. 1128 | test -n "${am_cv_prog_tar_$1}" && break 1129 | 1130 | # tar/untar a dummy directory, and stop if the command works. 
1131 | rm -rf conftest.dir 1132 | mkdir conftest.dir 1133 | echo GrepMe > conftest.dir/file 1134 | AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) 1135 | rm -rf conftest.dir 1136 | if test -s conftest.tar; then 1137 | AM_RUN_LOG([$am__untar /dev/null 2>&1 && break 1140 | fi 1141 | done 1142 | rm -rf conftest.dir 1143 | 1144 | AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) 1145 | AC_MSG_RESULT([$am_cv_prog_tar_$1])]) 1146 | 1147 | AC_SUBST([am__tar]) 1148 | AC_SUBST([am__untar]) 1149 | ]) # _AM_PROG_TAR 1150 | 1151 | m4_include([m4/pkg.m4]) 1152 | -------------------------------------------------------------------------------- /daemons/vertexbroker/autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | aclocal --install -I m4 && 4 | autoconf && 5 | automake --add-missing --copy && 6 | ./configure "$@" 7 | 8 | -------------------------------------------------------------------------------- /daemons/vertexbroker/bin/vertexbroker-0.1.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gtoonstra/remap/cfd9a413858c54e7f1049c546d39bd0edccded0c/daemons/vertexbroker/bin/vertexbroker-0.1.1.tar.gz -------------------------------------------------------------------------------- /daemons/vertexbroker/configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT([VertexBroker], 0.1.1) 2 | AM_INIT_AUTOMAKE 3 | AC_PROG_CC 4 | 5 | PKG_CHECK_MODULES([NANOMSG], [libnanomsg >= 0.4]) 6 | 7 | AC_CONFIG_FILES([Makefile src/Makefile]) 8 | AC_OUTPUT 9 | 10 | -------------------------------------------------------------------------------- /daemons/vertexbroker/src/.gitignore: -------------------------------------------------------------------------------- 1 | .deps/ 2 | *.o 3 | vertexbroker 4 | 5 | -------------------------------------------------------------------------------- /daemons/vertexbroker/src/Makefile.am: -------------------------------------------------------------------------------- 1 | bin_PROGRAMS = vertexbroker 2 | 3 | vertexbroker_CFLAGS = $(NANOMSG_CFLAGS) 4 | vertexbroker_LDADD = $(NANOMSG_LIBS) 5 | 6 | vertexbroker_SOURCES = main.c 7 | 8 | -------------------------------------------------------------------------------- /daemons/vertexbroker/src/control.c: -------------------------------------------------------------------------------- 1 | #include "control.h" 2 | #include "globals.h" 3 | 4 | extern int mode; 5 | extern const char *vertex_control_channel; 6 | 7 | int process_control_message( void *buf, int len ) 8 | { 9 | 10 | } 11 | 12 | -------------------------------------------------------------------------------- /daemons/vertexbroker/src/control.h: -------------------------------------------------------------------------------- 1 | #ifndef CONTROL_H 2 | #define CONTROL_H 3 | 4 | int process_control_message( void *buf, int len ); 5 | 6 | #endif 7 | 8 | -------------------------------------------------------------------------------- /daemons/vertexbroker/src/globals.h: -------------------------------------------------------------------------------- 1 | #ifndef GLOBALS_H 2 | #define GLOBALS_H 3 | 4 | #define MODE_RECEIVING 0 5 | #define MODE_SENDING 1 6 | #define MODE_RELAY 2 7 | 8 | #endif 9 | 10 | -------------------------------------------------------------------------------- /daemons/vertexbroker/src/main.c: 
--------------------------------------------------------------------------------
1 | #include <assert.h>
2 | #include <stdio.h>
3 | #include <string.h>
4 | #include <errno.h>
5 | #include <nanomsg/nn.h>
6 | #include <nanomsg/pubsub.h>
7 |
8 | #include "control.h"
9 | #include "globals.h"
10 |
11 | #define NN_IN 1
12 | #define NN_OUT 2
13 |
14 | const char *vertex_control_channel = "+vertex_control";
15 |
16 | int mode = MODE_RECEIVING;
17 |
18 | int main (int argc, char **argv)
19 | {
20 | struct nn_pollfd pfd[2];
21 | int rc;
22 | void *buf = NULL;
23 |
24 | printf("Starting vertex broker\n" );
25 |
26 | int pub = nn_socket (AF_SP, NN_PUB);
27 | assert (pub >= 0);
28 | assert (nn_bind (pub, "tcp://0.0.0.0:8690") >= 0);
29 |
30 | int sub = nn_socket (AF_SP, NN_SUB);
31 | assert (sub >= 0);
32 |
33 | assert (nn_setsockopt (sub, NN_SUB, NN_SUB_SUBSCRIBE, "", 0) >= 0);
34 | assert (nn_bind (sub, "tcp://0.0.0.0:8689") >= 0);
35 |
36 | int controlsub = nn_socket (AF_SP, NN_SUB);
37 | assert (controlsub >= 0);
38 | assert (nn_setsockopt (controlsub, NN_SUB, NN_SUB_SUBSCRIBE, vertex_control_channel, strlen(vertex_control_channel)) >= 0);
39 | assert (nn_connect (controlsub, "tcp://localhost:8687") >= 0);
40 |
41 | printf("Ready for polling\n" );
42 |
43 | /* Initialise the pollset. */
44 | pfd[0].fd = pub;
45 | // only set this poll event if we actually have something to send to prevent infinite loop
46 | pfd[0].events = 0;//NN_POLLOUT;
47 | pfd[1].fd = sub;
48 | pfd[1].events = NN_POLLIN;
49 | //pfd[2].fd = controlsub;
50 | //pfd[2].events = NN_POLLIN;
51 |
52 | while (1) {
53 | rc = nn_poll (pfd, 2, 2000);
54 | if (rc == 0) {
55 | // timeout. Check if we need to continue
56 | continue;
57 | }
58 | if (rc == -1) {
59 | // error. Probably break out and shut down
60 | fprintf( stderr, "nn_poll() error: %s\n", strerror(errno));
61 | break;
62 | }
63 | if (pfd [0].revents & NN_POLLOUT) {
64 | // vertex PUB is ready to send another message
65 | printf("PUB message ready\n" );
66 | }
67 | if (pfd [1].revents & NN_POLLIN) {
68 | // vertex SUB is receiving a message
69 | rc = nn_recv (sub, &buf, NN_MSG, NN_DONTWAIT);
70 | if ( rc < 0 ) {
71 | if ( errno == EAGAIN ) {
72 | printf( "Poll indicated readiness to read, but returned EAGAIN\n" );
73 | }
74 | fprintf( stderr, "nn_recv() error: %s\n", strerror(errno));
75 | break;
76 | } else {
77 | // do something with buf and rc (len)
78 | char tst[100] = {"\0"};
79 | snprintf( tst, 100, "%.*s", rc, (char *)buf );
80 | printf( "%s\n", tst );
81 |
82 | // sheer violation of POLLOUT, but who cares..?
83 | nn_send(pub, &buf, NN_MSG, NN_DONTWAIT );
84 |
85 | // no need to deallocate. We just did zero copy.
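// (With NN_MSG, a successful nn_send() hands ownership of buf to nanomsg, which frees it
//  internally; only if that send itself failed would this code still own the buffer and
//  need to call nn_freemsg() on it.)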
86 | // nn_freemsg (buf); 87 | } 88 | } 89 | /* 90 | if (pfd [2].revents & NN_POLLIN) { 91 | // control SUB is receiving a message 92 | printf("control SUB message ready\n" ); 93 | rc = nn_recv (controlsub, buf, NN_MSG, NN_DONTWAIT); 94 | if ( rc < 0 ) { 95 | if ( rc == EAGAIN ) { 96 | printf( "Poll indicated readiness to read, but returned EAGAIN\n" ); 97 | } 98 | fprintf( stderr, "nn_poll() error: %s\n", strerror(errno)); 99 | break; 100 | } else { 101 | // do something with buf and rc (len) 102 | nn_freemsg (buf); 103 | } 104 | } 105 | */ 106 | } 107 | 108 | nn_shutdown (controlsub, 0); 109 | nn_shutdown (sub, 0); 110 | nn_shutdown (pub, 0); 111 | 112 | return 0; 113 | } 114 | 115 | -------------------------------------------------------------------------------- /dev/basic_elements.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import errno 4 | 5 | # A class for reading in raw data to be processed. 6 | # Used as input to the mapper 7 | class TextFileReader(object): 8 | def __init__( self, filename ): 9 | # super(object, self).__init__() 10 | self.f = open(filename, 'r') 11 | self.filename = filename 12 | 13 | def read( self ): 14 | for line in self.f: 15 | yield self.filename, line 16 | 17 | def close( self ): 18 | self.f.close() 19 | 20 | # A class that maintains intermediate data. The data is kept in memory, 21 | # so that chunks can be written out to file in a controlled manner, 22 | # such that at least each partition subfile is fully sorted. 23 | class Partitioner( object ): 24 | def __init__( self, filename ): 25 | # super(object, self).__init__() 26 | try: 27 | os.makedirs( os.path.dirname( filename ) ) 28 | except OSError as exc: # Python >2.5 29 | if exc.errno == errno.EEXIST: 30 | pass 31 | else: raise 32 | self.f = open(filename, 'w') 33 | self.mem = {} 34 | self.total_keys = 0 35 | self.total_values = 0 36 | 37 | # Statistics handling here allow future splitting up of further data 38 | # if this partition overfloweth. 39 | def store( self, k2, v2 ): 40 | if k2 not in self.mem: 41 | self.mem[ k2 ] = [] 42 | self.total_keys = self.total_keys + 1 43 | 44 | self.mem[ k2 ].append( v2 ) 45 | self.total_values = self.total_values + 1 46 | 47 | def sort_flush_close( self ): 48 | for k in sorted(self.mem): 49 | l = self.mem[k] 50 | out = json.dumps( l ) 51 | self.f.write( "%s,%s\n"%( k,out ) ) 52 | self.f.close() 53 | 54 | # The part file reader reads back in one single partition file. 55 | class PartFileReader(object): 56 | def __init__( self, filename ): 57 | # super(object, self).__init__() 58 | self.f = open(filename, 'r') 59 | 60 | def read( self ): 61 | for line in self.f: 62 | key, data = line.split(',', 1) 63 | l = json.loads( data ) 64 | yield (key, l) 65 | 66 | # The reduce writer dumps the final results to some file for one single 67 | # reducer instance. 
68 | class ReduceWriter( object ): 69 | def __init__( self, filename ): 70 | # super(object, self).__init__() 71 | self.f = open(filename, 'w') 72 | 73 | def store( self, k3, v3 ): 74 | self.f.write( "%s,%d\n"%( k3, v3 ) ) 75 | 76 | def close( self ): 77 | self.f.close() 78 | 79 | # ---- map and reduce implementations ---- 80 | def map( key, value ): 81 | remove = ".,?:;!\"" 82 | trans = str.maketrans(remove, ' ' * len(remove)) 83 | 84 | words = value.translate( trans ).split() 85 | for word in words: 86 | # remove comma's, they create issues for our file format 87 | word = word.lower() 88 | if word[0] in 'abcde': 89 | yield 'a2e', word, 1 90 | elif word[0] in 'fghijklmn': 91 | yield 'f2n', word, 1 92 | elif word[0] in 'opqrs': 93 | yield 'o2s', word, 1 94 | elif word[0] in 'tuvwxyz': 95 | yield 't2z', word, 1 96 | else: 97 | yield '_default', word, 1 98 | 99 | def reduce( key, list_of_values ): 100 | yield (key, sum(list_of_values)) 101 | 102 | 103 | # --- main program start --- 104 | if __name__ == '__main__': 105 | 106 | # ---- what a mapper does ---- 107 | 108 | # read in data 109 | # A 'real life' example would have many input files and thus multiple instances 110 | # of this 'mapper' object, distributed across nodes. 111 | fr = TextFileReader( "../testdata/tomsawyer.txt" ) 112 | 113 | # set up partitions for the mapper output data. 114 | # every mapper instance does this. 115 | partitions = {} 116 | partitions[ "a2e" ] = Partitioner( "../im/a2e/part-r-0000.txt" ) 117 | partitions[ "f2n" ] = Partitioner( "../im/f2n/part-r-0000.txt" ) 118 | partitions[ "o2s" ] = Partitioner( "../im/o2s/part-r-0000.txt" ) 119 | partitions[ "t2z" ] = Partitioner( "../im/t2z/part-r-0000.txt" ) 120 | partitions[ "_default" ] = Partitioner( "../im/default/part-r-0000.txt" ) 121 | 122 | # Map it. 123 | for k1, v1 in fr.read(): 124 | for part, k2, v2 in map( k1, v1 ): 125 | partitions[ part ].store( k2, v2 ) 126 | 127 | fr.close() 128 | fr = None 129 | 130 | # Sort partition before output and flush sorted output to file. 131 | for part in partitions: 132 | partitions[part].sort_flush_close() 133 | 134 | # ---- what a reducer does ---- 135 | 136 | # Grab the partitions for this reducer instance, made by some anonymous 137 | # collection of mappers. 138 | # In real life, you'd have one reducer per partition key. So here we simply reuse 139 | # the reducer in a serial fashion. Starting with partition #1, run reducer, partition #2, 140 | # run reducer, etc... 141 | # 142 | # The partition is not necessarily a single file, it can be many if the data overflows 143 | # the max file size (or memory). 144 | partitions = {} 145 | partitions[ "a2e" ] = PartFileReader( "../im/a2e/part-r-0000.txt" ) 146 | partitions[ "f2n" ] = PartFileReader( "../im/f2n/part-r-0000.txt" ) 147 | partitions[ "o2s" ] = PartFileReader( "../im/o2s/part-r-0000.txt" ) 148 | partitions[ "t2z" ] = PartFileReader( "../im/t2z/part-r-0000.txt" ) 149 | partitions[ "_default" ] = PartFileReader( "../im/default/part-r-0000.txt" ) 150 | 151 | # A way to output the results of the reduce operation. 152 | rw = ReduceWriter( "../im/result-r-0000.txt" ) 153 | 154 | # reduce data and produce results. 155 | # In real life, the reducewriter would be recreated for each partition, which is another way 156 | # of saying that reducers are actually isolated from each other and have their own output files. 
157 | for part in sorted(partitions):
158 | for key,list_of_values in partitions[part].read():
159 | for k3,v3 in reduce( key, list_of_values ):
160 | rw.store( k3, v3 )
161 |
162 | rw.close()
163 |
164 |
--------------------------------------------------------------------------------
/examples/collation/appconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "module":"collation"
3 | }
4 |
--------------------------------------------------------------------------------
/examples/collation/collation.py:
--------------------------------------------------------------------------------
1 | import remap
2 |
3 | # --- create file i/o objects to be used ----
4 | def create_mapper_reader( filename ):
5 | return remap.HTMLFileReader( filename )
6 |
7 | def create_mapper_partitioner( outputdir, partition, mapperid ):
8 | return remap.TextPartitioner( outputdir, partition, mapperid, combiner=list_combiner )
9 |
10 | def create_reducer_reader( inputdir ):
11 | return remap.TextPartFileReader( inputdir )
12 |
13 | def create_reducer_writer( outputdir, partition ):
14 | return remap.TextReduceWriter( outputdir, partition )
15 |
16 | # ---- map and reduce implementations ----
17 |
18 | def list_combiner( l ):
19 | return list(set(l))
20 |
21 | def map( key, value ):
22 | remove = ".,?:;!\""
23 | trans = str.maketrans(remove, ' ' * len(remove))
24 |
25 | words = value.translate( trans ).split()
26 | for word in words:
27 | # remove commas, they create issues for our file format
28 | word = word.lower()
29 | if word[0] in 'abcde':
30 | yield 'a2e', word, key
31 | elif word[0] in 'fghijklmn':
32 | yield 'f2n', word, key
33 | elif word[0] in 'opqrs':
34 | yield 'o2s', word, key
35 | elif word[0] in 'tuvwxyz':
36 | yield 't2z', word, key
37 | else:
38 | yield '_default', word, key
39 |
40 | # The reduce operation collates the distinct values (source keys) seen for each word and outputs them.
41 | def reduce( key, list_of_values ): 42 | yield (key, list_combiner(list_of_values)) 43 | 44 | -------------------------------------------------------------------------------- /examples/highest/appconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "module":"highest" 3 | } 4 | 5 | -------------------------------------------------------------------------------- /examples/highest/highest.py: -------------------------------------------------------------------------------- 1 | import remap 2 | 3 | # --- create file i/o objects to be used ---- 4 | def create_vertex_reader( filename ): 5 | return remap.TextFileReader( filename, yieldkv=False ) 6 | 7 | def create_vertex_partitioner( outputdir, partition, mapperid ): 8 | return remap.TextPartitioner( outputdir, partition, mapperid ) 9 | 10 | # ---- pagerank vertex implementation ---- 11 | def prepare( line ): 12 | line = line.strip() 13 | if len(line) == 0: 14 | return None, None 15 | 16 | elems = line.split() 17 | out = [] 18 | 19 | for i in range(2,len(elems)): 20 | if len(elems[i]) > 0: 21 | out.append( elems[ i ] ) 22 | 23 | vertex = ( int(elems[1]), out ) 24 | return elems[0], vertex 25 | 26 | def compute( forward, sub, unsub, superstep, vertex, messages ): 27 | (val, out) = vertex 28 | 29 | halt = True 30 | for data in messages: 31 | if int(data) > val: 32 | val = int(data) 33 | halt = False 34 | 35 | if superstep == 0: 36 | halt = False 37 | 38 | vertex = (val,out) 39 | 40 | for vertex_id in out: 41 | forward( vertex_id, "%d"%( val )) 42 | 43 | return vertex, halt 44 | 45 | 46 | -------------------------------------------------------------------------------- /examples/pagerank/appconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "module":"pagerank" 3 | } 4 | 5 | -------------------------------------------------------------------------------- /examples/pagerank/pagerank.py: -------------------------------------------------------------------------------- 1 | import remap 2 | 3 | # --- create file i/o objects to be used ---- 4 | def create_vertex_reader( filename ): 5 | return remap.TextFileReader( filename, yieldkv=False ) 6 | 7 | def create_vertex_partitioner( outputdir, partition, mapperid ): 8 | return remap.TextPartitioner( outputdir, partition, mapperid ) 9 | 10 | NUM_VERTICES = 10 11 | 12 | # ---- pagerank vertex implementation ---- 13 | def prepare( line ): 14 | line = line.strip() 15 | if len(line) == 0: 16 | return None, None 17 | 18 | elems = line.split() 19 | 20 | out = [] 21 | for i in range(1,len(elems)): 22 | if len(elems[i]) > 0: 23 | out.append( elems[ i ] ) 24 | 25 | vertex = ( 1.0 / NUM_VERTICES, out ) 26 | return elems[0], vertex 27 | 28 | def compute( send_fn, superstep, vertex, messages ): 29 | (val, out) = vertex 30 | if (superstep >= 1): 31 | sum = 0 32 | 33 | for data in messages: 34 | sum = sum + float(data) 35 | 36 | val = 0.15 / NUM_VERTICES + 0.85 * sum 37 | vertex = ( val, out ) 38 | 39 | if superstep < 30: 40 | for vertex_id in out: 41 | send_fn( vertex_id, "%f"%( val / len(out) )) 42 | else: 43 | return vertex, True 44 | 45 | return vertex, False 46 | 47 | -------------------------------------------------------------------------------- /examples/secondarysort/appconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "module":"secondarysort" 3 | } 4 | 5 | -------------------------------------------------------------------------------- 
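The highest and pagerank examples above only supply prepare() and compute(); the remap core drives the superstep loop and delivers messages between vertices. As a rough sketch of the contract those functions follow — not the actual remap scheduler, which distributes vertices over partitions and brokers messages over nanomsg — here is a hypothetical single-process driver. It assumes the compute( send_fn, superstep, vertex, messages ) signature from pagerank.py (highest.py uses a different signature), and the names run_supersteps and max_supersteps are made up for illustration:

import sys

def run_supersteps( prepare, compute, lines, max_supersteps=50 ):
    # Build the vertex set: one vertex per non-empty input line.
    vertices = {}
    for line in lines:
        vertex_id, vertex = prepare( line )
        if vertex_id is not None:
            vertices[ vertex_id ] = vertex

    inbox = dict( (vid, []) for vid in vertices )
    for superstep in range( max_supersteps ):
        outbox = {}

        def send_fn( dest_id, message ):
            # Queue a message for delivery in the next superstep.
            outbox.setdefault( dest_id, [] ).append( message )

        all_halted = True
        for vid in list( vertices ):
            vertices[ vid ], halted = compute( send_fn, superstep, vertices[ vid ], inbox.get( vid, [] ) )
            all_halted = all_halted and halted

        if all_halted:
            break
        inbox = outbox

    return vertices

# Example run against the bundled test graph (assumes the remap package is importable,
# since pagerank.py does "import remap"):
#   python3 driver.py testdata/graph/graph1.txt
if __name__ == '__main__':
    import pagerank
    with open( sys.argv[1] ) as f:
        result = run_supersteps( pagerank.prepare, pagerank.compute, f )
    for vid, (rank, out) in sorted( result.items() ):
        print( vid, rank )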
/examples/secondarysort/secondarysort.py: -------------------------------------------------------------------------------- 1 | import remap 2 | from operator import itemgetter 3 | 4 | # --- create file i/o objects to be used ---- 5 | def create_mapper_reader( filename ): 6 | return remap.TextFileReader( filename ) 7 | 8 | def create_mapper_partitioner( outputdir, partition, mapperid ): 9 | return remap.TextPartitioner( outputdir, partition, mapperid, combiner=None, customkey=itemgetter(3) ) 10 | 11 | ...etc... 12 | 13 | # ---- map and reduce implementations ---- 14 | def map( key, value ): 15 | words = value.split(',') 16 | part = words[2].replace(" ", "_") 17 | yield part, tuple(words), "" 18 | 19 | def reduce( key, value ): 20 | yield (key, value) 21 | 22 | -------------------------------------------------------------------------------- /examples/wordcount/appconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "module":"wordcount" 3 | } 4 | 5 | 6 | -------------------------------------------------------------------------------- /examples/wordcount/wordcount.py: -------------------------------------------------------------------------------- 1 | import remap 2 | 3 | # --- create file i/o objects to be used ---- 4 | def create_mapper_reader( filename ): 5 | return remap.TextFileReader( filename ) 6 | 7 | def create_mapper_partitioner( outputdir, partition, mapperid ): 8 | return remap.TextPartitioner( outputdir, partition, mapperid ) 9 | 10 | def create_reducer_reader( inputdir ): 11 | return remap.TextPartFileReader( inputdir ) 12 | 13 | def create_reducer_writer( outputdir, partition ): 14 | return remap.TextReduceWriter( outputdir, partition ) 15 | 16 | # ---- map and reduce implementations ---- 17 | 18 | # map just creates one record of the word and a '1' to count it, 19 | # it also directs the mapped value to a specific partition 20 | def map( key, value ): 21 | remove = ".,?:;!\"" 22 | trans = str.maketrans(remove, ' ' * len(remove)) 23 | 24 | words = value.translate( trans ).split() 25 | for word in words: 26 | # remove comma's, they create issues for our file format 27 | word = word.lower() 28 | if word[0] in 'abcde': 29 | yield 'a2e', word, 1 30 | elif word[0] in 'fghijklmn': 31 | yield 'f2n', word, 1 32 | elif word[0] in 'opqrs': 33 | yield 'o2s', word, 1 34 | elif word[0] in 'tuvwxyz': 35 | yield 't2z', word, 1 36 | else: 37 | yield '_default', word, 1 38 | 39 | # The reduce operation sums all the values in the sequence and outputs. 
40 | def reduce( key, list_of_values ):
41 | yield (key, str(sum(list_of_values)))
42 |
43 |
--------------------------------------------------------------------------------
/images/flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gtoonstra/remap/cfd9a413858c54e7f1049c546d39bd0edccded0c/images/flow.png
--------------------------------------------------------------------------------
/images/flow.svg:
--------------------------------------------------------------------------------
[SVG markup not preserved in this dump; the diagram shows the remap data flow: input file reader -> k1 v1 -> mapper -> k2 v2 -> sets of partition files -> k2 v2 -> reducer -> k3 v3 -> result files]
--------------------------------------------------------------------------------
/prepare_env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3.4
2 |
3 | import os
4 | import sys
5 | from distutils import dir_util
6 | import fileinput
7 |
8 | root = None
9 |
10 | if len(sys.argv) > 1:
11 | root = sys.argv[1]
12 | else:
13 | print("(The root directory for remap files must be writable for the current user)")
14 | root = input("Where to create the directory layout for remap? : ")
15 |
16 | confirm = input("Do you really want to install remap at %s?
(y/n) : "%( root )) 17 | if confirm != "y": 18 | print("Install cancelled.") 19 | sys.exit(-1) 20 | 21 | # /remote/app/xxx/yyy/zzz = directory to files for a particular application 22 | 23 | def create_dirs( newdir ): 24 | try: 25 | os.makedirs( newdir ) 26 | except OSError as oe: 27 | pass 28 | 29 | # create directory structure 30 | create_dirs( os.path.join( root, "job" ) ) 31 | create_dirs( os.path.join( root, "app" ) ) 32 | create_dirs( os.path.join( root, "data" ) ) 33 | create_dirs( os.path.join( root, "cluster" ) ) 34 | create_dirs( os.path.join( root, "test" ) ) 35 | 36 | # Get location this file is at (remap git directory) 37 | modpath = os.path.realpath(os.path.dirname(os.path.abspath(__file__))) 38 | 39 | # Copy data from remap git to "data" directory in new location 40 | srctree = os.path.join( modpath, "testdata/" ) 41 | dir_util.copy_tree( srctree, os.path.join( root, "data" ) ) 42 | 43 | # Copy examples from remap git to "app" directory in new location 44 | srctree = os.path.join( modpath, "examples/" ) 45 | dir_util.copy_tree( srctree, os.path.join( root, "app" ) ) 46 | 47 | # Copy test scripts from remap git to "test" directory in new location 48 | srctree = os.path.join( modpath, "tests/examples/" ) 49 | dir_util.copy_tree( srctree, os.path.join( root, "test" ) ) 50 | 51 | all_test_scripts = os.listdir( os.path.join( root, "test" ) ) 52 | 53 | for filename in all_test_scripts: 54 | fullpath = os.path.join( root, "test", filename ) 55 | for line in fileinput.input([fullpath], inplace=True): 56 | print(line.replace('REMAP_ROOT', root )) 57 | 58 | os.chmod(fullpath, 0o755) 59 | 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """A distributed execution engine. 
2 | See:
3 | https://github.com/gtoonstra/remap
4 | """
5 |
6 | # Always prefer setuptools over distutils
7 | from setuptools import setup, find_packages
8 | # To use a consistent encoding
9 | from codecs import open
10 | from os import path
11 |
12 | here = path.abspath(path.dirname(__file__))
13 |
14 | # Get the long description from the relevant file
15 | with open(path.join(here, 'DESCRIPTION.rst'), encoding='utf-8') as f:
16 | long_description = f.read()
17 |
18 | setup(
19 | name='remap',
20 | version='0.0.1',
21 | description='Distributed execution engine',
22 | long_description=long_description,
23 | url='https://github.com/gtoonstra/remap',
24 | author='Gerard Toonstra',
25 | author_email='gtoonstra@gmail.com',
26 | license='MIT',
27 | classifiers=[
28 | 'Development Status :: 3 - Alpha',
29 | 'Intended Audience :: Developers',
30 | 'Intended Audience :: Information Technology',
31 | 'Environment :: Console',
32 | 'License :: OSI Approved :: MIT License',
33 | 'Operating System :: MacOS :: MacOS X',
34 | 'Operating System :: POSIX',
35 | 'Programming Language :: C',
36 | 'Programming Language :: Python :: 3',
37 | 'Programming Language :: Python :: 3.2',
38 | 'Programming Language :: Python :: 3.3',
39 | 'Programming Language :: Python :: 3.4',
40 | 'Topic :: Scientific/Engineering :: Information Analysis'
41 | ],
42 | keywords='pregel mapreduce grid',
43 | packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
44 | include_package_data=True,
45 | zip_safe=False,
46 | scripts=['remap/bin/remap'],
47 | install_requires=['nanomsg','flask','flask-simple-api'],
48 | extras_require={},
49 | download_url=(
50 | 'https://github.com/gtoonstra/remap/tarball/0.0.1'),
51 | )
52 |
--------------------------------------------------------------------------------
/testdata/graph/graph1.txt:
--------------------------------------------------------------------------------
1 | 1 2 3 4 5 6 7 8 9 0
2 | 2 3 5 6 8
3 | 3 0 9 2
4 |
5 |
--------------------------------------------------------------------------------
/testdata/graph/graph2.txt:
--------------------------------------------------------------------------------
1 | 4 8 9 0
2 | 5 8
3 | 6 1
4 | 7 2 3 4
5 | 8 0
6 | 9 0
7 | 0 1
8 |
9 |
--------------------------------------------------------------------------------
/testdata/highest/test.txt:
--------------------------------------------------------------------------------
1 | a 3 b
2 | b 6 a d
3 | c 2 b d
4 | d 1 c
5 |
6 |
--------------------------------------------------------------------------------
/tests/adhoc/app-jobstart-reducer-3cores.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Create the directory for output by reduce (this is always overwritten)
4 | mkdir REMAP_ROOT/data/wordcount/
5 |
6 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.3cores
{"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/a2e","partition":"a2e"},{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/_default","partition":"_default"},{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/f2n","partition":"f2n"}]}' 7 | -------------------------------------------------------------------------------- /tests/adhoc/app-jobstart-reducer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"/remote/job/jobid/app","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"/remote/job/jobid/app/appconfig.json","type":"reducer","outputdir":"/remote/data/wordscounted","inputdir":"/remote/job/jobid/part/a2e","partition":"a2e"}]}' 4 | 5 | sleep 5 6 | 7 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"/remote/job/jobid/app","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"/remote/job/jobid/app/appconfig.json","type":"reducer","outputdir":"/remote/data/wordscounted","inputdir":"/remote/job/jobid/part/_default","partition":"_default"}]}' 8 | 9 | sleep 5 10 | 11 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"/remote/job/jobid/app","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"/remote/job/jobid/app/appconfig.json","type":"reducer","outputdir":"/remote/data/wordscounted","inputdir":"/remote/job/jobid/part/f2n","partition":"f2n"}]}' 12 | 13 | sleep 5 14 | 15 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"/remote/job/jobid/app","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"/remote/job/jobid/app/appconfig.json","type":"reducer","outputdir":"/remote/data/wordscounted","inputdir":"/remote/job/jobid/part/o2s","partition":"o2s"}]}' 16 | 17 | sleep 5 18 | 19 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"/remote/job/jobid/app","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"/remote/job/jobid/app/appconfig.json","type":"reducer","outputdir":"/remote/data/wordscounted","inputdir":"/remote/job/jobid/part/t2z","partition":"t2z"}]}' 20 | 21 | 22 | -------------------------------------------------------------------------------- /tests/adhoc/app-jobstart.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"/remote/job/jobid/app","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"/remote/job/jobid/app/appconfig.json","type":"mapper","inputfile":"/remote/data/tomsawyer.txt","outputdir":"/remote/job/jobid/part"}]}' 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/adhoc/app-showhands.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 
nanocat --pub --connect-local 8686 --delay 1 --data 'local.showhands.adhoc {"priority":5}' 4 | 5 | -------------------------------------------------------------------------------- /tests/adhoc/nanocat-sub-vbroker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nanocat --sub --connect-local 8690 --ascii 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/adhoc/nanocat-sub.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | nanocat --sub --connect-local 8687 --ascii 4 | 5 | 6 | -------------------------------------------------------------------------------- /tests/examples/app-jobstart-3cores.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Create the directory for output by reduce (this is always overwritten) 4 | mkdir REMAP_ROOT/data/wordcount/ 5 | 6 | # Clean potential data from previous run, because that is not overwritten; 7 | # (mapper-id is always different) 8 | rm -rf REMAP_ROOT/job/3cores/part/* 9 | 10 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.3cores {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"mapper","inputfile":"REMAP_ROOT/data/gutenberg/tomsawyer.txt","outputdir":"REMAP_ROOT/job/3cores/part"},{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"mapper","inputfile":"REMAP_ROOT/data/gutenberg/beowulf.txt","outputdir":"REMAP_ROOT/job/3cores/part"},{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"mapper","inputfile":"REMAP_ROOT/data/gutenberg/alice-in-wonderland.txt","outputdir":"REMAP_ROOT/job/3cores/part"}]}' 11 | 12 | sleep 5 13 | 14 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.3cores {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/a2e","partition":"a2e"},{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/_default","partition":"_default"},{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/f2n","partition":"f2n"}]}' 15 | 16 | sleep 5 17 | 18 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.3cores {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/o2s","partition":"o2s"},{"jobid":"3cores","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/3cores/part/t2z","partition":"t2z"}]}' 19 | -------------------------------------------------------------------------------- /tests/examples/run_wordcount.sh: -------------------------------------------------------------------------------- 1 | 
--------------------------------------------------------------------------------
/tests/examples/run_wordcount.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Create the output directory for the reduce step (its contents are overwritten on every run)
4 | mkdir -p REMAP_ROOT/data/wordcount/
5 | 
6 | # Remove partition data left over from a previous run; it is not overwritten
7 | # automatically, because every mapper run gets a fresh mapper-id
8 | rm -rf REMAP_ROOT/job/jobid/part/*
9 | 
10 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"mapper","inputfile":"REMAP_ROOT/data/gutenberg/tomsawyer.txt","outputdir":"REMAP_ROOT/job/jobid/part"}]}'
11 | 
12 | sleep 5
13 | 
14 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/jobid/part/a2e","partition":"a2e"}]}'
15 | 
16 | sleep 5
17 | 
18 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/jobid/part/_default","partition":"_default"}]}'
19 | 
20 | sleep 5
21 | 
22 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/jobid/part/f2n","partition":"f2n"}]}'
23 | 
24 | sleep 5
25 | 
26 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/jobid/part/o2s","partition":"o2s"}]}'
27 | 
28 | sleep 5
29 | 
30 | nanocat --pub --connect-local 8686 --delay 1 --data 'local.jobstart.jobid {"priority":5,"appdir":"REMAP_ROOT/app/wordcount","cores":[{"jobid":"jobid","appmodule":"wordcount","appconfig":"REMAP_ROOT/app/wordcount/appconfig.json","type":"reducer","outputdir":"REMAP_ROOT/data/wordcount","inputdir":"REMAP_ROOT/job/jobid/part/t2z","partition":"t2z"}]}'
31 | 
32 | 
--------------------------------------------------------------------------------
/tests/scripts/run_publisher.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import nanomsg as nn
4 | from nanomsg import wrapper as nn_wrapper
5 | import time
6 | 
7 | if __name__ == "__main__":
8 |     # Local PUB socket towards port 8686, the same port the adhoc test scripts publish to.
9 |     lpub = nn.Socket( nn.PUB, domain=nn.AF_SP )
10 |     lpub.connect( "tcp://localhost:8686" )
11 |     # nanomsg expects bytes on python 3, so send byte strings.
12 |     lpub.send( b"test" )
13 | 
14 |     while True:
15 |         # Publish a '<topic> <payload>' test message every five seconds.
16 |         lpub.send( b"global.test.09325235325_12124 {}" )
17 |         time.sleep( 5 )
18 | 
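run_publisher.py above pushes messages whose topic starts with 'global.test.'. Because nanomsg SUB sockets filter on a topic prefix, a subscriber does not have to receive everything the way run_subscriber.py below does. A minimal sketch, assuming the same local port 8687 used by the other subscriber scripts; it is illustrative and not part of the repository.

import nanomsg as nn

# Receive only messages whose topic starts with 'global.test.' instead of all traffic.
sub = nn.Socket(nn.SUB, domain=nn.AF_SP)
sub.connect("tcp://localhost:8687")
sub.set_string_option(nn.SUB, nn.SUB_SUBSCRIBE, b"global.test.")
while True:
    print(sub.recv())  # raw '<topic> <payload>' bytes, exactly as published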
--------------------------------------------------------------------------------
/tests/scripts/run_subscriber.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import nanomsg as nn
4 | from nanomsg import wrapper as nn_wrapper
5 | import time
6 | 
7 | if __name__ == "__main__":
8 |     # Local SUB socket on port 8687, the same port nanocat-sub.sh listens on.
9 |     lsub = nn.Socket( nn.SUB, domain=nn.AF_SP )
10 |     lsub.connect( "tcp://localhost:8687" )
11 |     # An empty prefix subscribes to every topic; nanomsg expects bytes on python 3.
12 |     lsub.set_string_option( nn.SUB, nn.SUB_SUBSCRIBE, b"" )
13 |     while True:
14 |         data = lsub.recv()
15 |         print(data)
16 | 
17 | 
--------------------------------------------------------------------------------
/tests/unit/UNITTESTS_GO_HERE:
--------------------------------------------------------------------------------
1 | bb
2 | 
3 | 
--------------------------------------------------------------------------------
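The tests/unit directory is still a placeholder. As a starting point, below is a minimal sketch of a unit test for the '<topic> <json>' wire format used by the adhoc and example scripts; the split_message helper is hypothetical and not part of the repository.

import json
import unittest


def split_message(raw):
    # Hypothetical helper: split a '<topic> <json payload>' message into its parts.
    topic, _, payload = raw.partition(" ")
    return topic, json.loads(payload)


class WireFormatTest(unittest.TestCase):
    def test_jobstart_message(self):
        raw = 'local.jobstart.jobid {"priority":5,"appdir":"/remote/job/jobid/app","cores":[]}'
        topic, body = split_message(raw)
        self.assertEqual(topic, "local.jobstart.jobid")
        self.assertEqual(body["priority"], 5)
        self.assertEqual(body["cores"], [])


if __name__ == "__main__":
    unittest.main()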